{ "epoch": 0.9997600191984641, "eval_logits/chosen": -2.4037210941314697, "eval_logits/rejected": -2.271228313446045, "eval_logps/chosen": -5.422507286071777, "eval_logps/rejected": -6.740255355834961, "eval_loss": 18.408336639404297, "eval_rewards/accuracies": 0.738882839679718, "eval_rewards/chosen": -0.34205058217048645, "eval_rewards/margins": 0.13162928819656372, "eval_rewards/rejected": -0.47367987036705017, "eval_runtime": 42.4366, "eval_samples_per_second": 86.906, "eval_steps_per_second": 21.727 }