{ "epoch": 0.9989528795811519, "eval_logits/chosen": 2.1443471908569336, "eval_logits/rejected": 2.1642587184906006, "eval_logps/chosen": -1.0595875978469849, "eval_logps/rejected": -1.4812482595443726, "eval_loss": 1.0770305395126343, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -2.1191751956939697, "eval_rewards/margins": 0.8433210253715515, "eval_rewards/rejected": -2.962496519088745, "eval_runtime": 45.3759, "eval_samples": 2000, "eval_samples_per_second": 44.076, "eval_steps_per_second": 5.51 }