{ "epoch": 1.0, "eval_logits/chosen": -0.5044217109680176, "eval_logits/rejected": -0.4913681149482727, "eval_logps/chosen": -80.9881591796875, "eval_logps/ref_chosen": -71.49089813232422, "eval_logps/ref_rejected": -76.31332397460938, "eval_logps/rejected": -92.98614501953125, "eval_loss": 0.5379691123962402, "eval_margin_dpo/margin_mean": 7.175543308258057, "eval_margin_dpo/margin_std": 10.471020698547363, "eval_runtime": 18.8064, "eval_samples": 2303, "eval_samples_per_second": 122.458, "eval_steps_per_second": 0.957 }