{ "epoch": 1.0, "eval_logits/chosen": -1.4371687173843384, "eval_logits/rejected": -1.1416503190994263, "eval_logps/chosen": -132.72055053710938, "eval_logps/ref_chosen": -101.88616943359375, "eval_logps/ref_rejected": -92.68607330322266, "eval_logps/rejected": -139.40850830078125, "eval_loss": 0.42009782791137695, "eval_margin_dpo/margin_mean": 15.88807487487793, "eval_margin_dpo/margin_std": 17.02425765991211, "eval_runtime": 44.0516, "eval_samples": 2339, "eval_samples_per_second": 53.097, "eval_steps_per_second": 1.68 }