{ "epoch": 0.999244142101285, "eval_logits/chosen": 1.3520251512527466, "eval_logits/rejected": 1.231923222541809, "eval_logps/chosen": -90.12765502929688, "eval_logps/ref_chosen": -87.31719970703125, "eval_logps/ref_rejected": -95.23231506347656, "eval_logps/rejected": -105.97742462158203, "eval_loss": 0.5182201266288757, "eval_margin_dpo/margin_mean": 7.934661388397217, "eval_margin_dpo/margin_std": 11.753697395324707, "eval_runtime": 42.5799, "eval_samples": 2303, "eval_samples_per_second": 54.087, "eval_steps_per_second": 1.691 }