{ "epoch": 0.999244142101285, "eval_fcm_dpo/beta": 0.08221900463104248, "eval_fcm_dpo/delta": 0.006471974775195122, "eval_fcm_dpo/margin": 11.944085121154785, "eval_fcm_dpo/q_t": 0.33879798650741577, "eval_logits/chosen": 0.39933323860168457, "eval_logits/rejected": 0.3423865735530853, "eval_logps/chosen": -96.3701171875, "eval_logps/ref_chosen": -75.86933135986328, "eval_logps/ref_rejected": -80.85771942138672, "eval_logps/rejected": -113.30257415771484, "eval_loss": 0.5207385420799255, "eval_margin_dpo/margin_mean": 11.944085121154785, "eval_margin_dpo/margin_std": 18.461780548095703, "eval_runtime": 38.5096, "eval_samples": 2303, "eval_samples_per_second": 59.803, "eval_steps_per_second": 1.87 }