{ "epoch": 0.999244142101285, "eval_beta_dpo/beta": 0.1206294596195221, "eval_beta_dpo/beta_margin_grad_mean": -0.3669372498989105, "eval_beta_dpo/beta_margin_grad_std": 0.2000643014907837, "eval_beta_dpo/beta_margin_mean": 1.2764525413513184, "eval_beta_dpo/beta_margin_std": 1.8706386089324951, "eval_beta_dpo/beta_used": 0.1206294596195221, "eval_beta_dpo/beta_used_raw": 0.10496275871992111, "eval_beta_dpo/gap_mean": 8.563544273376465, "eval_beta_dpo/gap_std": 15.48448657989502, "eval_beta_dpo/loss_margin_mean": 8.646257400512695, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.1991723775863647, "eval_logits/rejected": 1.1331901550292969, "eval_loss": 0.6232607364654541, "eval_runtime": 43.3636, "eval_samples": 2303, "eval_samples_per_second": 53.109, "eval_steps_per_second": 1.66 }