{ "epoch": 0.999244142101285, "eval_beta_dpo/beta": 0.16831883788108826, "eval_beta_dpo/beta_margin_grad_mean": -0.3790470063686371, "eval_beta_dpo/beta_margin_grad_std": 0.1721421331167221, "eval_beta_dpo/beta_margin_mean": 8.606536865234375, "eval_beta_dpo/beta_margin_std": 10.774296760559082, "eval_beta_dpo/beta_used": 0.16831883788108826, "eval_beta_dpo/beta_used_raw": -0.15472783148288727, "eval_beta_dpo/gap_mean": 43.872432708740234, "eval_beta_dpo/gap_std": 63.57323455810547, "eval_beta_dpo/loss_margin_mean": 39.6269645690918, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -2.7342004776000977, "eval_logits/rejected": -2.749027967453003, "eval_loss": 1.3430638313293457, "eval_runtime": 36.2682, "eval_samples": 2303, "eval_samples_per_second": 63.499, "eval_steps_per_second": 1.985 }