{ "epoch": 0.999244142101285, "eval_beta_dpo/beta_used": 0.1372366100549698, "eval_beta_dpo/beta_used_raw": 0.10684214532375336, "eval_beta_dpo/gap_mean": 11.190993309020996, "eval_beta_dpo/gap_std": 18.76331901550293, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.3944156169891357, "eval_logits/rejected": 1.2692357301712036, "eval_loss": 0.6537412405014038, "eval_runtime": 42.4848, "eval_samples": 2303, "eval_samples_per_second": 54.208, "eval_steps_per_second": 1.695 }