{ "epoch": 1.0, "eval_beta_dpo/beta_used": 0.02526404708623886, "eval_beta_dpo/beta_used_raw": -0.14088919758796692, "eval_beta_dpo/gap_mean": 20.52260971069336, "eval_beta_dpo/gap_std": 30.19026756286621, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6686146855354309, "eval_logits/rejected": -0.6267361044883728, "eval_loss": 0.6458496451377869, "eval_runtime": 20.2363, "eval_samples": 2339, "eval_samples_per_second": 115.585, "eval_steps_per_second": 0.939 }