{ "epoch": 1.0, "eval_beta_dpo/beta_used": 0.02526404708623886, "eval_beta_dpo/beta_used_raw": -0.14088919758796692, "eval_beta_dpo/gap_mean": 20.52260971069336, "eval_beta_dpo/gap_std": 30.19026756286621, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6686146855354309, "eval_logits/rejected": -0.6267361044883728, "eval_loss": 0.6458496451377869, "eval_runtime": 20.2363, "eval_samples": 2339, "eval_samples_per_second": 115.585, "eval_steps_per_second": 0.939, "total_flos": 0.0, "train_loss": 0.5267414394546958, "train_runtime": 1440.2657, "train_samples": 43598, "train_samples_per_second": 30.271, "train_steps_per_second": 0.236 }