{ "epoch": 0.999244142101285, "eval_epsilon_dpo/beta": 0.005521266255527735, "eval_epsilon_dpo/beta_margin_grad_mean": -0.4411477744579315, "eval_epsilon_dpo/beta_margin_grad_std": 0.0961698442697525, "eval_epsilon_dpo/beta_margin_mean": 0.2462044507265091, "eval_epsilon_dpo/beta_margin_std": 0.40493056178092957, "eval_epsilon_dpo/loss_margin_mean": 45.06350326538086, "eval_kl/n_epsilon_steps": 0.28169015049934387, "eval_kl/p_epsilon_steps": 0.7174295783042908, "eval_logits/chosen": 0.043499208986759186, "eval_logits/rejected": -0.059857551008462906, "eval_logps/chosen": -169.52423095703125, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -219.27725219726562, "eval_loss": 0.5982230305671692, "eval_rewards/accuracies": 0.7174295783042908, "eval_rewards/chosen": -0.5240421295166016, "eval_rewards/margins": 0.2462044507265091, "eval_rewards/rejected": -0.770246684551239, "eval_runtime": 42.1471, "eval_samples": 2303, "eval_samples_per_second": 54.642, "eval_steps_per_second": 1.708 }