{ "epoch": 0.999244142101285, "eval_kl/n_epsilon_steps": 0.26716548204421997, "eval_kl/p_epsilon_steps": 0.7293133735656738, "eval_logits/chosen": -1.3011770248413086, "eval_logits/rejected": -1.4259974956512451, "eval_logps/chosen": -150.5532684326172, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -199.7850799560547, "eval_loss": 0.5832020044326782, "eval_rewards/accuracies": 0.7341549396514893, "eval_rewards/chosen": -0.49299976229667664, "eval_rewards/margins": 0.3015401065349579, "eval_rewards/rejected": -0.7945398688316345, "eval_runtime": 46.7622, "eval_samples": 2303, "eval_samples_per_second": 49.249, "eval_steps_per_second": 1.54 }