{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.7624644637107849, "eval_logits/rejected": -0.7490274310112, "eval_logps/chosen": -1.5084009170532227, "eval_logps/rejected": -2.0472562313079834, "eval_loss": 1.0240256786346436, "eval_rewards/accuracies": 0.7419354915618896, "eval_rewards/chosen": -3.0168018341064453, "eval_rewards/margins": 1.0777103900909424, "eval_rewards/rejected": -4.094512462615967, "eval_runtime": 24.4713, "eval_samples": 2000, "eval_samples_per_second": 81.728, "eval_steps_per_second": 1.308 }