{ "epoch": 16.0, "eval_logits/chosen": -3.25758695602417, "eval_logits/rejected": -3.3809757232666016, "eval_logps/chosen": -30.838027954101562, "eval_logps/rejected": -56.25493621826172, "eval_loss": 1.0559927225112915, "eval_rewards/accuracies": 0.550000011920929, "eval_rewards/chosen": -0.07679860293865204, "eval_rewards/margins": -0.07679860293865204, "eval_rewards/rejected": 0.0, "eval_runtime": 45.9469, "eval_samples": 310, "eval_samples_per_second": 6.747, "eval_steps_per_second": 0.218 }