{ "epoch": 0.9997600191984641, "eval_logits/chosen": -2.71634578704834, "eval_logits/rejected": -2.595935583114624, "eval_logps/chosen": -232.62538146972656, "eval_logps/rejected": -205.43319702148438, "eval_loss": 0.6742354035377502, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": 0.050759363919496536, "eval_rewards/margins": 0.041003111749887466, "eval_rewards/rejected": 0.009756244719028473, "eval_runtime": 21.1604, "eval_samples_per_second": 47.258, "eval_steps_per_second": 11.815 }