{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.6620355248451233, "eval_logits/rejected": -0.6459746956825256, "eval_logps/chosen": -2.3574233055114746, "eval_logps/rejected": -3.391777753829956, "eval_loss": 20.139907836914062, "eval_rewards/accuracies": 0.7580645084381104, "eval_rewards/chosen": -0.12184108048677444, "eval_rewards/margins": 0.0839909017086029, "eval_rewards/rejected": -0.20583198964595795, "eval_runtime": 44.1837, "eval_samples": 2000, "eval_samples_per_second": 45.266, "eval_steps_per_second": 1.426 }