{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.40516504645347595, "eval_logits/rejected": -0.40924227237701416, "eval_logps/chosen": -4.689553737640381, "eval_logps/rejected": -7.291287422180176, "eval_loss": 2328.408447265625, "eval_rewards/accuracies": 0.6840000152587891, "eval_rewards/chosen": -0.03572991117835045, "eval_rewards/margins": 0.024208687245845795, "eval_rewards/rejected": -0.05993859842419624, "eval_runtime": 78.645, "eval_samples": 2000, "eval_samples_per_second": 25.431, "eval_steps_per_second": 1.589 }