{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.46801021695137024, "eval_logits/rejected": -0.46652573347091675, "eval_logps/chosen": -4.461421012878418, "eval_logps/rejected": -6.7237443923950195, "eval_loss": 2342.1025390625, "eval_rewards/accuracies": 0.6880000233650208, "eval_rewards/chosen": -0.033448584377765656, "eval_rewards/margins": 0.020814577117562294, "eval_rewards/rejected": -0.0542631670832634, "eval_runtime": 79.0392, "eval_samples": 2000, "eval_samples_per_second": 25.304, "eval_steps_per_second": 1.581 }