{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.1925041377544403, "eval_logits/rejected": -0.21385852992534637, "eval_logps/chosen": -5.949731349945068, "eval_logps/rejected": -8.787737846374512, "eval_loss": 2305.42529296875, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -0.04833168908953667, "eval_rewards/margins": 0.026571419090032578, "eval_rewards/rejected": -0.07490310817956924, "eval_runtime": 78.3976, "eval_samples": 2000, "eval_samples_per_second": 25.511, "eval_steps_per_second": 1.594 }