{ "epoch": 1.971563981042654, "eval_logits/chosen": 361.1391296386719, "eval_logits/rejected": 364.19622802734375, "eval_logps/chosen": -373.9752197265625, "eval_logps/rejected": -463.1776123046875, "eval_loss": 0.5127096176147461, "eval_rewards/accuracies": 0.7446808218955994, "eval_rewards/chosen": -2.441816806793213, "eval_rewards/margins": 1.8080488443374634, "eval_rewards/rejected": -4.2498650550842285, "eval_runtime": 85.5361, "eval_samples": 750, "eval_samples_per_second": 8.768, "eval_steps_per_second": 0.549 }