{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.40516504645347595, "eval_logits/rejected": -0.40924227237701416, "eval_logps/chosen": -4.689553737640381, "eval_logps/rejected": -7.291287422180176, "eval_loss": 2328.408447265625, "eval_rewards/accuracies": 0.6840000152587891, "eval_rewards/chosen": -0.03572991117835045, "eval_rewards/margins": 0.024208687245845795, "eval_rewards/rejected": -0.05993859842419624, "eval_runtime": 78.645, "eval_samples": 2000, "eval_samples_per_second": 25.431, "eval_steps_per_second": 1.589, "total_flos": 0.0, "train_loss": 19203.485685272535, "train_runtime": 5961.2859, "train_samples": 61135, "train_samples_per_second": 10.255, "train_steps_per_second": 0.08 }