{ "epoch": 0.9989528795811519, "eval_logits/chosen": -0.6057044267654419, "eval_logits/rejected": -0.6120408177375793, "eval_logps/chosen": -260.8712158203125, "eval_logps/rejected": -247.0758819580078, "eval_loss": 341.7819519042969, "eval_rewards/accuracies": 0.49294355511665344, "eval_rewards/chosen": -260.8712158203125, "eval_rewards/margins": -13.795334815979004, "eval_rewards/rejected": -247.0758819580078, "eval_runtime": 23.6719, "eval_samples": 2000, "eval_samples_per_second": 84.488, "eval_slic/ce_loss": 260.8712158203125, "eval_slic/rank_loss": 80.91075134277344, "eval_steps_per_second": 2.661, "total_flos": 0.0, "train_loss": 1398.719245958628, "train_runtime": 2677.8284, "train_samples": 61135, "train_samples_per_second": 22.83, "train_steps_per_second": 0.178 }