{ "epoch": 0.9997600191984641, "eval_logits/chosen": -2.6097218990325928, "eval_logits/rejected": -2.4986746311187744, "eval_logps/chosen": -234.91114807128906, "eval_logps/rejected": -217.71800231933594, "eval_loss": 0.4486246407032013, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": -0.17781423032283783, "eval_rewards/margins": 1.0409064292907715, "eval_rewards/rejected": -1.218720555305481, "eval_runtime": 21.1512, "eval_samples_per_second": 47.279, "eval_steps_per_second": 11.82 }