{ "epoch": 0.9989528795811519, "eval_kl/n_epsilon_steps": 0.31703630089759827, "eval_kl/p_epsilon_steps": 0.6743951439857483, "eval_logits/chosen": -0.8084373474121094, "eval_logits/rejected": -0.7665925025939941, "eval_logps/chosen": -588.654052734375, "eval_logps/ref_chosen": -287.9388427734375, "eval_logps/ref_rejected": -266.7934875488281, "eval_logps/rejected": -683.635009765625, "eval_loss": 0.621621310710907, "eval_rewards/accuracies": 0.6955645084381104, "eval_rewards/chosen": -0.5053801536560059, "eval_rewards/margins": 0.19223107397556305, "eval_rewards/rejected": -0.6976111531257629, "eval_runtime": 50.6489, "eval_samples": 2000, "eval_samples_per_second": 39.488, "eval_steps_per_second": 1.244, "total_flos": 0.0, "train_loss": 2.463846208664356, "train_runtime": 4358.2481, "train_samples": 61135, "train_samples_per_second": 14.027, "train_steps_per_second": 0.109 }