{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14222222222222222, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023703703703703703, "grad_norm": 108.5, "learning_rate": 2.0930232558139536e-06, "logits/chosen": 4.719546318054199, "logits/rejected": 4.862860202789307, "logps/chosen": -389.59698486328125, "logps/rejected": -377.825439453125, "loss": 0.6845, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": 0.6139063835144043, "rewards/margins": 0.04724857956171036, "rewards/rejected": 0.5666579008102417, "step": 10 }, { "epoch": 0.047407407407407405, "grad_norm": 109.0, "learning_rate": 4.418604651162791e-06, "logits/chosen": 4.730769634246826, "logits/rejected": 4.875722408294678, "logps/chosen": -366.2303161621094, "logps/rejected": -381.54351806640625, "loss": 0.603, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 2.2159204483032227, "rewards/margins": 0.5498644113540649, "rewards/rejected": 1.6660559177398682, "step": 20 }, { "epoch": 0.07111111111111111, "grad_norm": 64.5, "learning_rate": 6.744186046511628e-06, "logits/chosen": 4.784668922424316, "logits/rejected": 4.905774116516113, "logps/chosen": -407.79986572265625, "logps/rejected": -423.7102966308594, "loss": 0.5481, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20952901244163513, "rewards/margins": 0.7444091439247131, "rewards/rejected": -0.9539381265640259, "step": 30 }, { "epoch": 0.09481481481481481, "grad_norm": 75.5, "learning_rate": 9.069767441860465e-06, "logits/chosen": 4.709995746612549, "logits/rejected": 4.814078330993652, "logps/chosen": -397.01055908203125, "logps/rejected": -400.6348571777344, "loss": 0.4682, "rewards/accuracies": 0.796875, "rewards/chosen": 2.5699944496154785, "rewards/margins": 1.2299325466156006, "rewards/rejected": 1.340061902999878, "step": 40 }, { "epoch": 0.11851851851851852, "grad_norm": 102.0, "learning_rate": 9.993784606094612e-06, "logits/chosen": 4.691997528076172, "logits/rejected": 4.816564083099365, "logps/chosen": -410.54034423828125, "logps/rejected": -446.1881408691406, "loss": 0.4441, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.03334064409136772, "rewards/margins": 1.9787142276763916, "rewards/rejected": -2.012054443359375, "step": 50 }, { "epoch": 0.14222222222222222, "grad_norm": 96.0, "learning_rate": 9.955857588395065e-06, "logits/chosen": 4.5857696533203125, "logits/rejected": 4.653135299682617, "logps/chosen": -393.78936767578125, "logps/rejected": -445.19342041015625, "loss": 0.4635, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 2.299717426300049, "rewards/margins": 2.069148302078247, "rewards/rejected": 0.23056945204734802, "step": 60 } ], "logging_steps": 10, "max_steps": 421, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }