{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 86, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05813953488372093, "grad_norm": 6.173820495605469, "learning_rate": 2.222222222222222e-07, "logits/chosen": -0.31656768918037415, "logits/rejected": -0.34523338079452515, "logps/chosen": -272.57794189453125, "logps/rejected": -270.36444091796875, "loss": 0.6931, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.00012703397078439593, "rewards/margins": 0.00034172655432485044, "rewards/rejected": -0.0002146924816770479, "step": 5 }, { "epoch": 0.11627906976744186, "grad_norm": 11.29118824005127, "learning_rate": 5e-07, "logits/chosen": -0.3255475163459778, "logits/rejected": -0.3258180320262909, "logps/chosen": -284.53802490234375, "logps/rejected": -276.3387451171875, "loss": 0.6936, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": 0.0012468498898670077, "rewards/margins": -0.000429403327871114, "rewards/rejected": 0.0016762532759457827, "step": 10 }, { "epoch": 0.1744186046511628, "grad_norm": 22.03050422668457, "learning_rate": 4.948160396893553e-07, "logits/chosen": -0.3352300226688385, "logits/rejected": -0.3715844750404358, "logps/chosen": -273.3583984375, "logps/rejected": -256.2732849121094, "loss": 0.6917, "rewards/accuracies": 0.59375, "rewards/chosen": 0.0064769028685987, "rewards/margins": 0.0027799301315099, "rewards/rejected": 0.003696972969919443, "step": 15 }, { "epoch": 0.23255813953488372, "grad_norm": 12.313726425170898, "learning_rate": 4.794791463134399e-07, "logits/chosen": -0.37685471773147583, "logits/rejected": -0.3720394968986511, "logps/chosen": -287.3114013671875, "logps/rejected": -265.71466064453125, "loss": 0.6903, "rewards/accuracies": 0.628125011920929, "rewards/chosen": 0.016553811728954315, "rewards/margins": 0.005998819135129452, "rewards/rejected": 0.010554992593824863, "step": 20 }, { "epoch": 0.29069767441860467, "grad_norm": 6.40844202041626, "learning_rate": 4.5462536664464836e-07, "logits/chosen": -0.3328778147697449, "logits/rejected": -0.3745272159576416, "logps/chosen": -294.00567626953125, "logps/rejected": -251.9366455078125, "loss": 0.6885, "rewards/accuracies": 0.621874988079071, "rewards/chosen": 0.02947135828435421, "rewards/margins": 0.01008202601224184, "rewards/rejected": 0.019389333203434944, "step": 25 }, { "epoch": 0.3488372093023256, "grad_norm": 9.740550994873047, "learning_rate": 4.2128542874196107e-07, "logits/chosen": -0.3988240659236908, "logits/rejected": -0.3960237205028534, "logps/chosen": -315.44189453125, "logps/rejected": -269.29888916015625, "loss": 0.6847, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.04561670497059822, "rewards/margins": 0.017333079129457474, "rewards/rejected": 0.0282836202532053, "step": 30 }, { "epoch": 0.4069767441860465, "grad_norm": 4.856711387634277, "learning_rate": 3.80841995924153e-07, "logits/chosen": -0.35535919666290283, "logits/rejected": -0.3754374086856842, "logps/chosen": -284.4345397949219, "logps/rejected": -253.5362091064453, "loss": 0.6845, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.05356748774647713, "rewards/margins": 0.018121790140867233, "rewards/rejected": 0.035445697605609894, "step": 35 }, { "epoch": 0.46511627906976744, "grad_norm": 5.220498561859131, "learning_rate": 3.349723253956541e-07, "logits/chosen": -0.3333882689476013, "logits/rejected": -0.3347395360469818, "logps/chosen": -287.35186767578125, "logps/rejected": -268.220703125, "loss": 0.6792, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.06367762386798859, "rewards/margins": 0.0287557952105999, "rewards/rejected": 0.03492182120680809, "step": 40 }, { "epoch": 0.5232558139534884, "grad_norm": 6.02023458480835, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -0.3798816204071045, "logits/rejected": -0.3992946743965149, "logps/chosen": -291.197021484375, "logps/rejected": -267.6119689941406, "loss": 0.6806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.05876715108752251, "rewards/margins": 0.02658895030617714, "rewards/rejected": 0.03217820078134537, "step": 45 }, { "epoch": 0.5813953488372093, "grad_norm": 6.866888523101807, "learning_rate": 2.3470958479453937e-07, "logits/chosen": -0.3547196388244629, "logits/rejected": -0.38883891701698303, "logps/chosen": -254.36865234375, "logps/rejected": -254.80191040039062, "loss": 0.6745, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": 0.0644092783331871, "rewards/margins": 0.03914918377995491, "rewards/rejected": 0.025260094553232193, "step": 50 }, { "epoch": 0.6395348837209303, "grad_norm": 5.712334632873535, "learning_rate": 1.8447457926522452e-07, "logits/chosen": -0.39016178250312805, "logits/rejected": -0.3929459750652313, "logps/chosen": -303.5281066894531, "logps/rejected": -278.91619873046875, "loss": 0.6777, "rewards/accuracies": 0.659375011920929, "rewards/chosen": 0.05406096577644348, "rewards/margins": 0.03241851180791855, "rewards/rejected": 0.021642452105879784, "step": 55 }, { "epoch": 0.6976744186046512, "grad_norm": 6.261183738708496, "learning_rate": 1.369570231793286e-07, "logits/chosen": -0.3772028386592865, "logits/rejected": -0.42861443758010864, "logps/chosen": -282.66925048828125, "logps/rejected": -262.298583984375, "loss": 0.6733, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.054535817354917526, "rewards/margins": 0.0425155907869339, "rewards/rejected": 0.012020227499306202, "step": 60 }, { "epoch": 0.7558139534883721, "grad_norm": 5.369093894958496, "learning_rate": 9.412754953531663e-08, "logits/chosen": -0.4042905271053314, "logits/rejected": -0.41270047426223755, "logps/chosen": -297.8207702636719, "logps/rejected": -262.1812744140625, "loss": 0.6734, "rewards/accuracies": 0.703125, "rewards/chosen": 0.050960998982191086, "rewards/margins": 0.042267706245183945, "rewards/rejected": 0.008693288080394268, "step": 65 }, { "epoch": 0.813953488372093, "grad_norm": 6.778103828430176, "learning_rate": 5.776236866515946e-08, "logits/chosen": -0.35539665818214417, "logits/rejected": -0.4030834138393402, "logps/chosen": -306.32501220703125, "logps/rejected": -253.0592803955078, "loss": 0.6717, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04382815584540367, "rewards/margins": 0.04540730267763138, "rewards/rejected": -0.0015791511395946145, "step": 70 }, { "epoch": 0.872093023255814, "grad_norm": 5.866343021392822, "learning_rate": 2.936960580341971e-08, "logits/chosen": -0.3599171042442322, "logits/rejected": -0.3811812400817871, "logps/chosen": -268.90374755859375, "logps/rejected": -259.1419677734375, "loss": 0.6714, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.03630157560110092, "rewards/margins": 0.04637160152196884, "rewards/rejected": -0.010070028714835644, "step": 75 }, { "epoch": 0.9302325581395349, "grad_norm": 6.844763278961182, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -0.34698766469955444, "logits/rejected": -0.4250791072845459, "logps/chosen": -274.50775146484375, "logps/rejected": -277.90618896484375, "loss": 0.6729, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.03152482956647873, "rewards/margins": 0.04362744092941284, "rewards/rejected": -0.012102612294256687, "step": 80 }, { "epoch": 0.9883720930232558, "grad_norm": 5.770267963409424, "learning_rate": 8.318543764516961e-10, "logits/chosen": -0.39356738328933716, "logits/rejected": -0.43114370107650757, "logps/chosen": -282.12542724609375, "logps/rejected": -264.55810546875, "loss": 0.6732, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.0381753146648407, "rewards/margins": 0.0427427664399147, "rewards/rejected": -0.004567448981106281, "step": 85 }, { "epoch": 1.0, "step": 86, "total_flos": 0.0, "train_loss": 0.6806982485360877, "train_runtime": 636.997, "train_samples_per_second": 8.636, "train_steps_per_second": 0.135 } ], "logging_steps": 5, "max_steps": 86, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }