{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "grad_norm": 21.158132553100586, "learning_rate": 0.0, "log_odds_chosen": 0.06687486171722412, "log_odds_ratio": -0.7179359793663025, "logits/chosen": -0.6003108024597168, "logits/rejected": -0.5959680080413818, "logps/chosen": -1.3004443645477295, "logps/rejected": -1.3789687156677246, "loss": 6.0476, "nll_loss": 1.4909255504608154, "rewards/accuracies": 0.46875, "rewards/chosen": -0.013004443608224392, "rewards/margins": 0.0007852441049180925, "rewards/rejected": -0.013789687305688858, "step": 1 }, { "epoch": 0.020942408376963352, "grad_norm": 22.246910095214844, "learning_rate": 9.375e-08, "log_odds_chosen": 0.24125711619853973, "log_odds_ratio": -0.6763277053833008, "logits/chosen": -0.680765688419342, "logits/rejected": -0.6712806224822998, "logps/chosen": -1.1191811561584473, "logps/rejected": -1.29719877243042, "loss": 6.0187, "nll_loss": 1.5037554502487183, "rewards/accuracies": 0.5763888955116272, "rewards/chosen": -0.01119181141257286, "rewards/margins": 0.0017801759531721473, "rewards/rejected": -0.012971988879144192, "step": 10 }, { "epoch": 0.041884816753926704, "grad_norm": 21.124698638916016, "learning_rate": 1.9791666666666664e-07, "log_odds_chosen": 0.24139347672462463, "log_odds_ratio": -0.6791239976882935, "logits/chosen": -0.6397651433944702, "logits/rejected": -0.6568408608436584, "logps/chosen": -1.1540690660476685, "logps/rejected": -1.3257322311401367, "loss": 5.8777, "nll_loss": 1.4614884853363037, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.011540691368281841, "rewards/margins": 0.0017166311154142022, "rewards/rejected": -0.013257321901619434, "step": 20 }, { "epoch": 0.06282722513089005, "grad_norm": 19.380123138427734, "learning_rate": 3.020833333333333e-07, "log_odds_chosen": 0.15998375415802002, "log_odds_ratio": -0.715591549873352, "logits/chosen": -0.6493812799453735, "logits/rejected": -0.6421241164207458, "logps/chosen": -1.0813921689987183, "logps/rejected": -1.2045493125915527, "loss": 5.9475, "nll_loss": 1.485296607017517, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.010813921689987183, "rewards/margins": 0.0012315715430304408, "rewards/rejected": -0.01204549241811037, "step": 30 }, { "epoch": 0.08376963350785341, "grad_norm": 16.27351188659668, "learning_rate": 4.0625e-07, "log_odds_chosen": 0.18812714517116547, "log_odds_ratio": -0.7328466176986694, "logits/chosen": -0.63264399766922, "logits/rejected": -0.645917534828186, "logps/chosen": -1.008213758468628, "logps/rejected": -1.1435163021087646, "loss": 5.7467, "nll_loss": 1.3937628269195557, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.010082137770950794, "rewards/margins": 0.0013530252035707235, "rewards/rejected": -0.011435163207352161, "step": 40 }, { "epoch": 0.10471204188481675, "grad_norm": 14.714133262634277, "learning_rate": 4.999932966293553e-07, "log_odds_chosen": 0.18715843558311462, "log_odds_ratio": -0.7093220949172974, "logits/chosen": -0.6608070135116577, "logits/rejected": -0.6796087026596069, "logps/chosen": -0.9801030158996582, "logps/rejected": -1.10386061668396, "loss": 5.7231, "nll_loss": 1.4938442707061768, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.009801030158996582, "rewards/margins": 0.0012375751975923777, "rewards/rejected": -0.011038605123758316, "step": 50 }, { "epoch": 0.1256544502617801, "grad_norm": 15.548501014709473, "learning_rate": 4.991893270335525e-07, "log_odds_chosen": 0.13172771036624908, "log_odds_ratio": -0.7336539030075073, "logits/chosen": -0.6106709241867065, "logits/rejected": -0.620617687702179, "logps/chosen": -1.0109319686889648, "logps/rejected": -1.118139624595642, "loss": 5.7263, "nll_loss": 1.4132804870605469, "rewards/accuracies": 0.5625, "rewards/chosen": -0.010109319351613522, "rewards/margins": 0.0010720762657001615, "rewards/rejected": -0.011181396432220936, "step": 60 }, { "epoch": 0.14659685863874344, "grad_norm": 11.627691268920898, "learning_rate": 4.970496218214204e-07, "log_odds_chosen": 0.2610163390636444, "log_odds_ratio": -0.6738774180412292, "logits/chosen": -0.585883378982544, "logits/rejected": -0.5932791829109192, "logps/chosen": -0.9574364423751831, "logps/rejected": -1.1436620950698853, "loss": 5.5467, "nll_loss": 1.4156994819641113, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.009574364870786667, "rewards/margins": 0.0018622549250721931, "rewards/rejected": -0.011436618864536285, "step": 70 }, { "epoch": 0.16753926701570682, "grad_norm": 10.594218254089355, "learning_rate": 4.935856505068998e-07, "log_odds_chosen": 0.2706550061702728, "log_odds_ratio": -0.6398797035217285, "logits/chosen": -0.5370240211486816, "logits/rejected": -0.5456986427307129, "logps/chosen": -0.9297056198120117, "logps/rejected": -1.09745454788208, "loss": 5.4389, "nll_loss": 1.3115617036819458, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.009297055192291737, "rewards/margins": 0.0016774894902482629, "rewards/rejected": -0.010974545031785965, "step": 80 }, { "epoch": 0.18848167539267016, "grad_norm": 9.036905288696289, "learning_rate": 4.8881598109976e-07, "log_odds_chosen": 0.22219958901405334, "log_odds_ratio": -0.678338348865509, "logits/chosen": -0.5145021677017212, "logits/rejected": -0.5317824482917786, "logps/chosen": -0.9497036933898926, "logps/rejected": -1.101233959197998, "loss": 5.4486, "nll_loss": 1.3097865581512451, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.009497037157416344, "rewards/margins": 0.0015153035055845976, "rewards/rejected": -0.01101233996450901, "step": 90 }, { "epoch": 0.2094240837696335, "grad_norm": 8.396464347839355, "learning_rate": 4.827661805750437e-07, "log_odds_chosen": 0.29178586602211, "log_odds_ratio": -0.6671258211135864, "logits/chosen": -0.4934987425804138, "logits/rejected": -0.515052855014801, "logps/chosen": -0.9019988179206848, "logps/rejected": -1.0930007696151733, "loss": 5.3924, "nll_loss": 1.2699394226074219, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.009019988588988781, "rewards/margins": 0.0019100181525573134, "rewards/rejected": -0.010930007323622704, "step": 100 }, { "epoch": 0.23036649214659685, "grad_norm": 8.834160804748535, "learning_rate": 4.75468677825789e-07, "log_odds_chosen": 0.37880703806877136, "log_odds_ratio": -0.6369461417198181, "logits/chosen": -0.4875544607639313, "logits/rejected": -0.5104657411575317, "logps/chosen": -0.8797906637191772, "logps/rejected": -1.1241319179534912, "loss": 5.3743, "nll_loss": 1.3109803199768066, "rewards/accuracies": 0.65625, "rewards/chosen": -0.008797907270491123, "rewards/margins": 0.002443410689011216, "rewards/rejected": -0.01124131865799427, "step": 110 }, { "epoch": 0.2513089005235602, "grad_norm": 7.755215644836426, "learning_rate": 4.669625898336438e-07, "log_odds_chosen": 0.191465824842453, "log_odds_ratio": -0.7049621343612671, "logits/chosen": -0.5096135139465332, "logits/rejected": -0.51964271068573, "logps/chosen": -0.9437309503555298, "logps/rejected": -1.0870112180709839, "loss": 5.2767, "nll_loss": 1.3031724691390991, "rewards/accuracies": 0.59375, "rewards/chosen": -0.00943730864673853, "rewards/margins": 0.001432802644558251, "rewards/rejected": -0.010870112106204033, "step": 120 }, { "epoch": 0.27225130890052357, "grad_norm": 8.79680347442627, "learning_rate": 4.5729351198915705e-07, "log_odds_chosen": 0.10129977762699127, "log_odds_ratio": -0.7444788813591003, "logits/chosen": -0.5232862234115601, "logits/rejected": -0.5057969093322754, "logps/chosen": -0.9433780908584595, "logps/rejected": -1.0080385208129883, "loss": 5.3749, "nll_loss": 1.321378469467163, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.009433778934180737, "rewards/margins": 0.0006466054474003613, "rewards/rejected": -0.010080385021865368, "step": 130 }, { "epoch": 0.2931937172774869, "grad_norm": 8.298604965209961, "learning_rate": 4.4651327368569684e-07, "log_odds_chosen": 0.15155552327632904, "log_odds_ratio": -0.7214217185974121, "logits/chosen": -0.526405930519104, "logits/rejected": -0.5193445086479187, "logps/chosen": -0.9357401728630066, "logps/rejected": -1.0443140268325806, "loss": 5.3026, "nll_loss": 1.3262240886688232, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.009357400238513947, "rewards/margins": 0.0010857387678697705, "rewards/rejected": -0.010443138889968395, "step": 140 }, { "epoch": 0.31413612565445026, "grad_norm": 8.639591217041016, "learning_rate": 4.346796604970912e-07, "log_odds_chosen": 0.31407707929611206, "log_odds_ratio": -0.6722251772880554, "logits/chosen": -0.5174251198768616, "logits/rejected": -0.5082064867019653, "logps/chosen": -0.8792362213134766, "logps/rejected": -1.0834535360336304, "loss": 5.2925, "nll_loss": 1.304610252380371, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.008792361244559288, "rewards/margins": 0.0020421738736331463, "rewards/rejected": -0.010834535583853722, "step": 150 }, { "epoch": 0.33507853403141363, "grad_norm": 8.277928352355957, "learning_rate": 4.218561044282098e-07, "log_odds_chosen": 0.2669292092323303, "log_odds_ratio": -0.6675515174865723, "logits/chosen": -0.5230351686477661, "logits/rejected": -0.5290777087211609, "logps/chosen": -0.9324489831924438, "logps/rejected": -1.1104564666748047, "loss": 5.3537, "nll_loss": 1.3643314838409424, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.009324489161372185, "rewards/margins": 0.0017800761852413416, "rewards/rejected": -0.011104565113782883, "step": 160 }, { "epoch": 0.35602094240837695, "grad_norm": 8.850061416625977, "learning_rate": 4.081113438988443e-07, "log_odds_chosen": 0.2744578719139099, "log_odds_ratio": -0.6702545881271362, "logits/chosen": -0.49922794103622437, "logits/rejected": -0.5103174448013306, "logps/chosen": -0.9047120809555054, "logps/rejected": -1.0733238458633423, "loss": 5.2741, "nll_loss": 1.3267205953598022, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.009047120809555054, "rewards/margins": 0.0016861179610714316, "rewards/rejected": -0.010733239352703094, "step": 170 }, { "epoch": 0.3769633507853403, "grad_norm": 8.881376266479492, "learning_rate": 3.935190552834828e-07, "log_odds_chosen": 0.23824062943458557, "log_odds_ratio": -0.6932843923568726, "logits/chosen": -0.4861333966255188, "logits/rejected": -0.5041022896766663, "logps/chosen": -0.8888905644416809, "logps/rejected": -1.0255122184753418, "loss": 5.259, "nll_loss": 1.3062529563903809, "rewards/accuracies": 0.59375, "rewards/chosen": -0.008888904936611652, "rewards/margins": 0.0013662164565175772, "rewards/rejected": -0.010255122557282448, "step": 180 }, { "epoch": 0.39790575916230364, "grad_norm": 9.558187484741211, "learning_rate": 3.781574579820464e-07, "log_odds_chosen": 0.3100183606147766, "log_odds_ratio": -0.6493682265281677, "logits/chosen": -0.5123935341835022, "logits/rejected": -0.5036609768867493, "logps/chosen": -0.8575283885002136, "logps/rejected": -1.0392427444458008, "loss": 5.265, "nll_loss": 1.317764163017273, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.008575284853577614, "rewards/margins": 0.001817143289372325, "rewards/rejected": -0.010392428375780582, "step": 190 }, { "epoch": 0.418848167539267, "grad_norm": 9.175705909729004, "learning_rate": 3.621088951385353e-07, "log_odds_chosen": 0.33279526233673096, "log_odds_ratio": -0.6604114770889282, "logits/chosen": -0.5108372569084167, "logits/rejected": -0.5279550552368164, "logps/chosen": -0.9044734835624695, "logps/rejected": -1.117700219154358, "loss": 5.2395, "nll_loss": 1.313854455947876, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009044734761118889, "rewards/margins": 0.0021322660613805056, "rewards/rejected": -0.01117700058966875, "step": 200 }, { "epoch": 0.418848167539267, "eval_log_odds_chosen": 0.28363677859306335, "eval_log_odds_ratio": -0.6567826271057129, "eval_logits/chosen": -0.5067907571792603, "eval_logits/rejected": -0.493470162153244, "eval_logps/chosen": -0.8826000690460205, "eval_logps/rejected": -1.067192792892456, "eval_loss": 1.264377236366272, "eval_nll_loss": 1.2560975551605225, "eval_rewards/accuracies": 0.600806474685669, "eval_rewards/chosen": -0.008825999684631824, "eval_rewards/margins": 0.0018459270941093564, "eval_rewards/rejected": -0.010671926662325859, "eval_runtime": 45.0179, "eval_samples_per_second": 44.427, "eval_steps_per_second": 1.399, "step": 200 }, { "epoch": 0.4397905759162304, "grad_norm": 10.455543518066406, "learning_rate": 3.454593922550693e-07, "log_odds_chosen": 0.26851850748062134, "log_odds_ratio": -0.6904939413070679, "logits/chosen": -0.4954306483268738, "logits/rejected": -0.4978114068508148, "logps/chosen": -0.8825132250785828, "logps/rejected": -1.0579698085784912, "loss": 5.316, "nll_loss": 1.309258222579956, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.008825132623314857, "rewards/margins": 0.0017545666778460145, "rewards/rejected": -0.010579698719084263, "step": 210 }, { "epoch": 0.4607329842931937, "grad_norm": 10.91032886505127, "learning_rate": 3.2829819606729477e-07, "log_odds_chosen": 0.25027215480804443, "log_odds_ratio": -0.6841479539871216, "logits/chosen": -0.5188068151473999, "logits/rejected": -0.504177451133728, "logps/chosen": -0.9375869035720825, "logps/rejected": -1.0952080488204956, "loss": 5.2055, "nll_loss": 1.3260114192962646, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.009375869296491146, "rewards/margins": 0.0015762097900733352, "rewards/rejected": -0.01095207966864109, "step": 220 }, { "epoch": 0.4816753926701571, "grad_norm": 13.497580528259277, "learning_rate": 3.1071729615293424e-07, "log_odds_chosen": 0.18811996281147003, "log_odds_ratio": -0.7035666704177856, "logits/chosen": -0.5260552763938904, "logits/rejected": -0.510788083076477, "logps/chosen": -0.9514686465263367, "logps/rejected": -1.060083031654358, "loss": 5.1571, "nll_loss": 1.2953994274139404, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.009514686651527882, "rewards/margins": 0.0010861435439437628, "rewards/rejected": -0.010600829496979713, "step": 230 }, { "epoch": 0.5026178010471204, "grad_norm": 11.768831253051758, "learning_rate": 2.9281093183781403e-07, "log_odds_chosen": 0.32460492849349976, "log_odds_ratio": -0.6687750816345215, "logits/chosen": -0.46817174553871155, "logits/rejected": -0.4911385476589203, "logps/chosen": -0.8718591928482056, "logps/rejected": -1.053980827331543, "loss": 5.1916, "nll_loss": 1.2509262561798096, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00871859211474657, "rewards/margins": 0.001821216195821762, "rewards/rejected": -0.010539808310568333, "step": 240 }, { "epoch": 0.5235602094240838, "grad_norm": 13.659006118774414, "learning_rate": 2.7467508704251135e-07, "log_odds_chosen": 0.2745344638824463, "log_odds_ratio": -0.6745666861534119, "logits/chosen": -0.5158644914627075, "logits/rejected": -0.5093538165092468, "logps/chosen": -0.8866285085678101, "logps/rejected": -1.0590866804122925, "loss": 5.1928, "nll_loss": 1.305888295173645, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.008866284973919392, "rewards/margins": 0.00172458216547966, "rewards/rejected": -0.010590866208076477, "step": 250 }, { "epoch": 0.5445026178010471, "grad_norm": 14.492527961730957, "learning_rate": 2.5640697577740815e-07, "log_odds_chosen": 0.26566964387893677, "log_odds_ratio": -0.6708006858825684, "logits/chosen": -0.5190873146057129, "logits/rejected": -0.5259062051773071, "logps/chosen": -0.8882888555526733, "logps/rejected": -1.0456405878067017, "loss": 5.1412, "nll_loss": 1.265944004058838, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.008882888592779636, "rewards/margins": 0.0015735173365101218, "rewards/rejected": -0.010456404648721218, "step": 260 }, { "epoch": 0.5654450261780105, "grad_norm": 15.814299583435059, "learning_rate": 2.381045210440644e-07, "log_odds_chosen": 0.23130980134010315, "log_odds_ratio": -0.7191926836967468, "logits/chosen": -0.5003880262374878, "logits/rejected": -0.48836550116539, "logps/chosen": -0.8919955492019653, "logps/rejected": -1.045290470123291, "loss": 5.026, "nll_loss": 1.2595463991165161, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.008919955231249332, "rewards/margins": 0.0015329491579905152, "rewards/rejected": -0.010452903807163239, "step": 270 }, { "epoch": 0.5863874345549738, "grad_norm": 16.75997543334961, "learning_rate": 2.1986582993616925e-07, "log_odds_chosen": 0.3688739538192749, "log_odds_ratio": -0.6330865621566772, "logits/chosen": -0.5016785860061646, "logits/rejected": -0.5119304656982422, "logps/chosen": -0.8641098141670227, "logps/rejected": -1.1013071537017822, "loss": 5.0293, "nll_loss": 1.2116810083389282, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.008641098625957966, "rewards/margins": 0.002371972892433405, "rewards/rejected": -0.011013071052730083, "step": 280 }, { "epoch": 0.6073298429319371, "grad_norm": 17.08208465576172, "learning_rate": 2.0178866775369774e-07, "log_odds_chosen": 0.2176806479692459, "log_odds_ratio": -0.7151871919631958, "logits/chosen": -0.49786868691444397, "logits/rejected": -0.5026464462280273, "logps/chosen": -0.9162432551383972, "logps/rejected": -1.068025827407837, "loss": 5.0736, "nll_loss": 1.2801578044891357, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.00916243251413107, "rewards/margins": 0.001517825061455369, "rewards/rejected": -0.01068025641143322, "step": 290 }, { "epoch": 0.6282722513089005, "grad_norm": 19.949918746948242, "learning_rate": 1.839699339491937e-07, "log_odds_chosen": 0.18358822166919708, "log_odds_ratio": -0.6940725445747375, "logits/chosen": -0.5160781145095825, "logits/rejected": -0.5093048810958862, "logps/chosen": -0.9349120855331421, "logps/rejected": -1.053973913192749, "loss": 4.9283, "nll_loss": 1.2697252035140991, "rewards/accuracies": 0.5625, "rewards/chosen": -0.00934912171214819, "rewards/margins": 0.0011906183790415525, "rewards/rejected": -0.010539740324020386, "step": 300 }, { "epoch": 0.6492146596858639, "grad_norm": 18.484830856323242, "learning_rate": 1.6650514271527465e-07, "log_odds_chosen": 0.20549292862415314, "log_odds_ratio": -0.7036994695663452, "logits/chosen": -0.5009588599205017, "logits/rejected": -0.48482465744018555, "logps/chosen": -0.8928836584091187, "logps/rejected": -1.007062554359436, "loss": 4.9485, "nll_loss": 1.2219517230987549, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.00892883725464344, "rewards/margins": 0.0011417880887165666, "rewards/rejected": -0.010070623829960823, "step": 310 }, { "epoch": 0.6701570680628273, "grad_norm": 15.437707901000977, "learning_rate": 1.4948791099758052e-07, "log_odds_chosen": 0.2843065857887268, "log_odds_ratio": -0.6758512258529663, "logits/chosen": -0.5303715467453003, "logits/rejected": -0.530095636844635, "logps/chosen": -0.920501708984375, "logps/rejected": -1.1022217273712158, "loss": 5.0943, "nll_loss": 1.2833216190338135, "rewards/accuracies": 0.5625, "rewards/chosen": -0.009205018170177937, "rewards/margins": 0.00181719905231148, "rewards/rejected": -0.011022215709090233, "step": 320 }, { "epoch": 0.6910994764397905, "grad_norm": 15.229772567749023, "learning_rate": 1.3300945667758012e-07, "log_odds_chosen": 0.3193449079990387, "log_odds_ratio": -0.6650776267051697, "logits/chosen": -0.520767092704773, "logits/rejected": -0.5018462538719177, "logps/chosen": -0.9000827074050903, "logps/rejected": -1.0957286357879639, "loss": 5.1075, "nll_loss": 1.2375710010528564, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009000827558338642, "rewards/margins": 0.001956457272171974, "rewards/rejected": -0.010957283899188042, "step": 330 }, { "epoch": 0.7120418848167539, "grad_norm": 12.496045112609863, "learning_rate": 1.1715810961514072e-07, "log_odds_chosen": 0.3133091330528259, "log_odds_ratio": -0.6810437440872192, "logits/chosen": -0.5330361127853394, "logits/rejected": -0.5395983457565308, "logps/chosen": -0.9298146963119507, "logps/rejected": -1.1273194551467896, "loss": 5.1375, "nll_loss": 1.2518556118011475, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.00929814763367176, "rewards/margins": 0.0019750469364225864, "rewards/rejected": -0.011273193173110485, "step": 340 }, { "epoch": 0.7329842931937173, "grad_norm": 12.056753158569336, "learning_rate": 1.0201883817182949e-07, "log_odds_chosen": 0.2855184078216553, "log_odds_ratio": -0.6681550741195679, "logits/chosen": -0.5056412220001221, "logits/rejected": -0.5043959021568298, "logps/chosen": -0.9035753011703491, "logps/rejected": -1.097712516784668, "loss": 5.0573, "nll_loss": 1.2399346828460693, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.009035754017531872, "rewards/margins": 0.0019413719419389963, "rewards/rejected": -0.010977125726640224, "step": 350 }, { "epoch": 0.7539267015706806, "grad_norm": 10.219555854797363, "learning_rate": 8.76727937529367e-08, "log_odds_chosen": 0.2616792321205139, "log_odds_ratio": -0.6761180758476257, "logits/chosen": -0.5021784901618958, "logits/rejected": -0.5104162693023682, "logps/chosen": -0.8949702978134155, "logps/rejected": -1.065882682800293, "loss": 5.0549, "nll_loss": 1.2354400157928467, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0089497035369277, "rewards/margins": 0.0017091237241402268, "rewards/rejected": -0.010658827610313892, "step": 360 }, { "epoch": 0.774869109947644, "grad_norm": 9.729683876037598, "learning_rate": 7.419687580962222e-08, "log_odds_chosen": 0.3590267598628998, "log_odds_ratio": -0.6520247459411621, "logits/chosen": -0.528827428817749, "logits/rejected": -0.5364798903465271, "logps/chosen": -0.8705722689628601, "logps/rejected": -1.088714838027954, "loss": 5.0822, "nll_loss": 1.28533935546875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00870572216808796, "rewards/margins": 0.002181424992159009, "rewards/rejected": -0.01088714599609375, "step": 370 }, { "epoch": 0.7958115183246073, "grad_norm": 8.4192533493042, "learning_rate": 6.166331963291519e-08, "log_odds_chosen": 0.16738846898078918, "log_odds_ratio": -0.7240005135536194, "logits/chosen": -0.4927976131439209, "logits/rejected": -0.48198264837265015, "logps/chosen": -0.8879655599594116, "logps/rejected": -1.0060501098632812, "loss": 5.079, "nll_loss": 1.2799310684204102, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.008879655972123146, "rewards/margins": 0.0011808457784354687, "rewards/rejected": -0.010060502216219902, "step": 380 }, { "epoch": 0.8167539267015707, "grad_norm": 8.200337409973145, "learning_rate": 5.013930914912476e-08, "log_odds_chosen": 0.22408561408519745, "log_odds_ratio": -0.6869050860404968, "logits/chosen": -0.5221595764160156, "logits/rejected": -0.5140419602394104, "logps/chosen": -0.8862568140029907, "logps/rejected": -1.033087134361267, "loss": 4.9446, "nll_loss": 1.249403715133667, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.008862568065524101, "rewards/margins": 0.0014683037297800183, "rewards/rejected": -0.010330872610211372, "step": 390 }, { "epoch": 0.837696335078534, "grad_norm": 8.371641159057617, "learning_rate": 3.968661679220467e-08, "log_odds_chosen": 0.2015990912914276, "log_odds_ratio": -0.7086056470870972, "logits/chosen": -0.5080100893974304, "logits/rejected": -0.5066760182380676, "logps/chosen": -0.8951946496963501, "logps/rejected": -1.034939169883728, "loss": 4.9046, "nll_loss": 1.2675601243972778, "rewards/accuracies": 0.546875, "rewards/chosen": -0.008951946161687374, "rewards/margins": 0.0013974455650895834, "rewards/rejected": -0.010349391028285027, "step": 400 }, { "epoch": 0.837696335078534, "eval_log_odds_chosen": 0.2856575548648834, "eval_log_odds_ratio": -0.6565335392951965, "eval_logits/chosen": -0.5022188425064087, "eval_logits/rejected": -0.48727092146873474, "eval_logps/chosen": -0.8758360147476196, "eval_logps/rejected": -1.060706615447998, "eval_loss": 1.224434733390808, "eval_nll_loss": 1.217404842376709, "eval_rewards/accuracies": 0.6028226017951965, "eval_rewards/chosen": -0.008758360520005226, "eval_rewards/margins": 0.0018487058114260435, "eval_rewards/rejected": -0.010607065632939339, "eval_runtime": 44.2456, "eval_samples_per_second": 45.202, "eval_steps_per_second": 1.424, "step": 400 }, { "epoch": 0.8586387434554974, "grad_norm": 7.9049577713012695, "learning_rate": 3.036127238347164e-08, "log_odds_chosen": 0.2682987153530121, "log_odds_ratio": -0.6803867816925049, "logits/chosen": -0.5115852355957031, "logits/rejected": -0.4977447986602783, "logps/chosen": -0.8711401224136353, "logps/rejected": -1.0264394283294678, "loss": 4.9707, "nll_loss": 1.2559094429016113, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.00871140044182539, "rewards/margins": 0.001552992849610746, "rewards/rejected": -0.010264393873512745, "step": 410 }, { "epoch": 0.8795811518324608, "grad_norm": 8.560924530029297, "learning_rate": 2.2213262793589482e-08, "log_odds_chosen": 0.3053427040576935, "log_odds_ratio": -0.6574885845184326, "logits/chosen": -0.4770349860191345, "logits/rejected": -0.47626155614852905, "logps/chosen": -0.8793102502822876, "logps/rejected": -1.0621235370635986, "loss": 4.9777, "nll_loss": 1.1874935626983643, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.008793102577328682, "rewards/margins": 0.0018281324300915003, "rewards/rejected": -0.010621235705912113, "step": 420 }, { "epoch": 0.900523560209424, "grad_norm": 7.4246320724487305, "learning_rate": 1.5286263996730026e-08, "log_odds_chosen": 0.2607780992984772, "log_odds_ratio": -0.698715329170227, "logits/chosen": -0.5007373094558716, "logits/rejected": -0.5147266387939453, "logps/chosen": -0.8995779156684875, "logps/rejected": -1.0742470026016235, "loss": 4.9582, "nll_loss": 1.2430001497268677, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.008995778858661652, "rewards/margins": 0.0017466908320784569, "rewards/rejected": -0.010742468759417534, "step": 430 }, { "epoch": 0.9214659685863874, "grad_norm": 7.529422283172607, "learning_rate": 9.617406953185136e-09, "log_odds_chosen": 0.341641902923584, "log_odds_ratio": -0.6579716801643372, "logits/chosen": -0.505264937877655, "logits/rejected": -0.5192712545394897, "logps/chosen": -0.8687723875045776, "logps/rejected": -1.0708019733428955, "loss": 5.1002, "nll_loss": 1.2634742259979248, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.008687724359333515, "rewards/margins": 0.0020202945452183485, "rewards/rejected": -0.010708019137382507, "step": 440 }, { "epoch": 0.9424083769633508, "grad_norm": 7.834778785705566, "learning_rate": 5.2370785753763356e-09, "log_odds_chosen": 0.22981591522693634, "log_odds_ratio": -0.6861775517463684, "logits/chosen": -0.5010178089141846, "logits/rejected": -0.5019730925559998, "logps/chosen": -0.9211832284927368, "logps/rejected": -1.0682731866836548, "loss": 4.9999, "nll_loss": 1.2437247037887573, "rewards/accuracies": 0.5625, "rewards/chosen": -0.009211831726133823, "rewards/margins": 0.0014708999078720808, "rewards/rejected": -0.010682731866836548, "step": 450 }, { "epoch": 0.9633507853403142, "grad_norm": 8.3234224319458, "learning_rate": 2.168758844148272e-09, "log_odds_chosen": 0.23462414741516113, "log_odds_ratio": -0.6922942996025085, "logits/chosen": -0.49100571870803833, "logits/rejected": -0.49131718277931213, "logps/chosen": -0.9402503967285156, "logps/rejected": -1.0789129734039307, "loss": 4.9825, "nll_loss": 1.2720887660980225, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.009402502328157425, "rewards/margins": 0.001386628020554781, "rewards/rejected": -0.010789131745696068, "step": 460 }, { "epoch": 0.9842931937172775, "grad_norm": 8.285514831542969, "learning_rate": 4.288949484559934e-10, "log_odds_chosen": 0.2851186990737915, "log_odds_ratio": -0.6771829128265381, "logits/chosen": -0.5137313008308411, "logits/rejected": -0.5017358064651489, "logps/chosen": -0.8824328184127808, "logps/rejected": -1.0701076984405518, "loss": 5.0037, "nll_loss": 1.2345225811004639, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.008824328891932964, "rewards/margins": 0.0018767483998090029, "rewards/rejected": -0.010701077058911324, "step": 470 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 5.24161350602124, "train_runtime": 5082.9537, "train_samples_per_second": 12.027, "train_steps_per_second": 0.094 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }