Files
llama-3-8b-base-orpo-ultraf…/trainer_state.json
ModelHub XC f172e34488 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-orpo-ultrafeedback-4xh200-rerun
Source: Original Platform
2026-05-09 20:44:31 +08:00

946 lines
34 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 200,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"grad_norm": 21.158132553100586,
"learning_rate": 0.0,
"log_odds_chosen": 0.06687486171722412,
"log_odds_ratio": -0.7179359793663025,
"logits/chosen": -0.6003108024597168,
"logits/rejected": -0.5959680080413818,
"logps/chosen": -1.3004443645477295,
"logps/rejected": -1.3789687156677246,
"loss": 6.0476,
"nll_loss": 1.4909255504608154,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.013004443608224392,
"rewards/margins": 0.0007852441049180925,
"rewards/rejected": -0.013789687305688858,
"step": 1
},
{
"epoch": 0.020942408376963352,
"grad_norm": 22.246910095214844,
"learning_rate": 9.375e-08,
"log_odds_chosen": 0.24125711619853973,
"log_odds_ratio": -0.6763277053833008,
"logits/chosen": -0.680765688419342,
"logits/rejected": -0.6712806224822998,
"logps/chosen": -1.1191811561584473,
"logps/rejected": -1.29719877243042,
"loss": 6.0187,
"nll_loss": 1.5037554502487183,
"rewards/accuracies": 0.5763888955116272,
"rewards/chosen": -0.01119181141257286,
"rewards/margins": 0.0017801759531721473,
"rewards/rejected": -0.012971988879144192,
"step": 10
},
{
"epoch": 0.041884816753926704,
"grad_norm": 21.124698638916016,
"learning_rate": 1.9791666666666664e-07,
"log_odds_chosen": 0.24139347672462463,
"log_odds_ratio": -0.6791239976882935,
"logits/chosen": -0.6397651433944702,
"logits/rejected": -0.6568408608436584,
"logps/chosen": -1.1540690660476685,
"logps/rejected": -1.3257322311401367,
"loss": 5.8777,
"nll_loss": 1.4614884853363037,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.011540691368281841,
"rewards/margins": 0.0017166311154142022,
"rewards/rejected": -0.013257321901619434,
"step": 20
},
{
"epoch": 0.06282722513089005,
"grad_norm": 19.380123138427734,
"learning_rate": 3.020833333333333e-07,
"log_odds_chosen": 0.15998375415802002,
"log_odds_ratio": -0.715591549873352,
"logits/chosen": -0.6493812799453735,
"logits/rejected": -0.6421241164207458,
"logps/chosen": -1.0813921689987183,
"logps/rejected": -1.2045493125915527,
"loss": 5.9475,
"nll_loss": 1.485296607017517,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.010813921689987183,
"rewards/margins": 0.0012315715430304408,
"rewards/rejected": -0.01204549241811037,
"step": 30
},
{
"epoch": 0.08376963350785341,
"grad_norm": 16.27351188659668,
"learning_rate": 4.0625e-07,
"log_odds_chosen": 0.18812714517116547,
"log_odds_ratio": -0.7328466176986694,
"logits/chosen": -0.63264399766922,
"logits/rejected": -0.645917534828186,
"logps/chosen": -1.008213758468628,
"logps/rejected": -1.1435163021087646,
"loss": 5.7467,
"nll_loss": 1.3937628269195557,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.010082137770950794,
"rewards/margins": 0.0013530252035707235,
"rewards/rejected": -0.011435163207352161,
"step": 40
},
{
"epoch": 0.10471204188481675,
"grad_norm": 14.714133262634277,
"learning_rate": 4.999932966293553e-07,
"log_odds_chosen": 0.18715843558311462,
"log_odds_ratio": -0.7093220949172974,
"logits/chosen": -0.6608070135116577,
"logits/rejected": -0.6796087026596069,
"logps/chosen": -0.9801030158996582,
"logps/rejected": -1.10386061668396,
"loss": 5.7231,
"nll_loss": 1.4938442707061768,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.009801030158996582,
"rewards/margins": 0.0012375751975923777,
"rewards/rejected": -0.011038605123758316,
"step": 50
},
{
"epoch": 0.1256544502617801,
"grad_norm": 15.548501014709473,
"learning_rate": 4.991893270335525e-07,
"log_odds_chosen": 0.13172771036624908,
"log_odds_ratio": -0.7336539030075073,
"logits/chosen": -0.6106709241867065,
"logits/rejected": -0.620617687702179,
"logps/chosen": -1.0109319686889648,
"logps/rejected": -1.118139624595642,
"loss": 5.7263,
"nll_loss": 1.4132804870605469,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.010109319351613522,
"rewards/margins": 0.0010720762657001615,
"rewards/rejected": -0.011181396432220936,
"step": 60
},
{
"epoch": 0.14659685863874344,
"grad_norm": 11.627691268920898,
"learning_rate": 4.970496218214204e-07,
"log_odds_chosen": 0.2610163390636444,
"log_odds_ratio": -0.6738774180412292,
"logits/chosen": -0.585883378982544,
"logits/rejected": -0.5932791829109192,
"logps/chosen": -0.9574364423751831,
"logps/rejected": -1.1436620950698853,
"loss": 5.5467,
"nll_loss": 1.4156994819641113,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.009574364870786667,
"rewards/margins": 0.0018622549250721931,
"rewards/rejected": -0.011436618864536285,
"step": 70
},
{
"epoch": 0.16753926701570682,
"grad_norm": 10.594218254089355,
"learning_rate": 4.935856505068998e-07,
"log_odds_chosen": 0.2706550061702728,
"log_odds_ratio": -0.6398797035217285,
"logits/chosen": -0.5370240211486816,
"logits/rejected": -0.5456986427307129,
"logps/chosen": -0.9297056198120117,
"logps/rejected": -1.09745454788208,
"loss": 5.4389,
"nll_loss": 1.3115617036819458,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.009297055192291737,
"rewards/margins": 0.0016774894902482629,
"rewards/rejected": -0.010974545031785965,
"step": 80
},
{
"epoch": 0.18848167539267016,
"grad_norm": 9.036905288696289,
"learning_rate": 4.8881598109976e-07,
"log_odds_chosen": 0.22219958901405334,
"log_odds_ratio": -0.678338348865509,
"logits/chosen": -0.5145021677017212,
"logits/rejected": -0.5317824482917786,
"logps/chosen": -0.9497036933898926,
"logps/rejected": -1.101233959197998,
"loss": 5.4486,
"nll_loss": 1.3097865581512451,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.009497037157416344,
"rewards/margins": 0.0015153035055845976,
"rewards/rejected": -0.01101233996450901,
"step": 90
},
{
"epoch": 0.2094240837696335,
"grad_norm": 8.396464347839355,
"learning_rate": 4.827661805750437e-07,
"log_odds_chosen": 0.29178586602211,
"log_odds_ratio": -0.6671258211135864,
"logits/chosen": -0.4934987425804138,
"logits/rejected": -0.515052855014801,
"logps/chosen": -0.9019988179206848,
"logps/rejected": -1.0930007696151733,
"loss": 5.3924,
"nll_loss": 1.2699394226074219,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.009019988588988781,
"rewards/margins": 0.0019100181525573134,
"rewards/rejected": -0.010930007323622704,
"step": 100
},
{
"epoch": 0.23036649214659685,
"grad_norm": 8.834160804748535,
"learning_rate": 4.75468677825789e-07,
"log_odds_chosen": 0.37880703806877136,
"log_odds_ratio": -0.6369461417198181,
"logits/chosen": -0.4875544607639313,
"logits/rejected": -0.5104657411575317,
"logps/chosen": -0.8797906637191772,
"logps/rejected": -1.1241319179534912,
"loss": 5.3743,
"nll_loss": 1.3109803199768066,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.008797907270491123,
"rewards/margins": 0.002443410689011216,
"rewards/rejected": -0.01124131865799427,
"step": 110
},
{
"epoch": 0.2513089005235602,
"grad_norm": 7.755215644836426,
"learning_rate": 4.669625898336438e-07,
"log_odds_chosen": 0.191465824842453,
"log_odds_ratio": -0.7049621343612671,
"logits/chosen": -0.5096135139465332,
"logits/rejected": -0.51964271068573,
"logps/chosen": -0.9437309503555298,
"logps/rejected": -1.0870112180709839,
"loss": 5.2767,
"nll_loss": 1.3031724691390991,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.00943730864673853,
"rewards/margins": 0.001432802644558251,
"rewards/rejected": -0.010870112106204033,
"step": 120
},
{
"epoch": 0.27225130890052357,
"grad_norm": 8.79680347442627,
"learning_rate": 4.5729351198915705e-07,
"log_odds_chosen": 0.10129977762699127,
"log_odds_ratio": -0.7444788813591003,
"logits/chosen": -0.5232862234115601,
"logits/rejected": -0.5057969093322754,
"logps/chosen": -0.9433780908584595,
"logps/rejected": -1.0080385208129883,
"loss": 5.3749,
"nll_loss": 1.321378469467163,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.009433778934180737,
"rewards/margins": 0.0006466054474003613,
"rewards/rejected": -0.010080385021865368,
"step": 130
},
{
"epoch": 0.2931937172774869,
"grad_norm": 8.298604965209961,
"learning_rate": 4.4651327368569684e-07,
"log_odds_chosen": 0.15155552327632904,
"log_odds_ratio": -0.7214217185974121,
"logits/chosen": -0.526405930519104,
"logits/rejected": -0.5193445086479187,
"logps/chosen": -0.9357401728630066,
"logps/rejected": -1.0443140268325806,
"loss": 5.3026,
"nll_loss": 1.3262240886688232,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.009357400238513947,
"rewards/margins": 0.0010857387678697705,
"rewards/rejected": -0.010443138889968395,
"step": 140
},
{
"epoch": 0.31413612565445026,
"grad_norm": 8.639591217041016,
"learning_rate": 4.346796604970912e-07,
"log_odds_chosen": 0.31407707929611206,
"log_odds_ratio": -0.6722251772880554,
"logits/chosen": -0.5174251198768616,
"logits/rejected": -0.5082064867019653,
"logps/chosen": -0.8792362213134766,
"logps/rejected": -1.0834535360336304,
"loss": 5.2925,
"nll_loss": 1.304610252380371,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.008792361244559288,
"rewards/margins": 0.0020421738736331463,
"rewards/rejected": -0.010834535583853722,
"step": 150
},
{
"epoch": 0.33507853403141363,
"grad_norm": 8.277928352355957,
"learning_rate": 4.218561044282098e-07,
"log_odds_chosen": 0.2669292092323303,
"log_odds_ratio": -0.6675515174865723,
"logits/chosen": -0.5230351686477661,
"logits/rejected": -0.5290777087211609,
"logps/chosen": -0.9324489831924438,
"logps/rejected": -1.1104564666748047,
"loss": 5.3537,
"nll_loss": 1.3643314838409424,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.009324489161372185,
"rewards/margins": 0.0017800761852413416,
"rewards/rejected": -0.011104565113782883,
"step": 160
},
{
"epoch": 0.35602094240837695,
"grad_norm": 8.850061416625977,
"learning_rate": 4.081113438988443e-07,
"log_odds_chosen": 0.2744578719139099,
"log_odds_ratio": -0.6702545881271362,
"logits/chosen": -0.49922794103622437,
"logits/rejected": -0.5103174448013306,
"logps/chosen": -0.9047120809555054,
"logps/rejected": -1.0733238458633423,
"loss": 5.2741,
"nll_loss": 1.3267205953598022,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.009047120809555054,
"rewards/margins": 0.0016861179610714316,
"rewards/rejected": -0.010733239352703094,
"step": 170
},
{
"epoch": 0.3769633507853403,
"grad_norm": 8.881376266479492,
"learning_rate": 3.935190552834828e-07,
"log_odds_chosen": 0.23824062943458557,
"log_odds_ratio": -0.6932843923568726,
"logits/chosen": -0.4861333966255188,
"logits/rejected": -0.5041022896766663,
"logps/chosen": -0.8888905644416809,
"logps/rejected": -1.0255122184753418,
"loss": 5.259,
"nll_loss": 1.3062529563903809,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.008888904936611652,
"rewards/margins": 0.0013662164565175772,
"rewards/rejected": -0.010255122557282448,
"step": 180
},
{
"epoch": 0.39790575916230364,
"grad_norm": 9.558187484741211,
"learning_rate": 3.781574579820464e-07,
"log_odds_chosen": 0.3100183606147766,
"log_odds_ratio": -0.6493682265281677,
"logits/chosen": -0.5123935341835022,
"logits/rejected": -0.5036609768867493,
"logps/chosen": -0.8575283885002136,
"logps/rejected": -1.0392427444458008,
"loss": 5.265,
"nll_loss": 1.317764163017273,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.008575284853577614,
"rewards/margins": 0.001817143289372325,
"rewards/rejected": -0.010392428375780582,
"step": 190
},
{
"epoch": 0.418848167539267,
"grad_norm": 9.175705909729004,
"learning_rate": 3.621088951385353e-07,
"log_odds_chosen": 0.33279526233673096,
"log_odds_ratio": -0.6604114770889282,
"logits/chosen": -0.5108372569084167,
"logits/rejected": -0.5279550552368164,
"logps/chosen": -0.9044734835624695,
"logps/rejected": -1.117700219154358,
"loss": 5.2395,
"nll_loss": 1.313854455947876,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009044734761118889,
"rewards/margins": 0.0021322660613805056,
"rewards/rejected": -0.01117700058966875,
"step": 200
},
{
"epoch": 0.418848167539267,
"eval_log_odds_chosen": 0.28363677859306335,
"eval_log_odds_ratio": -0.6567826271057129,
"eval_logits/chosen": -0.5067907571792603,
"eval_logits/rejected": -0.493470162153244,
"eval_logps/chosen": -0.8826000690460205,
"eval_logps/rejected": -1.067192792892456,
"eval_loss": 1.264377236366272,
"eval_nll_loss": 1.2560975551605225,
"eval_rewards/accuracies": 0.600806474685669,
"eval_rewards/chosen": -0.008825999684631824,
"eval_rewards/margins": 0.0018459270941093564,
"eval_rewards/rejected": -0.010671926662325859,
"eval_runtime": 45.0179,
"eval_samples_per_second": 44.427,
"eval_steps_per_second": 1.399,
"step": 200
},
{
"epoch": 0.4397905759162304,
"grad_norm": 10.455543518066406,
"learning_rate": 3.454593922550693e-07,
"log_odds_chosen": 0.26851850748062134,
"log_odds_ratio": -0.6904939413070679,
"logits/chosen": -0.4954306483268738,
"logits/rejected": -0.4978114068508148,
"logps/chosen": -0.8825132250785828,
"logps/rejected": -1.0579698085784912,
"loss": 5.316,
"nll_loss": 1.309258222579956,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008825132623314857,
"rewards/margins": 0.0017545666778460145,
"rewards/rejected": -0.010579698719084263,
"step": 210
},
{
"epoch": 0.4607329842931937,
"grad_norm": 10.91032886505127,
"learning_rate": 3.2829819606729477e-07,
"log_odds_chosen": 0.25027215480804443,
"log_odds_ratio": -0.6841479539871216,
"logits/chosen": -0.5188068151473999,
"logits/rejected": -0.504177451133728,
"logps/chosen": -0.9375869035720825,
"logps/rejected": -1.0952080488204956,
"loss": 5.2055,
"nll_loss": 1.3260114192962646,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.009375869296491146,
"rewards/margins": 0.0015762097900733352,
"rewards/rejected": -0.01095207966864109,
"step": 220
},
{
"epoch": 0.4816753926701571,
"grad_norm": 13.497580528259277,
"learning_rate": 3.1071729615293424e-07,
"log_odds_chosen": 0.18811996281147003,
"log_odds_ratio": -0.7035666704177856,
"logits/chosen": -0.5260552763938904,
"logits/rejected": -0.510788083076477,
"logps/chosen": -0.9514686465263367,
"logps/rejected": -1.060083031654358,
"loss": 5.1571,
"nll_loss": 1.2953994274139404,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.009514686651527882,
"rewards/margins": 0.0010861435439437628,
"rewards/rejected": -0.010600829496979713,
"step": 230
},
{
"epoch": 0.5026178010471204,
"grad_norm": 11.768831253051758,
"learning_rate": 2.9281093183781403e-07,
"log_odds_chosen": 0.32460492849349976,
"log_odds_ratio": -0.6687750816345215,
"logits/chosen": -0.46817174553871155,
"logits/rejected": -0.4911385476589203,
"logps/chosen": -0.8718591928482056,
"logps/rejected": -1.053980827331543,
"loss": 5.1916,
"nll_loss": 1.2509262561798096,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.00871859211474657,
"rewards/margins": 0.001821216195821762,
"rewards/rejected": -0.010539808310568333,
"step": 240
},
{
"epoch": 0.5235602094240838,
"grad_norm": 13.659006118774414,
"learning_rate": 2.7467508704251135e-07,
"log_odds_chosen": 0.2745344638824463,
"log_odds_ratio": -0.6745666861534119,
"logits/chosen": -0.5158644914627075,
"logits/rejected": -0.5093538165092468,
"logps/chosen": -0.8866285085678101,
"logps/rejected": -1.0590866804122925,
"loss": 5.1928,
"nll_loss": 1.305888295173645,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.008866284973919392,
"rewards/margins": 0.00172458216547966,
"rewards/rejected": -0.010590866208076477,
"step": 250
},
{
"epoch": 0.5445026178010471,
"grad_norm": 14.492527961730957,
"learning_rate": 2.5640697577740815e-07,
"log_odds_chosen": 0.26566964387893677,
"log_odds_ratio": -0.6708006858825684,
"logits/chosen": -0.5190873146057129,
"logits/rejected": -0.5259062051773071,
"logps/chosen": -0.8882888555526733,
"logps/rejected": -1.0456405878067017,
"loss": 5.1412,
"nll_loss": 1.265944004058838,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.008882888592779636,
"rewards/margins": 0.0015735173365101218,
"rewards/rejected": -0.010456404648721218,
"step": 260
},
{
"epoch": 0.5654450261780105,
"grad_norm": 15.814299583435059,
"learning_rate": 2.381045210440644e-07,
"log_odds_chosen": 0.23130980134010315,
"log_odds_ratio": -0.7191926836967468,
"logits/chosen": -0.5003880262374878,
"logits/rejected": -0.48836550116539,
"logps/chosen": -0.8919955492019653,
"logps/rejected": -1.045290470123291,
"loss": 5.026,
"nll_loss": 1.2595463991165161,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.008919955231249332,
"rewards/margins": 0.0015329491579905152,
"rewards/rejected": -0.010452903807163239,
"step": 270
},
{
"epoch": 0.5863874345549738,
"grad_norm": 16.75997543334961,
"learning_rate": 2.1986582993616925e-07,
"log_odds_chosen": 0.3688739538192749,
"log_odds_ratio": -0.6330865621566772,
"logits/chosen": -0.5016785860061646,
"logits/rejected": -0.5119304656982422,
"logps/chosen": -0.8641098141670227,
"logps/rejected": -1.1013071537017822,
"loss": 5.0293,
"nll_loss": 1.2116810083389282,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008641098625957966,
"rewards/margins": 0.002371972892433405,
"rewards/rejected": -0.011013071052730083,
"step": 280
},
{
"epoch": 0.6073298429319371,
"grad_norm": 17.08208465576172,
"learning_rate": 2.0178866775369774e-07,
"log_odds_chosen": 0.2176806479692459,
"log_odds_ratio": -0.7151871919631958,
"logits/chosen": -0.49786868691444397,
"logits/rejected": -0.5026464462280273,
"logps/chosen": -0.9162432551383972,
"logps/rejected": -1.068025827407837,
"loss": 5.0736,
"nll_loss": 1.2801578044891357,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.00916243251413107,
"rewards/margins": 0.001517825061455369,
"rewards/rejected": -0.01068025641143322,
"step": 290
},
{
"epoch": 0.6282722513089005,
"grad_norm": 19.949918746948242,
"learning_rate": 1.839699339491937e-07,
"log_odds_chosen": 0.18358822166919708,
"log_odds_ratio": -0.6940725445747375,
"logits/chosen": -0.5160781145095825,
"logits/rejected": -0.5093048810958862,
"logps/chosen": -0.9349120855331421,
"logps/rejected": -1.053973913192749,
"loss": 4.9283,
"nll_loss": 1.2697252035140991,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.00934912171214819,
"rewards/margins": 0.0011906183790415525,
"rewards/rejected": -0.010539740324020386,
"step": 300
},
{
"epoch": 0.6492146596858639,
"grad_norm": 18.484830856323242,
"learning_rate": 1.6650514271527465e-07,
"log_odds_chosen": 0.20549292862415314,
"log_odds_ratio": -0.7036994695663452,
"logits/chosen": -0.5009588599205017,
"logits/rejected": -0.48482465744018555,
"logps/chosen": -0.8928836584091187,
"logps/rejected": -1.007062554359436,
"loss": 4.9485,
"nll_loss": 1.2219517230987549,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.00892883725464344,
"rewards/margins": 0.0011417880887165666,
"rewards/rejected": -0.010070623829960823,
"step": 310
},
{
"epoch": 0.6701570680628273,
"grad_norm": 15.437707901000977,
"learning_rate": 1.4948791099758052e-07,
"log_odds_chosen": 0.2843065857887268,
"log_odds_ratio": -0.6758512258529663,
"logits/chosen": -0.5303715467453003,
"logits/rejected": -0.530095636844635,
"logps/chosen": -0.920501708984375,
"logps/rejected": -1.1022217273712158,
"loss": 5.0943,
"nll_loss": 1.2833216190338135,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.009205018170177937,
"rewards/margins": 0.00181719905231148,
"rewards/rejected": -0.011022215709090233,
"step": 320
},
{
"epoch": 0.6910994764397905,
"grad_norm": 15.229772567749023,
"learning_rate": 1.3300945667758012e-07,
"log_odds_chosen": 0.3193449079990387,
"log_odds_ratio": -0.6650776267051697,
"logits/chosen": -0.520767092704773,
"logits/rejected": -0.5018462538719177,
"logps/chosen": -0.9000827074050903,
"logps/rejected": -1.0957286357879639,
"loss": 5.1075,
"nll_loss": 1.2375710010528564,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009000827558338642,
"rewards/margins": 0.001956457272171974,
"rewards/rejected": -0.010957283899188042,
"step": 330
},
{
"epoch": 0.7120418848167539,
"grad_norm": 12.496045112609863,
"learning_rate": 1.1715810961514072e-07,
"log_odds_chosen": 0.3133091330528259,
"log_odds_ratio": -0.6810437440872192,
"logits/chosen": -0.5330361127853394,
"logits/rejected": -0.5395983457565308,
"logps/chosen": -0.9298146963119507,
"logps/rejected": -1.1273194551467896,
"loss": 5.1375,
"nll_loss": 1.2518556118011475,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.00929814763367176,
"rewards/margins": 0.0019750469364225864,
"rewards/rejected": -0.011273193173110485,
"step": 340
},
{
"epoch": 0.7329842931937173,
"grad_norm": 12.056753158569336,
"learning_rate": 1.0201883817182949e-07,
"log_odds_chosen": 0.2855184078216553,
"log_odds_ratio": -0.6681550741195679,
"logits/chosen": -0.5056412220001221,
"logits/rejected": -0.5043959021568298,
"logps/chosen": -0.9035753011703491,
"logps/rejected": -1.097712516784668,
"loss": 5.0573,
"nll_loss": 1.2399346828460693,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.009035754017531872,
"rewards/margins": 0.0019413719419389963,
"rewards/rejected": -0.010977125726640224,
"step": 350
},
{
"epoch": 0.7539267015706806,
"grad_norm": 10.219555854797363,
"learning_rate": 8.76727937529367e-08,
"log_odds_chosen": 0.2616792321205139,
"log_odds_ratio": -0.6761180758476257,
"logits/chosen": -0.5021784901618958,
"logits/rejected": -0.5104162693023682,
"logps/chosen": -0.8949702978134155,
"logps/rejected": -1.065882682800293,
"loss": 5.0549,
"nll_loss": 1.2354400157928467,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.0089497035369277,
"rewards/margins": 0.0017091237241402268,
"rewards/rejected": -0.010658827610313892,
"step": 360
},
{
"epoch": 0.774869109947644,
"grad_norm": 9.729683876037598,
"learning_rate": 7.419687580962222e-08,
"log_odds_chosen": 0.3590267598628998,
"log_odds_ratio": -0.6520247459411621,
"logits/chosen": -0.528827428817749,
"logits/rejected": -0.5364798903465271,
"logps/chosen": -0.8705722689628601,
"logps/rejected": -1.088714838027954,
"loss": 5.0822,
"nll_loss": 1.28533935546875,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.00870572216808796,
"rewards/margins": 0.002181424992159009,
"rewards/rejected": -0.01088714599609375,
"step": 370
},
{
"epoch": 0.7958115183246073,
"grad_norm": 8.4192533493042,
"learning_rate": 6.166331963291519e-08,
"log_odds_chosen": 0.16738846898078918,
"log_odds_ratio": -0.7240005135536194,
"logits/chosen": -0.4927976131439209,
"logits/rejected": -0.48198264837265015,
"logps/chosen": -0.8879655599594116,
"logps/rejected": -1.0060501098632812,
"loss": 5.079,
"nll_loss": 1.2799310684204102,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008879655972123146,
"rewards/margins": 0.0011808457784354687,
"rewards/rejected": -0.010060502216219902,
"step": 380
},
{
"epoch": 0.8167539267015707,
"grad_norm": 8.200337409973145,
"learning_rate": 5.013930914912476e-08,
"log_odds_chosen": 0.22408561408519745,
"log_odds_ratio": -0.6869050860404968,
"logits/chosen": -0.5221595764160156,
"logits/rejected": -0.5140419602394104,
"logps/chosen": -0.8862568140029907,
"logps/rejected": -1.033087134361267,
"loss": 4.9446,
"nll_loss": 1.249403715133667,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.008862568065524101,
"rewards/margins": 0.0014683037297800183,
"rewards/rejected": -0.010330872610211372,
"step": 390
},
{
"epoch": 0.837696335078534,
"grad_norm": 8.371641159057617,
"learning_rate": 3.968661679220467e-08,
"log_odds_chosen": 0.2015990912914276,
"log_odds_ratio": -0.7086056470870972,
"logits/chosen": -0.5080100893974304,
"logits/rejected": -0.5066760182380676,
"logps/chosen": -0.8951946496963501,
"logps/rejected": -1.034939169883728,
"loss": 4.9046,
"nll_loss": 1.2675601243972778,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.008951946161687374,
"rewards/margins": 0.0013974455650895834,
"rewards/rejected": -0.010349391028285027,
"step": 400
},
{
"epoch": 0.837696335078534,
"eval_log_odds_chosen": 0.2856575548648834,
"eval_log_odds_ratio": -0.6565335392951965,
"eval_logits/chosen": -0.5022188425064087,
"eval_logits/rejected": -0.48727092146873474,
"eval_logps/chosen": -0.8758360147476196,
"eval_logps/rejected": -1.060706615447998,
"eval_loss": 1.224434733390808,
"eval_nll_loss": 1.217404842376709,
"eval_rewards/accuracies": 0.6028226017951965,
"eval_rewards/chosen": -0.008758360520005226,
"eval_rewards/margins": 0.0018487058114260435,
"eval_rewards/rejected": -0.010607065632939339,
"eval_runtime": 44.2456,
"eval_samples_per_second": 45.202,
"eval_steps_per_second": 1.424,
"step": 400
},
{
"epoch": 0.8586387434554974,
"grad_norm": 7.9049577713012695,
"learning_rate": 3.036127238347164e-08,
"log_odds_chosen": 0.2682987153530121,
"log_odds_ratio": -0.6803867816925049,
"logits/chosen": -0.5115852355957031,
"logits/rejected": -0.4977447986602783,
"logps/chosen": -0.8711401224136353,
"logps/rejected": -1.0264394283294678,
"loss": 4.9707,
"nll_loss": 1.2559094429016113,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.00871140044182539,
"rewards/margins": 0.001552992849610746,
"rewards/rejected": -0.010264393873512745,
"step": 410
},
{
"epoch": 0.8795811518324608,
"grad_norm": 8.560924530029297,
"learning_rate": 2.2213262793589482e-08,
"log_odds_chosen": 0.3053427040576935,
"log_odds_ratio": -0.6574885845184326,
"logits/chosen": -0.4770349860191345,
"logits/rejected": -0.47626155614852905,
"logps/chosen": -0.8793102502822876,
"logps/rejected": -1.0621235370635986,
"loss": 4.9777,
"nll_loss": 1.1874935626983643,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008793102577328682,
"rewards/margins": 0.0018281324300915003,
"rewards/rejected": -0.010621235705912113,
"step": 420
},
{
"epoch": 0.900523560209424,
"grad_norm": 7.4246320724487305,
"learning_rate": 1.5286263996730026e-08,
"log_odds_chosen": 0.2607780992984772,
"log_odds_ratio": -0.698715329170227,
"logits/chosen": -0.5007373094558716,
"logits/rejected": -0.5147266387939453,
"logps/chosen": -0.8995779156684875,
"logps/rejected": -1.0742470026016235,
"loss": 4.9582,
"nll_loss": 1.2430001497268677,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.008995778858661652,
"rewards/margins": 0.0017466908320784569,
"rewards/rejected": -0.010742468759417534,
"step": 430
},
{
"epoch": 0.9214659685863874,
"grad_norm": 7.529422283172607,
"learning_rate": 9.617406953185136e-09,
"log_odds_chosen": 0.341641902923584,
"log_odds_ratio": -0.6579716801643372,
"logits/chosen": -0.505264937877655,
"logits/rejected": -0.5192712545394897,
"logps/chosen": -0.8687723875045776,
"logps/rejected": -1.0708019733428955,
"loss": 5.1002,
"nll_loss": 1.2634742259979248,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.008687724359333515,
"rewards/margins": 0.0020202945452183485,
"rewards/rejected": -0.010708019137382507,
"step": 440
},
{
"epoch": 0.9424083769633508,
"grad_norm": 7.834778785705566,
"learning_rate": 5.2370785753763356e-09,
"log_odds_chosen": 0.22981591522693634,
"log_odds_ratio": -0.6861775517463684,
"logits/chosen": -0.5010178089141846,
"logits/rejected": -0.5019730925559998,
"logps/chosen": -0.9211832284927368,
"logps/rejected": -1.0682731866836548,
"loss": 4.9999,
"nll_loss": 1.2437247037887573,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.009211831726133823,
"rewards/margins": 0.0014708999078720808,
"rewards/rejected": -0.010682731866836548,
"step": 450
},
{
"epoch": 0.9633507853403142,
"grad_norm": 8.3234224319458,
"learning_rate": 2.168758844148272e-09,
"log_odds_chosen": 0.23462414741516113,
"log_odds_ratio": -0.6922942996025085,
"logits/chosen": -0.49100571870803833,
"logits/rejected": -0.49131718277931213,
"logps/chosen": -0.9402503967285156,
"logps/rejected": -1.0789129734039307,
"loss": 4.9825,
"nll_loss": 1.2720887660980225,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.009402502328157425,
"rewards/margins": 0.001386628020554781,
"rewards/rejected": -0.010789131745696068,
"step": 460
},
{
"epoch": 0.9842931937172775,
"grad_norm": 8.285514831542969,
"learning_rate": 4.288949484559934e-10,
"log_odds_chosen": 0.2851186990737915,
"log_odds_ratio": -0.6771829128265381,
"logits/chosen": -0.5137313008308411,
"logits/rejected": -0.5017358064651489,
"logps/chosen": -0.8824328184127808,
"logps/rejected": -1.0701076984405518,
"loss": 5.0037,
"nll_loss": 1.2345225811004639,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.008824328891932964,
"rewards/margins": 0.0018767483998090029,
"rewards/rejected": -0.010701077058911324,
"step": 470
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 5.24161350602124,
"train_runtime": 5082.9537,
"train_samples_per_second": 12.027,
"train_steps_per_second": 0.094
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}