Model: jackf857/llama-3-8b-base-orpo-ultrafeedback-4xh200-rerun Source: Original Platform
946 lines
34 KiB
JSON
946 lines
34 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020942408376963353,
|
|
"grad_norm": 21.158132553100586,
|
|
"learning_rate": 0.0,
|
|
"log_odds_chosen": 0.06687486171722412,
|
|
"log_odds_ratio": -0.7179359793663025,
|
|
"logits/chosen": -0.6003108024597168,
|
|
"logits/rejected": -0.5959680080413818,
|
|
"logps/chosen": -1.3004443645477295,
|
|
"logps/rejected": -1.3789687156677246,
|
|
"loss": 6.0476,
|
|
"nll_loss": 1.4909255504608154,
|
|
"rewards/accuracies": 0.46875,
|
|
"rewards/chosen": -0.013004443608224392,
|
|
"rewards/margins": 0.0007852441049180925,
|
|
"rewards/rejected": -0.013789687305688858,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 22.246910095214844,
|
|
"learning_rate": 9.375e-08,
|
|
"log_odds_chosen": 0.24125711619853973,
|
|
"log_odds_ratio": -0.6763277053833008,
|
|
"logits/chosen": -0.680765688419342,
|
|
"logits/rejected": -0.6712806224822998,
|
|
"logps/chosen": -1.1191811561584473,
|
|
"logps/rejected": -1.29719877243042,
|
|
"loss": 6.0187,
|
|
"nll_loss": 1.5037554502487183,
|
|
"rewards/accuracies": 0.5763888955116272,
|
|
"rewards/chosen": -0.01119181141257286,
|
|
"rewards/margins": 0.0017801759531721473,
|
|
"rewards/rejected": -0.012971988879144192,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 21.124698638916016,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"log_odds_chosen": 0.24139347672462463,
|
|
"log_odds_ratio": -0.6791239976882935,
|
|
"logits/chosen": -0.6397651433944702,
|
|
"logits/rejected": -0.6568408608436584,
|
|
"logps/chosen": -1.1540690660476685,
|
|
"logps/rejected": -1.3257322311401367,
|
|
"loss": 5.8777,
|
|
"nll_loss": 1.4614884853363037,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -0.011540691368281841,
|
|
"rewards/margins": 0.0017166311154142022,
|
|
"rewards/rejected": -0.013257321901619434,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 19.380123138427734,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"log_odds_chosen": 0.15998375415802002,
|
|
"log_odds_ratio": -0.715591549873352,
|
|
"logits/chosen": -0.6493812799453735,
|
|
"logits/rejected": -0.6421241164207458,
|
|
"logps/chosen": -1.0813921689987183,
|
|
"logps/rejected": -1.2045493125915527,
|
|
"loss": 5.9475,
|
|
"nll_loss": 1.485296607017517,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -0.010813921689987183,
|
|
"rewards/margins": 0.0012315715430304408,
|
|
"rewards/rejected": -0.01204549241811037,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 16.27351188659668,
|
|
"learning_rate": 4.0625e-07,
|
|
"log_odds_chosen": 0.18812714517116547,
|
|
"log_odds_ratio": -0.7328466176986694,
|
|
"logits/chosen": -0.63264399766922,
|
|
"logits/rejected": -0.645917534828186,
|
|
"logps/chosen": -1.008213758468628,
|
|
"logps/rejected": -1.1435163021087646,
|
|
"loss": 5.7467,
|
|
"nll_loss": 1.3937628269195557,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -0.010082137770950794,
|
|
"rewards/margins": 0.0013530252035707235,
|
|
"rewards/rejected": -0.011435163207352161,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 14.714133262634277,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"log_odds_chosen": 0.18715843558311462,
|
|
"log_odds_ratio": -0.7093220949172974,
|
|
"logits/chosen": -0.6608070135116577,
|
|
"logits/rejected": -0.6796087026596069,
|
|
"logps/chosen": -0.9801030158996582,
|
|
"logps/rejected": -1.10386061668396,
|
|
"loss": 5.7231,
|
|
"nll_loss": 1.4938442707061768,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -0.009801030158996582,
|
|
"rewards/margins": 0.0012375751975923777,
|
|
"rewards/rejected": -0.011038605123758316,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 15.548501014709473,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"log_odds_chosen": 0.13172771036624908,
|
|
"log_odds_ratio": -0.7336539030075073,
|
|
"logits/chosen": -0.6106709241867065,
|
|
"logits/rejected": -0.620617687702179,
|
|
"logps/chosen": -1.0109319686889648,
|
|
"logps/rejected": -1.118139624595642,
|
|
"loss": 5.7263,
|
|
"nll_loss": 1.4132804870605469,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.010109319351613522,
|
|
"rewards/margins": 0.0010720762657001615,
|
|
"rewards/rejected": -0.011181396432220936,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 11.627691268920898,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"log_odds_chosen": 0.2610163390636444,
|
|
"log_odds_ratio": -0.6738774180412292,
|
|
"logits/chosen": -0.585883378982544,
|
|
"logits/rejected": -0.5932791829109192,
|
|
"logps/chosen": -0.9574364423751831,
|
|
"logps/rejected": -1.1436620950698853,
|
|
"loss": 5.5467,
|
|
"nll_loss": 1.4156994819641113,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -0.009574364870786667,
|
|
"rewards/margins": 0.0018622549250721931,
|
|
"rewards/rejected": -0.011436618864536285,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 10.594218254089355,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"log_odds_chosen": 0.2706550061702728,
|
|
"log_odds_ratio": -0.6398797035217285,
|
|
"logits/chosen": -0.5370240211486816,
|
|
"logits/rejected": -0.5456986427307129,
|
|
"logps/chosen": -0.9297056198120117,
|
|
"logps/rejected": -1.09745454788208,
|
|
"loss": 5.4389,
|
|
"nll_loss": 1.3115617036819458,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -0.009297055192291737,
|
|
"rewards/margins": 0.0016774894902482629,
|
|
"rewards/rejected": -0.010974545031785965,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 9.036905288696289,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"log_odds_chosen": 0.22219958901405334,
|
|
"log_odds_ratio": -0.678338348865509,
|
|
"logits/chosen": -0.5145021677017212,
|
|
"logits/rejected": -0.5317824482917786,
|
|
"logps/chosen": -0.9497036933898926,
|
|
"logps/rejected": -1.101233959197998,
|
|
"loss": 5.4486,
|
|
"nll_loss": 1.3097865581512451,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -0.009497037157416344,
|
|
"rewards/margins": 0.0015153035055845976,
|
|
"rewards/rejected": -0.01101233996450901,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 8.396464347839355,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"log_odds_chosen": 0.29178586602211,
|
|
"log_odds_ratio": -0.6671258211135864,
|
|
"logits/chosen": -0.4934987425804138,
|
|
"logits/rejected": -0.515052855014801,
|
|
"logps/chosen": -0.9019988179206848,
|
|
"logps/rejected": -1.0930007696151733,
|
|
"loss": 5.3924,
|
|
"nll_loss": 1.2699394226074219,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -0.009019988588988781,
|
|
"rewards/margins": 0.0019100181525573134,
|
|
"rewards/rejected": -0.010930007323622704,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 8.834160804748535,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"log_odds_chosen": 0.37880703806877136,
|
|
"log_odds_ratio": -0.6369461417198181,
|
|
"logits/chosen": -0.4875544607639313,
|
|
"logits/rejected": -0.5104657411575317,
|
|
"logps/chosen": -0.8797906637191772,
|
|
"logps/rejected": -1.1241319179534912,
|
|
"loss": 5.3743,
|
|
"nll_loss": 1.3109803199768066,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -0.008797907270491123,
|
|
"rewards/margins": 0.002443410689011216,
|
|
"rewards/rejected": -0.01124131865799427,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 7.755215644836426,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"log_odds_chosen": 0.191465824842453,
|
|
"log_odds_ratio": -0.7049621343612671,
|
|
"logits/chosen": -0.5096135139465332,
|
|
"logits/rejected": -0.51964271068573,
|
|
"logps/chosen": -0.9437309503555298,
|
|
"logps/rejected": -1.0870112180709839,
|
|
"loss": 5.2767,
|
|
"nll_loss": 1.3031724691390991,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -0.00943730864673853,
|
|
"rewards/margins": 0.001432802644558251,
|
|
"rewards/rejected": -0.010870112106204033,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 8.79680347442627,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"log_odds_chosen": 0.10129977762699127,
|
|
"log_odds_ratio": -0.7444788813591003,
|
|
"logits/chosen": -0.5232862234115601,
|
|
"logits/rejected": -0.5057969093322754,
|
|
"logps/chosen": -0.9433780908584595,
|
|
"logps/rejected": -1.0080385208129883,
|
|
"loss": 5.3749,
|
|
"nll_loss": 1.321378469467163,
|
|
"rewards/accuracies": 0.5218750238418579,
|
|
"rewards/chosen": -0.009433778934180737,
|
|
"rewards/margins": 0.0006466054474003613,
|
|
"rewards/rejected": -0.010080385021865368,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 8.298604965209961,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"log_odds_chosen": 0.15155552327632904,
|
|
"log_odds_ratio": -0.7214217185974121,
|
|
"logits/chosen": -0.526405930519104,
|
|
"logits/rejected": -0.5193445086479187,
|
|
"logps/chosen": -0.9357401728630066,
|
|
"logps/rejected": -1.0443140268325806,
|
|
"loss": 5.3026,
|
|
"nll_loss": 1.3262240886688232,
|
|
"rewards/accuracies": 0.534375011920929,
|
|
"rewards/chosen": -0.009357400238513947,
|
|
"rewards/margins": 0.0010857387678697705,
|
|
"rewards/rejected": -0.010443138889968395,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 8.639591217041016,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"log_odds_chosen": 0.31407707929611206,
|
|
"log_odds_ratio": -0.6722251772880554,
|
|
"logits/chosen": -0.5174251198768616,
|
|
"logits/rejected": -0.5082064867019653,
|
|
"logps/chosen": -0.8792362213134766,
|
|
"logps/rejected": -1.0834535360336304,
|
|
"loss": 5.2925,
|
|
"nll_loss": 1.304610252380371,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -0.008792361244559288,
|
|
"rewards/margins": 0.0020421738736331463,
|
|
"rewards/rejected": -0.010834535583853722,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 8.277928352355957,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"log_odds_chosen": 0.2669292092323303,
|
|
"log_odds_ratio": -0.6675515174865723,
|
|
"logits/chosen": -0.5230351686477661,
|
|
"logits/rejected": -0.5290777087211609,
|
|
"logps/chosen": -0.9324489831924438,
|
|
"logps/rejected": -1.1104564666748047,
|
|
"loss": 5.3537,
|
|
"nll_loss": 1.3643314838409424,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": -0.009324489161372185,
|
|
"rewards/margins": 0.0017800761852413416,
|
|
"rewards/rejected": -0.011104565113782883,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 8.850061416625977,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"log_odds_chosen": 0.2744578719139099,
|
|
"log_odds_ratio": -0.6702545881271362,
|
|
"logits/chosen": -0.49922794103622437,
|
|
"logits/rejected": -0.5103174448013306,
|
|
"logps/chosen": -0.9047120809555054,
|
|
"logps/rejected": -1.0733238458633423,
|
|
"loss": 5.2741,
|
|
"nll_loss": 1.3267205953598022,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -0.009047120809555054,
|
|
"rewards/margins": 0.0016861179610714316,
|
|
"rewards/rejected": -0.010733239352703094,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 8.881376266479492,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"log_odds_chosen": 0.23824062943458557,
|
|
"log_odds_ratio": -0.6932843923568726,
|
|
"logits/chosen": -0.4861333966255188,
|
|
"logits/rejected": -0.5041022896766663,
|
|
"logps/chosen": -0.8888905644416809,
|
|
"logps/rejected": -1.0255122184753418,
|
|
"loss": 5.259,
|
|
"nll_loss": 1.3062529563903809,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -0.008888904936611652,
|
|
"rewards/margins": 0.0013662164565175772,
|
|
"rewards/rejected": -0.010255122557282448,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 9.558187484741211,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"log_odds_chosen": 0.3100183606147766,
|
|
"log_odds_ratio": -0.6493682265281677,
|
|
"logits/chosen": -0.5123935341835022,
|
|
"logits/rejected": -0.5036609768867493,
|
|
"logps/chosen": -0.8575283885002136,
|
|
"logps/rejected": -1.0392427444458008,
|
|
"loss": 5.265,
|
|
"nll_loss": 1.317764163017273,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -0.008575284853577614,
|
|
"rewards/margins": 0.001817143289372325,
|
|
"rewards/rejected": -0.010392428375780582,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 9.175705909729004,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"log_odds_chosen": 0.33279526233673096,
|
|
"log_odds_ratio": -0.6604114770889282,
|
|
"logits/chosen": -0.5108372569084167,
|
|
"logits/rejected": -0.5279550552368164,
|
|
"logps/chosen": -0.9044734835624695,
|
|
"logps/rejected": -1.117700219154358,
|
|
"loss": 5.2395,
|
|
"nll_loss": 1.313854455947876,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.009044734761118889,
|
|
"rewards/margins": 0.0021322660613805056,
|
|
"rewards/rejected": -0.01117700058966875,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_log_odds_chosen": 0.28363677859306335,
|
|
"eval_log_odds_ratio": -0.6567826271057129,
|
|
"eval_logits/chosen": -0.5067907571792603,
|
|
"eval_logits/rejected": -0.493470162153244,
|
|
"eval_logps/chosen": -0.8826000690460205,
|
|
"eval_logps/rejected": -1.067192792892456,
|
|
"eval_loss": 1.264377236366272,
|
|
"eval_nll_loss": 1.2560975551605225,
|
|
"eval_rewards/accuracies": 0.600806474685669,
|
|
"eval_rewards/chosen": -0.008825999684631824,
|
|
"eval_rewards/margins": 0.0018459270941093564,
|
|
"eval_rewards/rejected": -0.010671926662325859,
|
|
"eval_runtime": 45.0179,
|
|
"eval_samples_per_second": 44.427,
|
|
"eval_steps_per_second": 1.399,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 10.455543518066406,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"log_odds_chosen": 0.26851850748062134,
|
|
"log_odds_ratio": -0.6904939413070679,
|
|
"logits/chosen": -0.4954306483268738,
|
|
"logits/rejected": -0.4978114068508148,
|
|
"logps/chosen": -0.8825132250785828,
|
|
"logps/rejected": -1.0579698085784912,
|
|
"loss": 5.316,
|
|
"nll_loss": 1.309258222579956,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -0.008825132623314857,
|
|
"rewards/margins": 0.0017545666778460145,
|
|
"rewards/rejected": -0.010579698719084263,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 10.91032886505127,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"log_odds_chosen": 0.25027215480804443,
|
|
"log_odds_ratio": -0.6841479539871216,
|
|
"logits/chosen": -0.5188068151473999,
|
|
"logits/rejected": -0.504177451133728,
|
|
"logps/chosen": -0.9375869035720825,
|
|
"logps/rejected": -1.0952080488204956,
|
|
"loss": 5.2055,
|
|
"nll_loss": 1.3260114192962646,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -0.009375869296491146,
|
|
"rewards/margins": 0.0015762097900733352,
|
|
"rewards/rejected": -0.01095207966864109,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 13.497580528259277,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"log_odds_chosen": 0.18811996281147003,
|
|
"log_odds_ratio": -0.7035666704177856,
|
|
"logits/chosen": -0.5260552763938904,
|
|
"logits/rejected": -0.510788083076477,
|
|
"logps/chosen": -0.9514686465263367,
|
|
"logps/rejected": -1.060083031654358,
|
|
"loss": 5.1571,
|
|
"nll_loss": 1.2953994274139404,
|
|
"rewards/accuracies": 0.5375000238418579,
|
|
"rewards/chosen": -0.009514686651527882,
|
|
"rewards/margins": 0.0010861435439437628,
|
|
"rewards/rejected": -0.010600829496979713,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 11.768831253051758,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"log_odds_chosen": 0.32460492849349976,
|
|
"log_odds_ratio": -0.6687750816345215,
|
|
"logits/chosen": -0.46817174553871155,
|
|
"logits/rejected": -0.4911385476589203,
|
|
"logps/chosen": -0.8718591928482056,
|
|
"logps/rejected": -1.053980827331543,
|
|
"loss": 5.1916,
|
|
"nll_loss": 1.2509262561798096,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.00871859211474657,
|
|
"rewards/margins": 0.001821216195821762,
|
|
"rewards/rejected": -0.010539808310568333,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 13.659006118774414,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"log_odds_chosen": 0.2745344638824463,
|
|
"log_odds_ratio": -0.6745666861534119,
|
|
"logits/chosen": -0.5158644914627075,
|
|
"logits/rejected": -0.5093538165092468,
|
|
"logps/chosen": -0.8866285085678101,
|
|
"logps/rejected": -1.0590866804122925,
|
|
"loss": 5.1928,
|
|
"nll_loss": 1.305888295173645,
|
|
"rewards/accuracies": 0.5531250238418579,
|
|
"rewards/chosen": -0.008866284973919392,
|
|
"rewards/margins": 0.00172458216547966,
|
|
"rewards/rejected": -0.010590866208076477,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 14.492527961730957,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"log_odds_chosen": 0.26566964387893677,
|
|
"log_odds_ratio": -0.6708006858825684,
|
|
"logits/chosen": -0.5190873146057129,
|
|
"logits/rejected": -0.5259062051773071,
|
|
"logps/chosen": -0.8882888555526733,
|
|
"logps/rejected": -1.0456405878067017,
|
|
"loss": 5.1412,
|
|
"nll_loss": 1.265944004058838,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -0.008882888592779636,
|
|
"rewards/margins": 0.0015735173365101218,
|
|
"rewards/rejected": -0.010456404648721218,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 15.814299583435059,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"log_odds_chosen": 0.23130980134010315,
|
|
"log_odds_ratio": -0.7191926836967468,
|
|
"logits/chosen": -0.5003880262374878,
|
|
"logits/rejected": -0.48836550116539,
|
|
"logps/chosen": -0.8919955492019653,
|
|
"logps/rejected": -1.045290470123291,
|
|
"loss": 5.026,
|
|
"nll_loss": 1.2595463991165161,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -0.008919955231249332,
|
|
"rewards/margins": 0.0015329491579905152,
|
|
"rewards/rejected": -0.010452903807163239,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 16.75997543334961,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"log_odds_chosen": 0.3688739538192749,
|
|
"log_odds_ratio": -0.6330865621566772,
|
|
"logits/chosen": -0.5016785860061646,
|
|
"logits/rejected": -0.5119304656982422,
|
|
"logps/chosen": -0.8641098141670227,
|
|
"logps/rejected": -1.1013071537017822,
|
|
"loss": 5.0293,
|
|
"nll_loss": 1.2116810083389282,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -0.008641098625957966,
|
|
"rewards/margins": 0.002371972892433405,
|
|
"rewards/rejected": -0.011013071052730083,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 17.08208465576172,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"log_odds_chosen": 0.2176806479692459,
|
|
"log_odds_ratio": -0.7151871919631958,
|
|
"logits/chosen": -0.49786868691444397,
|
|
"logits/rejected": -0.5026464462280273,
|
|
"logps/chosen": -0.9162432551383972,
|
|
"logps/rejected": -1.068025827407837,
|
|
"loss": 5.0736,
|
|
"nll_loss": 1.2801578044891357,
|
|
"rewards/accuracies": 0.5531250238418579,
|
|
"rewards/chosen": -0.00916243251413107,
|
|
"rewards/margins": 0.001517825061455369,
|
|
"rewards/rejected": -0.01068025641143322,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 19.949918746948242,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"log_odds_chosen": 0.18358822166919708,
|
|
"log_odds_ratio": -0.6940725445747375,
|
|
"logits/chosen": -0.5160781145095825,
|
|
"logits/rejected": -0.5093048810958862,
|
|
"logps/chosen": -0.9349120855331421,
|
|
"logps/rejected": -1.053973913192749,
|
|
"loss": 4.9283,
|
|
"nll_loss": 1.2697252035140991,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.00934912171214819,
|
|
"rewards/margins": 0.0011906183790415525,
|
|
"rewards/rejected": -0.010539740324020386,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 18.484830856323242,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"log_odds_chosen": 0.20549292862415314,
|
|
"log_odds_ratio": -0.7036994695663452,
|
|
"logits/chosen": -0.5009588599205017,
|
|
"logits/rejected": -0.48482465744018555,
|
|
"logps/chosen": -0.8928836584091187,
|
|
"logps/rejected": -1.007062554359436,
|
|
"loss": 4.9485,
|
|
"nll_loss": 1.2219517230987549,
|
|
"rewards/accuracies": 0.5531250238418579,
|
|
"rewards/chosen": -0.00892883725464344,
|
|
"rewards/margins": 0.0011417880887165666,
|
|
"rewards/rejected": -0.010070623829960823,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 15.437707901000977,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"log_odds_chosen": 0.2843065857887268,
|
|
"log_odds_ratio": -0.6758512258529663,
|
|
"logits/chosen": -0.5303715467453003,
|
|
"logits/rejected": -0.530095636844635,
|
|
"logps/chosen": -0.920501708984375,
|
|
"logps/rejected": -1.1022217273712158,
|
|
"loss": 5.0943,
|
|
"nll_loss": 1.2833216190338135,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.009205018170177937,
|
|
"rewards/margins": 0.00181719905231148,
|
|
"rewards/rejected": -0.011022215709090233,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 15.229772567749023,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"log_odds_chosen": 0.3193449079990387,
|
|
"log_odds_ratio": -0.6650776267051697,
|
|
"logits/chosen": -0.520767092704773,
|
|
"logits/rejected": -0.5018462538719177,
|
|
"logps/chosen": -0.9000827074050903,
|
|
"logps/rejected": -1.0957286357879639,
|
|
"loss": 5.1075,
|
|
"nll_loss": 1.2375710010528564,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.009000827558338642,
|
|
"rewards/margins": 0.001956457272171974,
|
|
"rewards/rejected": -0.010957283899188042,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 12.496045112609863,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"log_odds_chosen": 0.3133091330528259,
|
|
"log_odds_ratio": -0.6810437440872192,
|
|
"logits/chosen": -0.5330361127853394,
|
|
"logits/rejected": -0.5395983457565308,
|
|
"logps/chosen": -0.9298146963119507,
|
|
"logps/rejected": -1.1273194551467896,
|
|
"loss": 5.1375,
|
|
"nll_loss": 1.2518556118011475,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -0.00929814763367176,
|
|
"rewards/margins": 0.0019750469364225864,
|
|
"rewards/rejected": -0.011273193173110485,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 12.056753158569336,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"log_odds_chosen": 0.2855184078216553,
|
|
"log_odds_ratio": -0.6681550741195679,
|
|
"logits/chosen": -0.5056412220001221,
|
|
"logits/rejected": -0.5043959021568298,
|
|
"logps/chosen": -0.9035753011703491,
|
|
"logps/rejected": -1.097712516784668,
|
|
"loss": 5.0573,
|
|
"nll_loss": 1.2399346828460693,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -0.009035754017531872,
|
|
"rewards/margins": 0.0019413719419389963,
|
|
"rewards/rejected": -0.010977125726640224,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 10.219555854797363,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"log_odds_chosen": 0.2616792321205139,
|
|
"log_odds_ratio": -0.6761180758476257,
|
|
"logits/chosen": -0.5021784901618958,
|
|
"logits/rejected": -0.5104162693023682,
|
|
"logps/chosen": -0.8949702978134155,
|
|
"logps/rejected": -1.065882682800293,
|
|
"loss": 5.0549,
|
|
"nll_loss": 1.2354400157928467,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -0.0089497035369277,
|
|
"rewards/margins": 0.0017091237241402268,
|
|
"rewards/rejected": -0.010658827610313892,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 9.729683876037598,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"log_odds_chosen": 0.3590267598628998,
|
|
"log_odds_ratio": -0.6520247459411621,
|
|
"logits/chosen": -0.528827428817749,
|
|
"logits/rejected": -0.5364798903465271,
|
|
"logps/chosen": -0.8705722689628601,
|
|
"logps/rejected": -1.088714838027954,
|
|
"loss": 5.0822,
|
|
"nll_loss": 1.28533935546875,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.00870572216808796,
|
|
"rewards/margins": 0.002181424992159009,
|
|
"rewards/rejected": -0.01088714599609375,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 8.4192533493042,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"log_odds_chosen": 0.16738846898078918,
|
|
"log_odds_ratio": -0.7240005135536194,
|
|
"logits/chosen": -0.4927976131439209,
|
|
"logits/rejected": -0.48198264837265015,
|
|
"logps/chosen": -0.8879655599594116,
|
|
"logps/rejected": -1.0060501098632812,
|
|
"loss": 5.079,
|
|
"nll_loss": 1.2799310684204102,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -0.008879655972123146,
|
|
"rewards/margins": 0.0011808457784354687,
|
|
"rewards/rejected": -0.010060502216219902,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 8.200337409973145,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"log_odds_chosen": 0.22408561408519745,
|
|
"log_odds_ratio": -0.6869050860404968,
|
|
"logits/chosen": -0.5221595764160156,
|
|
"logits/rejected": -0.5140419602394104,
|
|
"logps/chosen": -0.8862568140029907,
|
|
"logps/rejected": -1.033087134361267,
|
|
"loss": 4.9446,
|
|
"nll_loss": 1.249403715133667,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -0.008862568065524101,
|
|
"rewards/margins": 0.0014683037297800183,
|
|
"rewards/rejected": -0.010330872610211372,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 8.371641159057617,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"log_odds_chosen": 0.2015990912914276,
|
|
"log_odds_ratio": -0.7086056470870972,
|
|
"logits/chosen": -0.5080100893974304,
|
|
"logits/rejected": -0.5066760182380676,
|
|
"logps/chosen": -0.8951946496963501,
|
|
"logps/rejected": -1.034939169883728,
|
|
"loss": 4.9046,
|
|
"nll_loss": 1.2675601243972778,
|
|
"rewards/accuracies": 0.546875,
|
|
"rewards/chosen": -0.008951946161687374,
|
|
"rewards/margins": 0.0013974455650895834,
|
|
"rewards/rejected": -0.010349391028285027,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_log_odds_chosen": 0.2856575548648834,
|
|
"eval_log_odds_ratio": -0.6565335392951965,
|
|
"eval_logits/chosen": -0.5022188425064087,
|
|
"eval_logits/rejected": -0.48727092146873474,
|
|
"eval_logps/chosen": -0.8758360147476196,
|
|
"eval_logps/rejected": -1.060706615447998,
|
|
"eval_loss": 1.224434733390808,
|
|
"eval_nll_loss": 1.217404842376709,
|
|
"eval_rewards/accuracies": 0.6028226017951965,
|
|
"eval_rewards/chosen": -0.008758360520005226,
|
|
"eval_rewards/margins": 0.0018487058114260435,
|
|
"eval_rewards/rejected": -0.010607065632939339,
|
|
"eval_runtime": 44.2456,
|
|
"eval_samples_per_second": 45.202,
|
|
"eval_steps_per_second": 1.424,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 7.9049577713012695,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"log_odds_chosen": 0.2682987153530121,
|
|
"log_odds_ratio": -0.6803867816925049,
|
|
"logits/chosen": -0.5115852355957031,
|
|
"logits/rejected": -0.4977447986602783,
|
|
"logps/chosen": -0.8711401224136353,
|
|
"logps/rejected": -1.0264394283294678,
|
|
"loss": 4.9707,
|
|
"nll_loss": 1.2559094429016113,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -0.00871140044182539,
|
|
"rewards/margins": 0.001552992849610746,
|
|
"rewards/rejected": -0.010264393873512745,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 8.560924530029297,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"log_odds_chosen": 0.3053427040576935,
|
|
"log_odds_ratio": -0.6574885845184326,
|
|
"logits/chosen": -0.4770349860191345,
|
|
"logits/rejected": -0.47626155614852905,
|
|
"logps/chosen": -0.8793102502822876,
|
|
"logps/rejected": -1.0621235370635986,
|
|
"loss": 4.9777,
|
|
"nll_loss": 1.1874935626983643,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -0.008793102577328682,
|
|
"rewards/margins": 0.0018281324300915003,
|
|
"rewards/rejected": -0.010621235705912113,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 7.4246320724487305,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"log_odds_chosen": 0.2607780992984772,
|
|
"log_odds_ratio": -0.698715329170227,
|
|
"logits/chosen": -0.5007373094558716,
|
|
"logits/rejected": -0.5147266387939453,
|
|
"logps/chosen": -0.8995779156684875,
|
|
"logps/rejected": -1.0742470026016235,
|
|
"loss": 4.9582,
|
|
"nll_loss": 1.2430001497268677,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": -0.008995778858661652,
|
|
"rewards/margins": 0.0017466908320784569,
|
|
"rewards/rejected": -0.010742468759417534,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 7.529422283172607,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"log_odds_chosen": 0.341641902923584,
|
|
"log_odds_ratio": -0.6579716801643372,
|
|
"logits/chosen": -0.505264937877655,
|
|
"logits/rejected": -0.5192712545394897,
|
|
"logps/chosen": -0.8687723875045776,
|
|
"logps/rejected": -1.0708019733428955,
|
|
"loss": 5.1002,
|
|
"nll_loss": 1.2634742259979248,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": -0.008687724359333515,
|
|
"rewards/margins": 0.0020202945452183485,
|
|
"rewards/rejected": -0.010708019137382507,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 7.834778785705566,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"log_odds_chosen": 0.22981591522693634,
|
|
"log_odds_ratio": -0.6861775517463684,
|
|
"logits/chosen": -0.5010178089141846,
|
|
"logits/rejected": -0.5019730925559998,
|
|
"logps/chosen": -0.9211832284927368,
|
|
"logps/rejected": -1.0682731866836548,
|
|
"loss": 4.9999,
|
|
"nll_loss": 1.2437247037887573,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.009211831726133823,
|
|
"rewards/margins": 0.0014708999078720808,
|
|
"rewards/rejected": -0.010682731866836548,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 8.3234224319458,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"log_odds_chosen": 0.23462414741516113,
|
|
"log_odds_ratio": -0.6922942996025085,
|
|
"logits/chosen": -0.49100571870803833,
|
|
"logits/rejected": -0.49131718277931213,
|
|
"logps/chosen": -0.9402503967285156,
|
|
"logps/rejected": -1.0789129734039307,
|
|
"loss": 4.9825,
|
|
"nll_loss": 1.2720887660980225,
|
|
"rewards/accuracies": 0.5531250238418579,
|
|
"rewards/chosen": -0.009402502328157425,
|
|
"rewards/margins": 0.001386628020554781,
|
|
"rewards/rejected": -0.010789131745696068,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 8.285514831542969,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"log_odds_chosen": 0.2851186990737915,
|
|
"log_odds_ratio": -0.6771829128265381,
|
|
"logits/chosen": -0.5137313008308411,
|
|
"logits/rejected": -0.5017358064651489,
|
|
"logps/chosen": -0.8824328184127808,
|
|
"logps/rejected": -1.0701076984405518,
|
|
"loss": 5.0037,
|
|
"nll_loss": 1.2345225811004639,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -0.008824328891932964,
|
|
"rewards/margins": 0.0018767483998090029,
|
|
"rewards/rejected": -0.010701077058911324,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 5.24161350602124,
|
|
"train_runtime": 5082.9537,
|
|
"train_samples_per_second": 12.027,
|
|
"train_steps_per_second": 0.094
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|