1484 lines
46 KiB
JSON
1484 lines
46 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 955,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0010471204188481676,
|
|
"grad_norm": 26.562393188476562,
|
|
"kl": 0.03359118103981018,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 1025103936.0,
|
|
"logits/rejected": 1063107072.0,
|
|
"logps/chosen": -192.39359907670453,
|
|
"logps/rejected": -244.60918598790323,
|
|
"loss": 1.9877,
|
|
"rewards/chosen": -0.0024960009437618833,
|
|
"rewards/margins": -0.003168148462927353,
|
|
"rewards/rejected": 0.0006721475191654697,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.010471204188481676,
|
|
"grad_norm": 28.173646926879883,
|
|
"kl": 0.047210514545440674,
|
|
"learning_rate": 4.6875e-08,
|
|
"logits/chosen": 1111284224.0,
|
|
"logits/rejected": 1036883072.0,
|
|
"logps/chosen": -273.04239220563846,
|
|
"logps/rejected": -251.92859175774134,
|
|
"loss": 1.9821,
|
|
"rewards/chosen": -0.003001420849789039,
|
|
"rewards/margins": -0.003218886387716663,
|
|
"rewards/rejected": 0.0002174655379276241,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 28.309864044189453,
|
|
"kl": 0.06180702522397041,
|
|
"learning_rate": 9.895833333333332e-08,
|
|
"logits/chosen": 1098295552.0,
|
|
"logits/rejected": 1058195840.0,
|
|
"logps/chosen": -271.31404378742513,
|
|
"logps/rejected": -259.71006944444446,
|
|
"loss": 1.9823,
|
|
"rewards/chosen": -0.003387315544539583,
|
|
"rewards/margins": -0.0018305458493822638,
|
|
"rewards/rejected": -0.001556769695157319,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031413612565445025,
|
|
"grad_norm": 25.917030334472656,
|
|
"kl": 0.017666548490524292,
|
|
"learning_rate": 1.5104166666666664e-07,
|
|
"logits/chosen": 1014291776.0,
|
|
"logits/rejected": 938894528.0,
|
|
"logps/chosen": -286.8935106981982,
|
|
"logps/rejected": -240.47707145765472,
|
|
"loss": 1.9792,
|
|
"rewards/chosen": -0.009706638238809488,
|
|
"rewards/margins": 0.0046446070076055414,
|
|
"rewards/rejected": -0.01435124524641503,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 29.217172622680664,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.03125e-07,
|
|
"logits/chosen": 1001239232.0,
|
|
"logits/rejected": 1008372032.0,
|
|
"logps/chosen": -301.35429748822605,
|
|
"logps/rejected": -276.2920392690513,
|
|
"loss": 1.9909,
|
|
"rewards/chosen": -0.04018252386215904,
|
|
"rewards/margins": 0.004611896940786439,
|
|
"rewards/rejected": -0.04479442080294548,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.05235602094240838,
|
|
"grad_norm": 31.018491744995117,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.552083333333333e-07,
|
|
"logits/chosen": 1010273408.0,
|
|
"logits/rejected": 899531584.0,
|
|
"logps/chosen": -303.4438942307692,
|
|
"logps/rejected": -267.7524305555556,
|
|
"loss": 1.9613,
|
|
"rewards/chosen": -0.1181301762507512,
|
|
"rewards/margins": 0.01815716176388174,
|
|
"rewards/rejected": -0.13628733801463294,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 28.290821075439453,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.0729166666666665e-07,
|
|
"logits/chosen": 1054024704.0,
|
|
"logits/rejected": 1066224448.0,
|
|
"logps/chosen": -294.36489550473186,
|
|
"logps/rejected": -273.374661377709,
|
|
"loss": 1.9469,
|
|
"rewards/chosen": -0.2557775455318415,
|
|
"rewards/margins": 0.006258757347657351,
|
|
"rewards/rejected": -0.26203630287949886,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.07329842931937172,
|
|
"grad_norm": 33.95917892456055,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.59375e-07,
|
|
"logits/chosen": 942386816.0,
|
|
"logits/rejected": 1006898688.0,
|
|
"logps/chosen": -281.92285470257235,
|
|
"logps/rejected": -277.8022416413374,
|
|
"loss": 1.8777,
|
|
"rewards/chosen": -0.5341391609412681,
|
|
"rewards/margins": 0.026281655126598524,
|
|
"rewards/rejected": -0.5604208160678666,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 30.24815559387207,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.114583333333333e-07,
|
|
"logits/chosen": 1031609536.0,
|
|
"logits/rejected": 974328320.0,
|
|
"logps/chosen": -310.20034934915776,
|
|
"logps/rejected": -262.0218301435407,
|
|
"loss": 1.8281,
|
|
"rewards/chosen": -0.771320757785581,
|
|
"rewards/margins": 0.06435381405328466,
|
|
"rewards/rejected": -0.8356745718388656,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.09424083769633508,
|
|
"grad_norm": 16.658273696899414,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.6354166666666664e-07,
|
|
"logits/chosen": 902042816.0,
|
|
"logits/rejected": 935183872.0,
|
|
"logps/chosen": -264.3142224409449,
|
|
"logps/rejected": -268.2765988372093,
|
|
"loss": 1.7369,
|
|
"rewards/chosen": -1.3310041292445867,
|
|
"rewards/margins": 0.07459388280725432,
|
|
"rewards/rejected": -1.405598012051841,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 12.389727592468262,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.999849525959245e-07,
|
|
"logits/chosen": 959631872.0,
|
|
"logits/rejected": 1010654720.0,
|
|
"logps/chosen": -312.21802805280527,
|
|
"logps/rejected": -273.6338788946588,
|
|
"loss": 1.6219,
|
|
"rewards/chosen": -1.7981488822710396,
|
|
"rewards/margins": 0.140241108108226,
|
|
"rewards/rejected": -1.9383899903792656,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.11518324607329843,
|
|
"grad_norm": 7.393283367156982,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.997174935782199e-07,
|
|
"logits/chosen": 880968384.0,
|
|
"logits/rejected": 880545600.0,
|
|
"logps/chosen": -305.7040970062208,
|
|
"logps/rejected": -269.9880543563579,
|
|
"loss": 1.6532,
|
|
"rewards/chosen": -2.356570989866835,
|
|
"rewards/margins": 0.14298288817476612,
|
|
"rewards/rejected": -2.4995538780416013,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 4.027650356292725,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.9911605954668e-07,
|
|
"logits/chosen": 906821824.0,
|
|
"logits/rejected": 1005957952.0,
|
|
"logps/chosen": -298.43577188940094,
|
|
"logps/rejected": -318.1883942766296,
|
|
"loss": 1.6413,
|
|
"rewards/chosen": -3.1073929981518815,
|
|
"rewards/margins": 0.22084922039939014,
|
|
"rewards/rejected": -3.3282422185512717,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.13612565445026178,
|
|
"grad_norm": 2.4550304412841797,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.981814548660135e-07,
|
|
"logits/chosen": 905374080.0,
|
|
"logits/rejected": 1031035072.0,
|
|
"logps/chosen": -317.7083851575456,
|
|
"logps/rejected": -292.74079117429835,
|
|
"loss": 1.518,
|
|
"rewards/chosen": -3.76897818848466,
|
|
"rewards/margins": 0.27336776851774003,
|
|
"rewards/rejected": -4.0423459570024,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 1.2676219940185547,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.969149294871417e-07,
|
|
"logits/chosen": 896367616.0,
|
|
"logits/rejected": 964537536.0,
|
|
"logps/chosen": -305.43130990415335,
|
|
"logps/rejected": -322.4831804281346,
|
|
"loss": 1.5691,
|
|
"rewards/chosen": -4.445092149435903,
|
|
"rewards/margins": 0.32064222855052726,
|
|
"rewards/rejected": -4.76573437798643,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.15706806282722513,
|
|
"grad_norm": 1.0475122928619385,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.953181772754997e-07,
|
|
"logits/chosen": 898162304.0,
|
|
"logits/rejected": 908483776.0,
|
|
"logps/chosen": -313.5234494274809,
|
|
"logps/rejected": -306.161225,
|
|
"loss": 1.636,
|
|
"rewards/chosen": -4.89772967855439,
|
|
"rewards/margins": 0.43406836832061035,
|
|
"rewards/rejected": -5.331798046875,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 1.9686732292175293,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.93393333745642e-07,
|
|
"logits/chosen": 860213888.0,
|
|
"logits/rejected": 877230592.0,
|
|
"logps/chosen": -309.0058340097403,
|
|
"logps/rejected": -306.6567676957831,
|
|
"loss": 1.5387,
|
|
"rewards/chosen": -4.939952503551137,
|
|
"rewards/margins": 0.4787038111608366,
|
|
"rewards/rejected": -5.418656314711973,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.17801047120418848,
|
|
"grad_norm": 3.1122231483459473,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.9114297320518e-07,
|
|
"logits/chosen": 974107328.0,
|
|
"logits/rejected": 970027520.0,
|
|
"logps/chosen": -326.03355211598745,
|
|
"logps/rejected": -319.89719626168227,
|
|
"loss": 1.589,
|
|
"rewards/chosen": -4.719719291854428,
|
|
"rewards/margins": 0.561926200746818,
|
|
"rewards/rejected": -5.281645492601246,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 2.5136497020721436,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.885701053118751e-07,
|
|
"logits/chosen": 924908736.0,
|
|
"logits/rejected": 961720640.0,
|
|
"logps/chosen": -318.30974690880987,
|
|
"logps/rejected": -317.9285643759874,
|
|
"loss": 1.6112,
|
|
"rewards/chosen": -4.51682306257245,
|
|
"rewards/margins": 0.5941317162585138,
|
|
"rewards/rejected": -5.110954778830964,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.19895287958115182,
|
|
"grad_norm": 7.213411331176758,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.856781710484872e-07,
|
|
"logits/chosen": 928431808.0,
|
|
"logits/rejected": 952974528.0,
|
|
"logps/chosen": -314.60199860446573,
|
|
"logps/rejected": -322.4887299004594,
|
|
"loss": 1.5518,
|
|
"rewards/chosen": -4.191326427307616,
|
|
"rewards/margins": 0.6273537327708683,
|
|
"rewards/rejected": -4.818680160078484,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 6.813438892364502,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.824710381207655e-07,
|
|
"logits/chosen": 918448448.0,
|
|
"logits/rejected": 1015918208.0,
|
|
"logps/chosen": -321.83199356913184,
|
|
"logps/rejected": -311.08847834346506,
|
|
"loss": 1.5375,
|
|
"rewards/chosen": -3.782886762711013,
|
|
"rewards/margins": 0.6459696202677101,
|
|
"rewards/rejected": -4.428856382978723,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2198952879581152,
|
|
"grad_norm": 8.781408309936523,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.789529957847353e-07,
|
|
"logits/chosen": 1004952576.0,
|
|
"logits/rejected": 891462464.0,
|
|
"logps/chosen": -323.2924141221374,
|
|
"logps/rejected": -297.6743,
|
|
"loss": 1.5864,
|
|
"rewards/chosen": -3.252991561307252,
|
|
"rewards/margins": 0.762442423067748,
|
|
"rewards/rejected": -4.015433984375,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 13.12763500213623,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.751287491101977e-07,
|
|
"logits/chosen": 946857856.0,
|
|
"logits/rejected": 880448896.0,
|
|
"logps/chosen": -304.32392760093165,
|
|
"logps/rejected": -287.22454795597486,
|
|
"loss": 1.5538,
|
|
"rewards/chosen": -2.888836025451281,
|
|
"rewards/margins": 0.7221485368767455,
|
|
"rewards/rejected": -3.6109845623280266,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.24083769633507854,
|
|
"grad_norm": 11.393705368041992,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.710034126881159e-07,
|
|
"logits/chosen": 1049158464.0,
|
|
"logits/rejected": 824376000.0,
|
|
"logps/chosen": -318.3151382823872,
|
|
"logps/rejected": -307.4688817453626,
|
|
"loss": 1.6174,
|
|
"rewards/chosen": -2.438205946654385,
|
|
"rewards/margins": 0.8078717468795524,
|
|
"rewards/rejected": -3.2460776935339375,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 11.578465461730957,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.665825037903035e-07,
|
|
"logits/chosen": 1024492544.0,
|
|
"logits/rejected": 929939776.0,
|
|
"logps/chosen": -296.2886893297381,
|
|
"logps/rejected": -285.70906794770207,
|
|
"loss": 1.5392,
|
|
"rewards/chosen": -2.06980766684322,
|
|
"rewards/margins": 0.8852973484474891,
|
|
"rewards/rejected": -2.9551050152907092,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.2617801047120419,
|
|
"grad_norm": 13.729632377624512,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.618719349905619e-07,
|
|
"logits/chosen": 1081932800.0,
|
|
"logits/rejected": 961173696.0,
|
|
"logps/chosen": -308.0947280534351,
|
|
"logps/rejected": -284.4129,
|
|
"loss": 1.5103,
|
|
"rewards/chosen": -1.88463274540792,
|
|
"rewards/margins": 0.9027238952170802,
|
|
"rewards/rejected": -2.787356640625,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 11.3008394241333,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.568780062571374e-07,
|
|
"logits/chosen": 972363776.0,
|
|
"logits/rejected": 1046053376.0,
|
|
"logps/chosen": -290.8611027644231,
|
|
"logps/rejected": -295.7996379573171,
|
|
"loss": 1.4671,
|
|
"rewards/chosen": -1.9210040752704327,
|
|
"rewards/margins": 0.9912621675244415,
|
|
"rewards/rejected": -2.9122662427948742,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.28272251308900526,
|
|
"grad_norm": 11.210180282592773,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.516073965270717e-07,
|
|
"logits/chosen": 927748608.0,
|
|
"logits/rejected": 893388288.0,
|
|
"logps/chosen": -287.66021126760563,
|
|
"logps/rejected": -304.2968262480499,
|
|
"loss": 1.485,
|
|
"rewards/chosen": -1.9320662473102503,
|
|
"rewards/margins": 1.088795397702425,
|
|
"rewards/rejected": -3.0208616450126753,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 12.282171249389648,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.460671547737158e-07,
|
|
"logits/chosen": 914422144.0,
|
|
"logits/rejected": 952285568.0,
|
|
"logps/chosen": -319.30807877813504,
|
|
"logps/rejected": -283.1123195288754,
|
|
"loss": 1.4245,
|
|
"rewards/chosen": -1.9814747506782557,
|
|
"rewards/margins": 1.1337076795079144,
|
|
"rewards/rejected": -3.11518243018617,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.3036649214659686,
|
|
"grad_norm": 11.814573287963867,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.40264690579353e-07,
|
|
"logits/chosen": 972118656.0,
|
|
"logits/rejected": 909624512.0,
|
|
"logps/chosen": -312.8792158917683,
|
|
"logps/rejected": -280.78568209134613,
|
|
"loss": 1.5032,
|
|
"rewards/chosen": -1.850838079685118,
|
|
"rewards/margins": 1.1238724336987964,
|
|
"rewards/rejected": -2.9747105133839145,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 12.68067455291748,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.3420776422553916e-07,
|
|
"logits/chosen": 953990016.0,
|
|
"logits/rejected": 940757632.0,
|
|
"logps/chosen": -299.58254491590213,
|
|
"logps/rejected": -287.3024161341853,
|
|
"loss": 1.4921,
|
|
"rewards/chosen": -1.8043567529147553,
|
|
"rewards/margins": 1.231686164355113,
|
|
"rewards/rejected": -3.036042917269868,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.32460732984293195,
|
|
"grad_norm": 17.389904022216797,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.279044763144141e-07,
|
|
"logits/chosen": 894258816.0,
|
|
"logits/rejected": 1021757120.0,
|
|
"logps/chosen": -280.6739217252396,
|
|
"logps/rejected": -316.6185254204893,
|
|
"loss": 1.4539,
|
|
"rewards/chosen": -1.810631115215655,
|
|
"rewards/margins": 1.282302842878764,
|
|
"rewards/rejected": -3.092933958094419,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 10.321409225463867,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.213632569348639e-07,
|
|
"logits/chosen": 1014880128.0,
|
|
"logits/rejected": 907480960.0,
|
|
"logps/chosen": -302.61937225475845,
|
|
"logps/rejected": -298.2493718592965,
|
|
"loss": 1.5305,
|
|
"rewards/chosen": -1.9286555322337116,
|
|
"rewards/margins": 1.3392826695172306,
|
|
"rewards/rejected": -3.267938201750942,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.34554973821989526,
|
|
"grad_norm": 12.460442543029785,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.145928543880249e-07,
|
|
"logits/chosen": 917175104.0,
|
|
"logits/rejected": 969322880.0,
|
|
"logps/chosen": -300.56881911532383,
|
|
"logps/rejected": -297.01260625965995,
|
|
"loss": 1.4141,
|
|
"rewards/chosen": -1.6344111775523302,
|
|
"rewards/margins": 1.6734750551804751,
|
|
"rewards/rejected": -3.3078862327328054,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 11.16015338897705,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.076023234872057e-07,
|
|
"logits/chosen": 866838528.0,
|
|
"logits/rejected": 971704128.0,
|
|
"logps/chosen": -298.1501665993538,
|
|
"logps/rejected": -291.4291792738275,
|
|
"loss": 1.3923,
|
|
"rewards/chosen": -1.5310064509920234,
|
|
"rewards/margins": 1.6836444980908056,
|
|
"rewards/rejected": -3.214650949082829,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.36649214659685864,
|
|
"grad_norm": 27.34151268005371,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.004010134478771e-07,
|
|
"logits/chosen": 968713728.0,
|
|
"logits/rejected": 972469376.0,
|
|
"logps/chosen": -286.62965374228395,
|
|
"logps/rejected": -288.3266663370253,
|
|
"loss": 1.4522,
|
|
"rewards/chosen": -1.4199869603286555,
|
|
"rewards/margins": 1.5951061066956722,
|
|
"rewards/rejected": -3.0150930670243277,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 16.52737045288086,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.9299855538392534e-07,
|
|
"logits/chosen": 922831360.0,
|
|
"logits/rejected": 947811008.0,
|
|
"logps/chosen": -301.2358689263323,
|
|
"logps/rejected": -295.06773267133957,
|
|
"loss": 1.4126,
|
|
"rewards/chosen": -1.6193985415850314,
|
|
"rewards/margins": 1.8232337354788317,
|
|
"rewards/rejected": -3.442632277063863,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.387434554973822,
|
|
"grad_norm": 16.960041046142578,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.8540484942689075e-07,
|
|
"logits/chosen": 870200512.0,
|
|
"logits/rejected": 973426560.0,
|
|
"logps/chosen": -289.64174462579615,
|
|
"logps/rejected": -309.79447852760734,
|
|
"loss": 1.461,
|
|
"rewards/chosen": -1.5883331298828125,
|
|
"rewards/margins": 1.8990898834415741,
|
|
"rewards/rejected": -3.4874230133243866,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 16.1551456451416,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.77630051485419e-07,
|
|
"logits/chosen": 954556544.0,
|
|
"logits/rejected": 868749824.0,
|
|
"logps/chosen": -303.6716906342183,
|
|
"logps/rejected": -294.0799159053156,
|
|
"loss": 1.4297,
|
|
"rewards/chosen": -1.4528007394796276,
|
|
"rewards/margins": 1.913596953432021,
|
|
"rewards/rejected": -3.3663976929116486,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.4083769633507853,
|
|
"grad_norm": 18.04342269897461,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.696845596626342e-07,
|
|
"logits/chosen": 914389696.0,
|
|
"logits/rejected": 942570880.0,
|
|
"logps/chosen": -274.45240575396826,
|
|
"logps/rejected": -291.5584855769231,
|
|
"loss": 1.4241,
|
|
"rewards/chosen": -1.014939953031994,
|
|
"rewards/margins": 1.9614178444439674,
|
|
"rewards/rejected": -2.9763577974759614,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 19.24696159362793,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.61579000349597e-07,
|
|
"logits/chosen": 901277504.0,
|
|
"logits/rejected": 911895168.0,
|
|
"logps/chosen": -296.7239105504587,
|
|
"logps/rejected": -297.9510033945687,
|
|
"loss": 1.3318,
|
|
"rewards/chosen": -1.093078053325688,
|
|
"rewards/margins": 2.3687205688787847,
|
|
"rewards/rejected": -3.4617986222044728,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.4293193717277487,
|
|
"grad_norm": 20.188373565673828,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.5332421401344837e-07,
|
|
"logits/chosen": 755553600.0,
|
|
"logits/rejected": 901826880.0,
|
|
"logps/chosen": -295.43673716329965,
|
|
"logps/rejected": -291.0846164358601,
|
|
"loss": 1.3484,
|
|
"rewards/chosen": -1.2031269523029777,
|
|
"rewards/margins": 2.3843341133174665,
|
|
"rewards/rejected": -3.5874610656204444,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 39.75468444824219,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.4493124069924635e-07,
|
|
"logits/chosen": 849826752.0,
|
|
"logits/rejected": 898505088.0,
|
|
"logps/chosen": -305.4299,
|
|
"logps/rejected": -290.353697519084,
|
|
"loss": 1.3488,
|
|
"rewards/chosen": -1.8245701171875,
|
|
"rewards/margins": 2.5110345315481872,
|
|
"rewards/rejected": -4.335604648735687,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.450261780104712,
|
|
"grad_norm": 23.038509368896484,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.3641130526488335e-07,
|
|
"logits/chosen": 801155392.0,
|
|
"logits/rejected": 885279936.0,
|
|
"logps/chosen": -271.98359007064363,
|
|
"logps/rejected": -319.79568429237946,
|
|
"loss": 1.4282,
|
|
"rewards/chosen": -1.701683727126668,
|
|
"rewards/margins": 2.225468583210618,
|
|
"rewards/rejected": -3.927152310337286,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 16.816984176635742,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.2777580236883473e-07,
|
|
"logits/chosen": 781878144.0,
|
|
"logits/rejected": 840188736.0,
|
|
"logps/chosen": -268.33087060702877,
|
|
"logps/rejected": -298.1252628058104,
|
|
"loss": 1.3415,
|
|
"rewards/chosen": -0.9628440198806909,
|
|
"rewards/margins": 2.604564550575158,
|
|
"rewards/rejected": -3.5674085704558487,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.4712041884816754,
|
|
"grad_norm": 23.657085418701172,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.1903628123081196e-07,
|
|
"logits/chosen": 815678464.0,
|
|
"logits/rejected": 777286144.0,
|
|
"logps/chosen": -294.5112335015528,
|
|
"logps/rejected": -298.4295892295597,
|
|
"loss": 1.3785,
|
|
"rewards/chosen": -1.723951446343653,
|
|
"rewards/margins": 2.6521049608890515,
|
|
"rewards/rejected": -4.376056407232705,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 19.01488494873047,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.1020443018570556e-07,
|
|
"logits/chosen": 727440000.0,
|
|
"logits/rejected": 849506112.0,
|
|
"logps/chosen": -292.6208908279221,
|
|
"logps/rejected": -297.79659262048193,
|
|
"loss": 1.3359,
|
|
"rewards/chosen": -1.9514271129261365,
|
|
"rewards/margins": 2.62783696486942,
|
|
"rewards/rejected": -4.579264077795557,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.49214659685863876,
|
|
"grad_norm": 33.731056213378906,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.0129206105147343e-07,
|
|
"logits/chosen": 776329344.0,
|
|
"logits/rejected": 846505344.0,
|
|
"logps/chosen": -313.49515701468187,
|
|
"logps/rejected": -295.306128185907,
|
|
"loss": 1.3287,
|
|
"rewards/chosen": -2.3697519722420473,
|
|
"rewards/margins": 2.4085628498391,
|
|
"rewards/rejected": -4.778314822081147,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 13.92732048034668,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.923110933318805e-07,
|
|
"logits/chosen": 808196672.0,
|
|
"logits/rejected": 800854272.0,
|
|
"logps/chosen": -298.69172108208954,
|
|
"logps/rejected": -294.38888319672134,
|
|
"loss": 1.4221,
|
|
"rewards/chosen": -2.5513560736357275,
|
|
"rewards/margins": 2.401674872189068,
|
|
"rewards/rejected": -4.953030945824795,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.5130890052356021,
|
|
"grad_norm": 19.206186294555664,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.832735382752194e-07,
|
|
"logits/chosen": 850527616.0,
|
|
"logits/rejected": 800958400.0,
|
|
"logps/chosen": -290.59834418070443,
|
|
"logps/rejected": -309.14478668261563,
|
|
"loss": 1.3764,
|
|
"rewards/chosen": -2.0100744460662328,
|
|
"rewards/margins": 2.6875198147989985,
|
|
"rewards/rejected": -4.697594260865231,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 27.097524642944336,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.741914828103307e-07,
|
|
"logits/chosen": 786880896.0,
|
|
"logits/rejected": 811829952.0,
|
|
"logps/chosen": -284.3116169544741,
|
|
"logps/rejected": -294.09241349144634,
|
|
"loss": 1.3165,
|
|
"rewards/chosen": -1.4351621714641385,
|
|
"rewards/margins": 2.9411340899569347,
|
|
"rewards/rejected": -4.376296261421073,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.5340314136125655,
|
|
"grad_norm": 24.57645034790039,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.650770733814065e-07,
|
|
"logits/chosen": 794909632.0,
|
|
"logits/rejected": 800322944.0,
|
|
"logps/chosen": -303.6320275119617,
|
|
"logps/rejected": -303.80400555130166,
|
|
"loss": 1.3389,
|
|
"rewards/chosen": -2.452097623542165,
|
|
"rewards/margins": 2.8188633259218467,
|
|
"rewards/rejected": -5.270960949464012,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 19.90850067138672,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.55942499703198e-07,
|
|
"logits/chosen": 832917312.0,
|
|
"logits/rejected": 859018368.0,
|
|
"logps/chosen": -311.358175,
|
|
"logps/rejected": -306.0612118320611,
|
|
"loss": 1.3487,
|
|
"rewards/chosen": -2.985473828125,
|
|
"rewards/margins": 2.5981758319417936,
|
|
"rewards/rejected": -5.5836496600667935,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.5549738219895288,
|
|
"grad_norm": 16.22838020324707,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.467999784583527e-07,
|
|
"logits/chosen": 785304256.0,
|
|
"logits/rejected": 842632128.0,
|
|
"logps/chosen": -284.2848192891374,
|
|
"logps/rejected": -302.64994266055044,
|
|
"loss": 1.299,
|
|
"rewards/chosen": -1.9525462385183705,
|
|
"rewards/margins": 3.111523503573564,
|
|
"rewards/rejected": -5.0640697420919345,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 19.957088470458984,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.3766173695868388e-07,
|
|
"logits/chosen": 769985984.0,
|
|
"logits/rejected": 797255360.0,
|
|
"logps/chosen": -298.26605570444104,
|
|
"logps/rejected": -304.75356359649123,
|
|
"loss": 1.3787,
|
|
"rewards/chosen": -1.5651508699332408,
|
|
"rewards/margins": 2.674011544714885,
|
|
"rewards/rejected": -4.239162414648126,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.5759162303664922,
|
|
"grad_norm": 20.141708374023438,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.285399967922253e-07,
|
|
"logits/chosen": 732652224.0,
|
|
"logits/rejected": 825995904.0,
|
|
"logps/chosen": -283.12699680511184,
|
|
"logps/rejected": -302.75038226299694,
|
|
"loss": 1.2983,
|
|
"rewards/chosen": -2.2529322225064896,
|
|
"rewards/margins": 2.993755080389153,
|
|
"rewards/rejected": -5.2466873028956424,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 15.83252239227295,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.194469574779397e-07,
|
|
"logits/chosen": 891087488.0,
|
|
"logits/rejected": 787483520.0,
|
|
"logps/chosen": -304.4279685128983,
|
|
"logps/rejected": -305.6478713768116,
|
|
"loss": 1.3612,
|
|
"rewards/chosen": -2.3803220062950494,
|
|
"rewards/margins": 2.7306727350495557,
|
|
"rewards/rejected": -5.110994741344605,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.5968586387434555,
|
|
"grad_norm": 15.746390342712402,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.1039478014994441e-07,
|
|
"logits/chosen": 725274944.0,
|
|
"logits/rejected": 853497664.0,
|
|
"logps/chosen": -282.81245139968894,
|
|
"logps/rejected": -308.07447017268447,
|
|
"loss": 1.331,
|
|
"rewards/chosen": -1.969779339400272,
|
|
"rewards/margins": 2.9806961407557324,
|
|
"rewards/rejected": -4.950475480156005,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 29.05588150024414,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.0139557129307149e-07,
|
|
"logits/chosen": 805204992.0,
|
|
"logits/rejected": 881275392.0,
|
|
"logps/chosen": -307.06715266719743,
|
|
"logps/rejected": -325.5736915260736,
|
|
"loss": 1.2908,
|
|
"rewards/chosen": -1.7291349180185112,
|
|
"rewards/margins": 3.099390742209633,
|
|
"rewards/rejected": -4.828525660228144,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.6178010471204188,
|
|
"grad_norm": 17.282276153564453,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.9246136655151808e-07,
|
|
"logits/chosen": 853613056.0,
|
|
"logits/rejected": 822100032.0,
|
|
"logps/chosen": -309.79278100775196,
|
|
"logps/rejected": -327.5703248031496,
|
|
"loss": 1.296,
|
|
"rewards/chosen": -2.4475913350896317,
|
|
"rewards/margins": 3.021821112621983,
|
|
"rewards/rejected": -5.469412447711615,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 25.881441116333008,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.8360411463223873e-07,
|
|
"logits/chosen": 790009792.0,
|
|
"logits/rejected": 845724032.0,
|
|
"logps/chosen": -297.8154809220986,
|
|
"logps/rejected": -315.4985839093702,
|
|
"loss": 1.3103,
|
|
"rewards/chosen": -2.263746592122913,
|
|
"rewards/margins": 3.04722330995082,
|
|
"rewards/rejected": -5.310969902073733,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.6387434554973822,
|
|
"grad_norm": 22.745113372802734,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.7483566132460865e-07,
|
|
"logits/chosen": 793115456.0,
|
|
"logits/rejected": 847319680.0,
|
|
"logps/chosen": -307.8780409414557,
|
|
"logps/rejected": -297.18407600308643,
|
|
"loss": 1.3389,
|
|
"rewards/chosen": -2.189051567753659,
|
|
"rewards/margins": 2.7282397673640113,
|
|
"rewards/rejected": -4.91729133511767,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 20.82432746887207,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.66167733657731e-07,
|
|
"logits/chosen": 836790912.0,
|
|
"logits/rejected": 851649024.0,
|
|
"logps/chosen": -310.7592703349282,
|
|
"logps/rejected": -311.3528426493109,
|
|
"loss": 1.3303,
|
|
"rewards/chosen": -2.0225349195075757,
|
|
"rewards/margins": 2.746800419791812,
|
|
"rewards/rejected": -4.769335339299388,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.6596858638743456,
|
|
"grad_norm": 18.315628051757812,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.5761192421657456e-07,
|
|
"logits/chosen": 792098688.0,
|
|
"logits/rejected": 826501376.0,
|
|
"logps/chosen": -301.0860373402556,
|
|
"logps/rejected": -318.67146884556576,
|
|
"loss": 1.2914,
|
|
"rewards/chosen": -1.6394025418705072,
|
|
"rewards/margins": 3.416822596102543,
|
|
"rewards/rejected": -5.05622513797305,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 16.926488876342773,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.491796756379185e-07,
|
|
"logits/chosen": 887689536.0,
|
|
"logits/rejected": 823059328.0,
|
|
"logps/chosen": -321.0725746268657,
|
|
"logps/rejected": -304.9668545081967,
|
|
"loss": 1.3763,
|
|
"rewards/chosen": -1.9418799556902986,
|
|
"rewards/margins": 2.9874629458209716,
|
|
"rewards/rejected": -4.92934290151127,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.680628272251309,
|
|
"grad_norm": 17.46025276184082,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.4088226530684071e-07,
|
|
"logits/chosen": 877960000.0,
|
|
"logits/rejected": 800502720.0,
|
|
"logps/chosen": -305.0225134408602,
|
|
"logps/rejected": -312.8841166534181,
|
|
"loss": 1.2991,
|
|
"rewards/chosen": -1.6010805941580262,
|
|
"rewards/margins": 3.246691709970551,
|
|
"rewards/rejected": -4.847772304128577,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 26.456403732299805,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.327307902742142e-07,
|
|
"logits/chosen": 874241024.0,
|
|
"logits/rejected": 855177088.0,
|
|
"logps/chosen": -293.12502403846156,
|
|
"logps/rejected": -322.4185267857143,
|
|
"loss": 1.2745,
|
|
"rewards/chosen": -1.854320537860577,
|
|
"rewards/margins": 3.6010327297485505,
|
|
"rewards/rejected": -5.455353267609127,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.7015706806282722,
|
|
"grad_norm": 27.566020965576172,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.2473615241538523e-07,
|
|
"logits/chosen": 803011584.0,
|
|
"logits/rejected": 778086784.0,
|
|
"logps/chosen": -281.4911978390462,
|
|
"logps/rejected": -322.3012366584565,
|
|
"loss": 1.3525,
|
|
"rewards/chosen": -1.6165469492245714,
|
|
"rewards/margins": 3.3331648926781376,
|
|
"rewards/rejected": -4.949711841902709,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 25.231483459472656,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.169090438498816e-07,
|
|
"logits/chosen": 864072320.0,
|
|
"logits/rejected": 894087488.0,
|
|
"logps/chosen": -299.5445031298905,
|
|
"logps/rejected": -311.1689498829953,
|
|
"loss": 1.3068,
|
|
"rewards/chosen": -1.4237583978261932,
|
|
"rewards/margins": 3.4037450148103123,
|
|
"rewards/rejected": -4.8275034126365055,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.7225130890052356,
|
|
"grad_norm": 26.85382652282715,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.0925993264165045e-07,
|
|
"logits/chosen": 801657152.0,
|
|
"logits/rejected": 836854656.0,
|
|
"logps/chosen": -295.48494908146967,
|
|
"logps/rejected": -316.611381880734,
|
|
"loss": 1.2955,
|
|
"rewards/chosen": -1.5278358337597344,
|
|
"rewards/margins": 3.3747778148211136,
|
|
"rewards/rejected": -4.902613648580848,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 17.67808723449707,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.0179904879894998e-07,
|
|
"logits/chosen": 839250560.0,
|
|
"logits/rejected": 829687552.0,
|
|
"logps/chosen": -294.1292613636364,
|
|
"logps/rejected": -317.5498929127726,
|
|
"loss": 1.3023,
|
|
"rewards/chosen": -1.7617466845856191,
|
|
"rewards/margins": 3.402093731107527,
|
|
"rewards/rejected": -5.163840415693146,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.743455497382199,
|
|
"grad_norm": 32.00407791137695,
|
|
"kl": 0.0,
|
|
"learning_rate": 9.453637059262117e-08,
|
|
"logits/chosen": 800788096.0,
|
|
"logits/rejected": 837777152.0,
|
|
"logps/chosen": -289.1109255725191,
|
|
"logps/rejected": -294.386675,
|
|
"loss": 1.3705,
|
|
"rewards/chosen": -1.946717147244752,
|
|
"rewards/margins": 2.817464102755248,
|
|
"rewards/rejected": -4.76418125,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 18.53364372253418,
|
|
"kl": 0.0,
|
|
"learning_rate": 8.748161121103406e-08,
|
|
"logits/chosen": 801277312.0,
|
|
"logits/rejected": 835388288.0,
|
|
"logps/chosen": -298.15263287401575,
|
|
"logps/rejected": -325.6966812015504,
|
|
"loss": 1.3015,
|
|
"rewards/chosen": -1.6334436131274606,
|
|
"rewards/margins": 3.3554996554045164,
|
|
"rewards/rejected": -4.988943268531977,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.7643979057591623,
|
|
"grad_norm": 18.17774772644043,
|
|
"kl": 0.0,
|
|
"learning_rate": 8.064420576955965e-08,
|
|
"logits/chosen": 859945600.0,
|
|
"logits/rejected": 924831360.0,
|
|
"logps/chosen": -302.85615234375,
|
|
"logps/rejected": -323.9920654296875,
|
|
"loss": 1.2968,
|
|
"rewards/chosen": -2.0160938262939454,
|
|
"rewards/margins": 3.482906723022461,
|
|
"rewards/rejected": -5.499000549316406,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 24.694583892822266,
|
|
"kl": 0.0,
|
|
"learning_rate": 7.403329869193922e-08,
|
|
"logits/chosen": 838223552.0,
|
|
"logits/rejected": 778052224.0,
|
|
"logps/chosen": -290.2984591013825,
|
|
"logps/rejected": -299.4383942766296,
|
|
"loss": 1.2544,
|
|
"rewards/chosen": -1.8794061569940477,
|
|
"rewards/margins": 3.8346927506222084,
|
|
"rewards/rejected": -5.714098907616256,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.7853403141361257,
|
|
"grad_norm": 29.033084869384766,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.765773148042858e-08,
|
|
"logits/chosen": 843541504.0,
|
|
"logits/rejected": 823506752.0,
|
|
"logps/chosen": -299.42698732718895,
|
|
"logps/rejected": -302.11419415739266,
|
|
"loss": 1.3466,
|
|
"rewards/chosen": -2.019189603134601,
|
|
"rewards/margins": 2.8317887448135313,
|
|
"rewards/rejected": -4.850978347948132,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 19.56595230102539,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.152603089107139e-08,
|
|
"logits/chosen": 810777344.0,
|
|
"logits/rejected": 779764736.0,
|
|
"logps/chosen": -289.197298325723,
|
|
"logps/rejected": -302.0749899678973,
|
|
"loss": 1.3425,
|
|
"rewards/chosen": -1.8063266237395357,
|
|
"rewards/margins": 3.0717198437062905,
|
|
"rewards/rejected": -4.878046467445826,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.806282722513089,
|
|
"grad_norm": 14.122668266296387,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.5646397529920175e-08,
|
|
"logits/chosen": 837819072.0,
|
|
"logits/rejected": 859991232.0,
|
|
"logps/chosen": -314.30802861685214,
|
|
"logps/rejected": -308.1335925499232,
|
|
"loss": 1.1956,
|
|
"rewards/chosen": -1.5554460243504074,
|
|
"rewards/margins": 3.727686596137304,
|
|
"rewards/rejected": -5.283132620487711,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 22.549423217773438,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.002669488545111e-08,
|
|
"logits/chosen": 790213888.0,
|
|
"logits/rejected": 885866496.0,
|
|
"logps/chosen": -293.48640188834156,
|
|
"logps/rejected": -321.5396562965723,
|
|
"loss": 1.3008,
|
|
"rewards/chosen": -1.921995266317734,
|
|
"rewards/margins": 2.910402012696797,
|
|
"rewards/rejected": -4.832397279014531,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.8272251308900523,
|
|
"grad_norm": 21.376644134521484,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.467443881184646e-08,
|
|
"logits/chosen": 782350336.0,
|
|
"logits/rejected": 819218560.0,
|
|
"logps/chosen": -300.98388364779873,
|
|
"logps/rejected": -297.211810947205,
|
|
"loss": 1.3223,
|
|
"rewards/chosen": -2.0677747426542847,
|
|
"rewards/margins": 2.908827941982749,
|
|
"rewards/rejected": -4.976602684637034,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 27.827619552612305,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.959678747720488e-08,
|
|
"logits/chosen": 906698944.0,
|
|
"logits/rejected": 851121024.0,
|
|
"logps/chosen": -293.82601851851854,
|
|
"logps/rejected": -310.759375,
|
|
"loss": 1.3648,
|
|
"rewards/chosen": -2.1147281901041666,
|
|
"rewards/margins": 3.1817343938317837,
|
|
"rewards/rejected": -5.29646258393595,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.8481675392670157,
|
|
"grad_norm": 14.492793083190918,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.480053179012654e-08,
|
|
"logits/chosen": 731670336.0,
|
|
"logits/rejected": 864448640.0,
|
|
"logps/chosen": -282.0983207472178,
|
|
"logps/rejected": -311.6561539938556,
|
|
"loss": 1.3694,
|
|
"rewards/chosen": -2.1266589141991257,
|
|
"rewards/margins": 2.7596034315910045,
|
|
"rewards/rejected": -4.88626234579013,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 22.540645599365234,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.029208631747446e-08,
|
|
"logits/chosen": 822572672.0,
|
|
"logits/rejected": 801164608.0,
|
|
"logps/chosen": -287.1926763803681,
|
|
"logps/rejected": -311.2378085191083,
|
|
"loss": 1.2763,
|
|
"rewards/chosen": -1.883757983248658,
|
|
"rewards/margins": 3.543778513442226,
|
|
"rewards/rejected": -5.427536496690884,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.8691099476439791,
|
|
"grad_norm": 17.336292266845703,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.607748070546037e-08,
|
|
"logits/chosen": 845615360.0,
|
|
"logits/rejected": 904532160.0,
|
|
"logps/chosen": -289.08657827287067,
|
|
"logps/rejected": -321.46013931888547,
|
|
"loss": 1.3094,
|
|
"rewards/chosen": -1.9600795420938486,
|
|
"rewards/margins": 3.3928427380435355,
|
|
"rewards/rejected": -5.352922280137384,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 26.244890213012695,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.2162351615526544e-08,
|
|
"logits/chosen": 830761088.0,
|
|
"logits/rejected": 874489984.0,
|
|
"logps/chosen": -318.6343998015873,
|
|
"logps/rejected": -321.2511057692308,
|
|
"loss": 1.3163,
|
|
"rewards/chosen": -2.282879929315476,
|
|
"rewards/margins": 2.976137874170101,
|
|
"rewards/rejected": -5.259017803485577,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.8900523560209425,
|
|
"grad_norm": 18.969242095947266,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.8551935185811717e-08,
|
|
"logits/chosen": 761686912.0,
|
|
"logits/rejected": 807224448.0,
|
|
"logps/chosen": -299.0131200396825,
|
|
"logps/rejected": -326.48896634615386,
|
|
"loss": 1.3118,
|
|
"rewards/chosen": -2.2417308020213293,
|
|
"rewards/margins": 3.3625240056709784,
|
|
"rewards/rejected": -5.604254807692308,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 13.49955940246582,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.5251060028279612e-08,
|
|
"logits/chosen": 844396160.0,
|
|
"logits/rejected": 831924288.0,
|
|
"logps/chosen": -285.6634949768161,
|
|
"logps/rejected": -329.8748518957346,
|
|
"loss": 1.3469,
|
|
"rewards/chosen": -2.111665280561727,
|
|
"rewards/margins": 3.1700718436237,
|
|
"rewards/rejected": -5.281737124185427,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.9109947643979057,
|
|
"grad_norm": 20.407133102416992,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.2264140770878839e-08,
|
|
"logits/chosen": 813423744.0,
|
|
"logits/rejected": 872637824.0,
|
|
"logps/chosen": -313.7548781695721,
|
|
"logps/rejected": -317.90100154083206,
|
|
"loss": 1.3324,
|
|
"rewards/chosen": -2.2133790997053286,
|
|
"rewards/margins": 3.005966768938739,
|
|
"rewards/rejected": -5.2193458686440675,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 21.028409957885742,
|
|
"kl": 0.0,
|
|
"learning_rate": 9.59517215336922e-09,
|
|
"logits/chosen": 710001472.0,
|
|
"logits/rejected": 798854528.0,
|
|
"logps/chosen": -295.04663047730827,
|
|
"logps/rejected": -312.5884847893916,
|
|
"loss": 1.3053,
|
|
"rewards/chosen": -2.2264215173855635,
|
|
"rewards/margins": 3.319438070904218,
|
|
"rewards/rejected": -5.545859588289781,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.9319371727748691,
|
|
"grad_norm": 35.28827667236328,
|
|
"kl": 0.0,
|
|
"learning_rate": 7.247723684711382e-09,
|
|
"logits/chosen": 821099200.0,
|
|
"logits/rejected": 816860864.0,
|
|
"logps/chosen": -284.8080221036585,
|
|
"logps/rejected": -317.58358373397436,
|
|
"loss": 1.3087,
|
|
"rewards/chosen": -1.8735839099418827,
|
|
"rewards/margins": 3.3290528088081173,
|
|
"rewards/rejected": -5.20263671875,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 20.1422176361084,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.224934869164976e-09,
|
|
"logits/chosen": 833211072.0,
|
|
"logits/rejected": 892030464.0,
|
|
"logps/chosen": -308.6479436790924,
|
|
"logps/rejected": -323.03117929864254,
|
|
"loss": 1.3294,
|
|
"rewards/chosen": -2.350066396753444,
|
|
"rewards/margins": 2.840480389869859,
|
|
"rewards/rejected": -5.190546786623303,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.9528795811518325,
|
|
"grad_norm": 15.786822319030762,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.529511007479946e-09,
|
|
"logits/chosen": 850338304.0,
|
|
"logits/rejected": 878503936.0,
|
|
"logps/chosen": -307.224846390169,
|
|
"logps/rejected": -304.68362480127183,
|
|
"loss": 1.3362,
|
|
"rewards/chosen": -2.205735917098694,
|
|
"rewards/margins": 3.0281383811425617,
|
|
"rewards/rejected": -5.233874298241256,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 23.114709854125977,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.1637195787966857e-09,
|
|
"logits/chosen": 768496128.0,
|
|
"logits/rejected": 914081792.0,
|
|
"logps/chosen": -307.8621338282504,
|
|
"logps/rejected": -306.88308599695586,
|
|
"loss": 1.2942,
|
|
"rewards/chosen": -1.9231529174608748,
|
|
"rewards/margins": 3.125122639640533,
|
|
"rewards/rejected": -5.048275557101408,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.9738219895287958,
|
|
"grad_norm": 22.419374465942383,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.1293872080934963e-09,
|
|
"logits/chosen": 777531392.0,
|
|
"logits/rejected": 896280960.0,
|
|
"logps/chosen": -298.2679078733766,
|
|
"logps/rejected": -321.1836643448795,
|
|
"loss": 1.2511,
|
|
"rewards/chosen": -2.0406327681107954,
|
|
"rewards/margins": 3.466080363713376,
|
|
"rewards/rejected": -5.5067131318241715,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 22.132793426513672,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.2789722323760546e-10,
|
|
"logits/chosen": 845816704.0,
|
|
"logits/rejected": 823620416.0,
|
|
"logps/chosen": -302.37687125748505,
|
|
"logps/rejected": -313.2297794117647,
|
|
"loss": 1.3619,
|
|
"rewards/chosen": -1.8776080651197604,
|
|
"rewards/margins": 3.33271708870479,
|
|
"rewards/rejected": -5.2103251538245505,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.9947643979057592,
|
|
"grad_norm": 22.317419052124023,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.018780490690822e-11,
|
|
"logits/chosen": 842274304.0,
|
|
"logits/rejected": 781750656.0,
|
|
"logps/chosen": -298.93238636363634,
|
|
"logps/rejected": -305.31222278225806,
|
|
"loss": 1.3093,
|
|
"rewards/chosen": -2.095315459280303,
|
|
"rewards/margins": 3.475892485528165,
|
|
"rewards/rejected": -5.571207944808468,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 955,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.4430672781629712,
|
|
"train_runtime": 11696.6719,
|
|
"train_samples_per_second": 10.453,
|
|
"train_steps_per_second": 0.082
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 955,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|