938 lines
23 KiB
JSON
938 lines
23 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 10.0,
|
|
"eval_steps": 500,
|
|
"global_step": 38780,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.07735946364105209,
|
|
"grad_norm": 1.558766484260559,
|
|
"learning_rate": 7.726098191214471e-05,
|
|
"loss": 7.285,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.15471892728210418,
|
|
"grad_norm": 0.9385259747505188,
|
|
"learning_rate": 9.944781600812648e-05,
|
|
"loss": 4.7972,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.23207839092315627,
|
|
"grad_norm": 0.9864380359649658,
|
|
"learning_rate": 9.866642356679604e-05,
|
|
"loss": 4.5563,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.30943785456420836,
|
|
"grad_norm": 0.971341073513031,
|
|
"learning_rate": 9.788503112546558e-05,
|
|
"loss": 4.4545,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3867973182052604,
|
|
"grad_norm": 0.8991674184799194,
|
|
"learning_rate": 9.710363868413513e-05,
|
|
"loss": 4.3616,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.46415678184631254,
|
|
"grad_norm": 0.9576361775398254,
|
|
"learning_rate": 9.632224624280469e-05,
|
|
"loss": 4.2695,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.5415162454873647,
|
|
"grad_norm": 0.9536678791046143,
|
|
"learning_rate": 9.554085380147423e-05,
|
|
"loss": 4.2254,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.6188757091284167,
|
|
"grad_norm": 0.892548680305481,
|
|
"learning_rate": 9.475946136014378e-05,
|
|
"loss": 4.1759,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6962351727694688,
|
|
"grad_norm": 0.9262155294418335,
|
|
"learning_rate": 9.397806891881334e-05,
|
|
"loss": 4.1328,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.7735946364105208,
|
|
"grad_norm": 1.055438756942749,
|
|
"learning_rate": 9.319667647748288e-05,
|
|
"loss": 4.0732,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.850954100051573,
|
|
"grad_norm": 1.0588972568511963,
|
|
"learning_rate": 9.241528403615243e-05,
|
|
"loss": 4.0574,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.9283135636926251,
|
|
"grad_norm": 1.344167947769165,
|
|
"learning_rate": 9.163389159482197e-05,
|
|
"loss": 4.0472,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 1.0056730273336771,
|
|
"grad_norm": 0.9573405981063843,
|
|
"learning_rate": 9.085249915349152e-05,
|
|
"loss": 3.972,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 1.0830324909747293,
|
|
"grad_norm": 1.0597045421600342,
|
|
"learning_rate": 9.007110671216108e-05,
|
|
"loss": 3.8933,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 1.1603919546157813,
|
|
"grad_norm": 1.1895560026168823,
|
|
"learning_rate": 8.928971427083062e-05,
|
|
"loss": 3.8657,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 1.2377514182568334,
|
|
"grad_norm": 1.1971007585525513,
|
|
"learning_rate": 8.850832182950017e-05,
|
|
"loss": 3.8486,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 1.3151108818978856,
|
|
"grad_norm": 1.2342840433120728,
|
|
"learning_rate": 8.772692938816972e-05,
|
|
"loss": 3.8417,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 1.3924703455389376,
|
|
"grad_norm": 1.213428258895874,
|
|
"learning_rate": 8.694553694683926e-05,
|
|
"loss": 3.8048,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 1.4698298091799897,
|
|
"grad_norm": 1.191662073135376,
|
|
"learning_rate": 8.616414450550882e-05,
|
|
"loss": 3.7818,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 1.5471892728210417,
|
|
"grad_norm": 1.3016968965530396,
|
|
"learning_rate": 8.538275206417838e-05,
|
|
"loss": 3.7365,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.6245487364620939,
|
|
"grad_norm": 1.179246187210083,
|
|
"learning_rate": 8.460135962284792e-05,
|
|
"loss": 3.7605,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 1.701908200103146,
|
|
"grad_norm": 1.2382755279541016,
|
|
"learning_rate": 8.381996718151747e-05,
|
|
"loss": 3.6887,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 1.7792676637441982,
|
|
"grad_norm": 1.209956169128418,
|
|
"learning_rate": 8.303857474018702e-05,
|
|
"loss": 3.7204,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 1.8566271273852502,
|
|
"grad_norm": 1.119544267654419,
|
|
"learning_rate": 8.225718229885656e-05,
|
|
"loss": 3.6682,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 1.933986591026302,
|
|
"grad_norm": 1.2890771627426147,
|
|
"learning_rate": 8.147578985752612e-05,
|
|
"loss": 3.6434,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 2.0113460546673543,
|
|
"grad_norm": 1.2189580202102661,
|
|
"learning_rate": 8.069439741619567e-05,
|
|
"loss": 3.6477,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 2.0887055183084065,
|
|
"grad_norm": 1.4200156927108765,
|
|
"learning_rate": 7.991300497486521e-05,
|
|
"loss": 3.5883,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 2.1660649819494586,
|
|
"grad_norm": 1.1501333713531494,
|
|
"learning_rate": 7.913161253353476e-05,
|
|
"loss": 3.6023,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 2.2434244455905104,
|
|
"grad_norm": 1.321439504623413,
|
|
"learning_rate": 7.83502200922043e-05,
|
|
"loss": 3.5765,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 2.3207839092315625,
|
|
"grad_norm": 1.4532771110534668,
|
|
"learning_rate": 7.756882765087386e-05,
|
|
"loss": 3.5858,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 2.3981433728726147,
|
|
"grad_norm": 1.2922136783599854,
|
|
"learning_rate": 7.67874352095434e-05,
|
|
"loss": 3.5483,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 2.475502836513667,
|
|
"grad_norm": 1.459169864654541,
|
|
"learning_rate": 7.600604276821297e-05,
|
|
"loss": 3.551,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 2.552862300154719,
|
|
"grad_norm": 1.3106615543365479,
|
|
"learning_rate": 7.522465032688251e-05,
|
|
"loss": 3.5216,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 2.630221763795771,
|
|
"grad_norm": 1.4244039058685303,
|
|
"learning_rate": 7.444325788555206e-05,
|
|
"loss": 3.5251,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 2.707581227436823,
|
|
"grad_norm": 1.3957465887069702,
|
|
"learning_rate": 7.366186544422162e-05,
|
|
"loss": 3.5245,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 2.784940691077875,
|
|
"grad_norm": 1.4246965646743774,
|
|
"learning_rate": 7.288047300289116e-05,
|
|
"loss": 3.4938,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 2.8623001547189273,
|
|
"grad_norm": 1.3009408712387085,
|
|
"learning_rate": 7.209908056156071e-05,
|
|
"loss": 3.4972,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 2.9396596183599795,
|
|
"grad_norm": 1.2788194417953491,
|
|
"learning_rate": 7.131768812023025e-05,
|
|
"loss": 3.4835,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 3.0170190820010316,
|
|
"grad_norm": 1.415262222290039,
|
|
"learning_rate": 7.05362956788998e-05,
|
|
"loss": 3.4686,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 3.0943785456420834,
|
|
"grad_norm": 1.3552271127700806,
|
|
"learning_rate": 6.975490323756934e-05,
|
|
"loss": 3.4434,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 3.1717380092831355,
|
|
"grad_norm": 1.2953003644943237,
|
|
"learning_rate": 6.89735107962389e-05,
|
|
"loss": 3.406,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 3.2490974729241877,
|
|
"grad_norm": 1.2616957426071167,
|
|
"learning_rate": 6.819211835490845e-05,
|
|
"loss": 3.4219,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 3.32645693656524,
|
|
"grad_norm": 1.3086093664169312,
|
|
"learning_rate": 6.7410725913578e-05,
|
|
"loss": 3.4327,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 3.403816400206292,
|
|
"grad_norm": 1.5225331783294678,
|
|
"learning_rate": 6.662933347224755e-05,
|
|
"loss": 3.3881,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 3.4811758638473442,
|
|
"grad_norm": 1.3017733097076416,
|
|
"learning_rate": 6.58479410309171e-05,
|
|
"loss": 3.4253,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 3.558535327488396,
|
|
"grad_norm": 1.4945634603500366,
|
|
"learning_rate": 6.506654858958666e-05,
|
|
"loss": 3.3739,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 3.635894791129448,
|
|
"grad_norm": 1.3506596088409424,
|
|
"learning_rate": 6.42851561482562e-05,
|
|
"loss": 3.3795,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 3.7132542547705003,
|
|
"grad_norm": 1.3715941905975342,
|
|
"learning_rate": 6.350376370692575e-05,
|
|
"loss": 3.3621,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 3.7906137184115525,
|
|
"grad_norm": 1.4353686571121216,
|
|
"learning_rate": 6.27223712655953e-05,
|
|
"loss": 3.3625,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 3.867973182052604,
|
|
"grad_norm": 1.4907252788543701,
|
|
"learning_rate": 6.194097882426484e-05,
|
|
"loss": 3.3694,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 3.9453326456936564,
|
|
"grad_norm": 1.3906782865524292,
|
|
"learning_rate": 6.11595863829344e-05,
|
|
"loss": 3.3778,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 4.0226921093347086,
|
|
"grad_norm": 1.4113860130310059,
|
|
"learning_rate": 6.0378193941603944e-05,
|
|
"loss": 3.3418,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 4.100051572975761,
|
|
"grad_norm": 1.371813416481018,
|
|
"learning_rate": 5.959680150027349e-05,
|
|
"loss": 3.2831,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 4.177411036616813,
|
|
"grad_norm": 1.433017611503601,
|
|
"learning_rate": 5.881540905894304e-05,
|
|
"loss": 3.2937,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 4.254770500257865,
|
|
"grad_norm": 1.454952597618103,
|
|
"learning_rate": 5.803401661761259e-05,
|
|
"loss": 3.2925,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 4.332129963898917,
|
|
"grad_norm": 1.4268256425857544,
|
|
"learning_rate": 5.725262417628213e-05,
|
|
"loss": 3.3171,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 4.409489427539969,
|
|
"grad_norm": 1.4231845140457153,
|
|
"learning_rate": 5.647123173495169e-05,
|
|
"loss": 3.3303,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 4.486848891181021,
|
|
"grad_norm": 1.358296275138855,
|
|
"learning_rate": 5.568983929362124e-05,
|
|
"loss": 3.3273,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 4.564208354822073,
|
|
"grad_norm": 1.4314409494400024,
|
|
"learning_rate": 5.490844685229078e-05,
|
|
"loss": 3.3027,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 4.641567818463125,
|
|
"grad_norm": 1.447662353515625,
|
|
"learning_rate": 5.4127054410960335e-05,
|
|
"loss": 3.2779,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 4.718927282104177,
|
|
"grad_norm": 1.498307466506958,
|
|
"learning_rate": 5.334566196962988e-05,
|
|
"loss": 3.2733,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 4.796286745745229,
|
|
"grad_norm": 1.3249318599700928,
|
|
"learning_rate": 5.256426952829944e-05,
|
|
"loss": 3.2159,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 4.873646209386282,
|
|
"grad_norm": 1.7372560501098633,
|
|
"learning_rate": 5.1782877086968985e-05,
|
|
"loss": 3.2769,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 4.951005673027334,
|
|
"grad_norm": 1.475892186164856,
|
|
"learning_rate": 5.100148464563853e-05,
|
|
"loss": 3.2724,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 5.028365136668386,
|
|
"grad_norm": 1.5225096940994263,
|
|
"learning_rate": 5.0220092204308076e-05,
|
|
"loss": 3.2132,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 5.105724600309438,
|
|
"grad_norm": 1.5637928247451782,
|
|
"learning_rate": 4.943869976297763e-05,
|
|
"loss": 3.2313,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 5.18308406395049,
|
|
"grad_norm": 1.5115944147109985,
|
|
"learning_rate": 4.865730732164718e-05,
|
|
"loss": 3.2163,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 5.260443527591542,
|
|
"grad_norm": 1.4446969032287598,
|
|
"learning_rate": 4.7875914880316726e-05,
|
|
"loss": 3.1997,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 5.337802991232594,
|
|
"grad_norm": 1.4487448930740356,
|
|
"learning_rate": 4.709452243898628e-05,
|
|
"loss": 3.2236,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 5.415162454873646,
|
|
"grad_norm": 1.5380080938339233,
|
|
"learning_rate": 4.6313129997655824e-05,
|
|
"loss": 3.2076,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 5.492521918514698,
|
|
"grad_norm": 1.4626458883285522,
|
|
"learning_rate": 4.5531737556325376e-05,
|
|
"loss": 3.2204,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 5.56988138215575,
|
|
"grad_norm": 1.6070873737335205,
|
|
"learning_rate": 4.475034511499492e-05,
|
|
"loss": 3.182,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 5.647240845796802,
|
|
"grad_norm": 1.5365498065948486,
|
|
"learning_rate": 4.3968952673664474e-05,
|
|
"loss": 3.1846,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 5.724600309437855,
|
|
"grad_norm": 1.6350524425506592,
|
|
"learning_rate": 4.3187560232334026e-05,
|
|
"loss": 3.2233,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 5.801959773078907,
|
|
"grad_norm": 1.5178848505020142,
|
|
"learning_rate": 4.240616779100357e-05,
|
|
"loss": 3.2046,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 5.879319236719959,
|
|
"grad_norm": 1.5043169260025024,
|
|
"learning_rate": 4.162477534967312e-05,
|
|
"loss": 3.16,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 5.956678700361011,
|
|
"grad_norm": 1.371469259262085,
|
|
"learning_rate": 4.084338290834267e-05,
|
|
"loss": 3.2134,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 6.034038164002063,
|
|
"grad_norm": 1.660897970199585,
|
|
"learning_rate": 4.0061990467012215e-05,
|
|
"loss": 3.1417,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 6.111397627643115,
|
|
"grad_norm": 1.6934055089950562,
|
|
"learning_rate": 3.928059802568177e-05,
|
|
"loss": 3.1623,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 6.188757091284167,
|
|
"grad_norm": 1.6035997867584229,
|
|
"learning_rate": 3.849920558435132e-05,
|
|
"loss": 3.1501,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 6.266116554925219,
|
|
"grad_norm": 1.618349313735962,
|
|
"learning_rate": 3.7717813143020865e-05,
|
|
"loss": 3.1305,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 6.343476018566271,
|
|
"grad_norm": 1.519572377204895,
|
|
"learning_rate": 3.693642070169042e-05,
|
|
"loss": 3.1529,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 6.420835482207323,
|
|
"grad_norm": 1.5830146074295044,
|
|
"learning_rate": 3.615502826035996e-05,
|
|
"loss": 3.1571,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 6.498194945848375,
|
|
"grad_norm": 1.6157386302947998,
|
|
"learning_rate": 3.537363581902951e-05,
|
|
"loss": 3.1564,
|
|
"step": 25200
|
|
},
|
|
{
|
|
"epoch": 6.575554409489428,
|
|
"grad_norm": 1.5344434976577759,
|
|
"learning_rate": 3.459224337769906e-05,
|
|
"loss": 3.1638,
|
|
"step": 25500
|
|
},
|
|
{
|
|
"epoch": 6.65291387313048,
|
|
"grad_norm": 1.6386032104492188,
|
|
"learning_rate": 3.381085093636861e-05,
|
|
"loss": 3.0942,
|
|
"step": 25800
|
|
},
|
|
{
|
|
"epoch": 6.730273336771532,
|
|
"grad_norm": 1.5561423301696777,
|
|
"learning_rate": 3.302945849503816e-05,
|
|
"loss": 3.121,
|
|
"step": 26100
|
|
},
|
|
{
|
|
"epoch": 6.807632800412584,
|
|
"grad_norm": 1.6447923183441162,
|
|
"learning_rate": 3.224806605370771e-05,
|
|
"loss": 3.1108,
|
|
"step": 26400
|
|
},
|
|
{
|
|
"epoch": 6.884992264053636,
|
|
"grad_norm": 1.6027878522872925,
|
|
"learning_rate": 3.1466673612377256e-05,
|
|
"loss": 3.1331,
|
|
"step": 26700
|
|
},
|
|
{
|
|
"epoch": 6.9623517276946885,
|
|
"grad_norm": 1.6786209344863892,
|
|
"learning_rate": 3.068528117104681e-05,
|
|
"loss": 3.1515,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 7.03971119133574,
|
|
"grad_norm": 1.725610613822937,
|
|
"learning_rate": 2.9903888729716357e-05,
|
|
"loss": 3.1025,
|
|
"step": 27300
|
|
},
|
|
{
|
|
"epoch": 7.117070654976792,
|
|
"grad_norm": 1.6194796562194824,
|
|
"learning_rate": 2.9122496288385903e-05,
|
|
"loss": 3.0819,
|
|
"step": 27600
|
|
},
|
|
{
|
|
"epoch": 7.194430118617844,
|
|
"grad_norm": 1.7126758098602295,
|
|
"learning_rate": 2.8341103847055455e-05,
|
|
"loss": 3.1056,
|
|
"step": 27900
|
|
},
|
|
{
|
|
"epoch": 7.271789582258896,
|
|
"grad_norm": 1.610686182975769,
|
|
"learning_rate": 2.7559711405725004e-05,
|
|
"loss": 3.0932,
|
|
"step": 28200
|
|
},
|
|
{
|
|
"epoch": 7.349149045899948,
|
|
"grad_norm": 1.6700507402420044,
|
|
"learning_rate": 2.677831896439455e-05,
|
|
"loss": 3.0938,
|
|
"step": 28500
|
|
},
|
|
{
|
|
"epoch": 7.426508509541001,
|
|
"grad_norm": 1.5000895261764526,
|
|
"learning_rate": 2.59969265230641e-05,
|
|
"loss": 3.0911,
|
|
"step": 28800
|
|
},
|
|
{
|
|
"epoch": 7.503867973182053,
|
|
"grad_norm": 1.6568007469177246,
|
|
"learning_rate": 2.521553408173365e-05,
|
|
"loss": 3.0938,
|
|
"step": 29100
|
|
},
|
|
{
|
|
"epoch": 7.581227436823105,
|
|
"grad_norm": 1.7494336366653442,
|
|
"learning_rate": 2.44341416404032e-05,
|
|
"loss": 3.0711,
|
|
"step": 29400
|
|
},
|
|
{
|
|
"epoch": 7.658586900464157,
|
|
"grad_norm": 1.7158912420272827,
|
|
"learning_rate": 2.3652749199072748e-05,
|
|
"loss": 3.072,
|
|
"step": 29700
|
|
},
|
|
{
|
|
"epoch": 7.735946364105208,
|
|
"grad_norm": 1.7721878290176392,
|
|
"learning_rate": 2.2871356757742297e-05,
|
|
"loss": 3.1069,
|
|
"step": 30000
|
|
},
|
|
{
|
|
"epoch": 7.813305827746261,
|
|
"grad_norm": 1.5379910469055176,
|
|
"learning_rate": 2.2089964316411846e-05,
|
|
"loss": 3.0882,
|
|
"step": 30300
|
|
},
|
|
{
|
|
"epoch": 7.890665291387313,
|
|
"grad_norm": 1.6254152059555054,
|
|
"learning_rate": 2.1308571875081395e-05,
|
|
"loss": 3.0506,
|
|
"step": 30600
|
|
},
|
|
{
|
|
"epoch": 7.968024755028365,
|
|
"grad_norm": 1.6591140031814575,
|
|
"learning_rate": 2.0527179433750944e-05,
|
|
"loss": 3.0912,
|
|
"step": 30900
|
|
},
|
|
{
|
|
"epoch": 8.045384218669417,
|
|
"grad_norm": 1.4908177852630615,
|
|
"learning_rate": 1.9745786992420496e-05,
|
|
"loss": 3.0567,
|
|
"step": 31200
|
|
},
|
|
{
|
|
"epoch": 8.12274368231047,
|
|
"grad_norm": 1.6893351078033447,
|
|
"learning_rate": 1.896439455109004e-05,
|
|
"loss": 3.0538,
|
|
"step": 31500
|
|
},
|
|
{
|
|
"epoch": 8.200103145951521,
|
|
"grad_norm": 1.6335694789886475,
|
|
"learning_rate": 1.818300210975959e-05,
|
|
"loss": 3.0596,
|
|
"step": 31800
|
|
},
|
|
{
|
|
"epoch": 8.277462609592574,
|
|
"grad_norm": 1.814844012260437,
|
|
"learning_rate": 1.7401609668429143e-05,
|
|
"loss": 3.0789,
|
|
"step": 32100
|
|
},
|
|
{
|
|
"epoch": 8.354822073233626,
|
|
"grad_norm": 1.666052222251892,
|
|
"learning_rate": 1.662021722709869e-05,
|
|
"loss": 3.0435,
|
|
"step": 32400
|
|
},
|
|
{
|
|
"epoch": 8.432181536874678,
|
|
"grad_norm": 1.8534607887268066,
|
|
"learning_rate": 1.5838824785768237e-05,
|
|
"loss": 3.0542,
|
|
"step": 32700
|
|
},
|
|
{
|
|
"epoch": 8.50954100051573,
|
|
"grad_norm": 1.8089135885238647,
|
|
"learning_rate": 1.5057432344437788e-05,
|
|
"loss": 3.0435,
|
|
"step": 33000
|
|
},
|
|
{
|
|
"epoch": 8.586900464156782,
|
|
"grad_norm": 1.5717253684997559,
|
|
"learning_rate": 1.4276039903107338e-05,
|
|
"loss": 3.0323,
|
|
"step": 33300
|
|
},
|
|
{
|
|
"epoch": 8.664259927797834,
|
|
"grad_norm": 1.681136131286621,
|
|
"learning_rate": 1.3494647461776889e-05,
|
|
"loss": 3.0528,
|
|
"step": 33600
|
|
},
|
|
{
|
|
"epoch": 8.741619391438887,
|
|
"grad_norm": 1.700218915939331,
|
|
"learning_rate": 1.2713255020446434e-05,
|
|
"loss": 3.0454,
|
|
"step": 33900
|
|
},
|
|
{
|
|
"epoch": 8.818978855079939,
|
|
"grad_norm": 1.8672676086425781,
|
|
"learning_rate": 1.1931862579115985e-05,
|
|
"loss": 3.0757,
|
|
"step": 34200
|
|
},
|
|
{
|
|
"epoch": 8.896338318720991,
|
|
"grad_norm": 1.7094194889068604,
|
|
"learning_rate": 1.1150470137785534e-05,
|
|
"loss": 3.0514,
|
|
"step": 34500
|
|
},
|
|
{
|
|
"epoch": 8.973697782362041,
|
|
"grad_norm": 1.7016539573669434,
|
|
"learning_rate": 1.0369077696455083e-05,
|
|
"loss": 3.022,
|
|
"step": 34800
|
|
},
|
|
{
|
|
"epoch": 9.051057246003094,
|
|
"grad_norm": 1.7859755754470825,
|
|
"learning_rate": 9.587685255124633e-06,
|
|
"loss": 3.0189,
|
|
"step": 35100
|
|
},
|
|
{
|
|
"epoch": 9.128416709644146,
|
|
"grad_norm": 1.6786860227584839,
|
|
"learning_rate": 8.80629281379418e-06,
|
|
"loss": 3.0062,
|
|
"step": 35400
|
|
},
|
|
{
|
|
"epoch": 9.205776173285198,
|
|
"grad_norm": 1.7441751956939697,
|
|
"learning_rate": 8.024900372463731e-06,
|
|
"loss": 3.0036,
|
|
"step": 35700
|
|
},
|
|
{
|
|
"epoch": 9.28313563692625,
|
|
"grad_norm": 1.6931071281433105,
|
|
"learning_rate": 7.243507931133279e-06,
|
|
"loss": 3.0179,
|
|
"step": 36000
|
|
},
|
|
{
|
|
"epoch": 9.360495100567302,
|
|
"grad_norm": 1.5787148475646973,
|
|
"learning_rate": 6.46211548980283e-06,
|
|
"loss": 3.045,
|
|
"step": 36300
|
|
},
|
|
{
|
|
"epoch": 9.437854564208354,
|
|
"grad_norm": 1.8229496479034424,
|
|
"learning_rate": 5.680723048472378e-06,
|
|
"loss": 3.0025,
|
|
"step": 36600
|
|
},
|
|
{
|
|
"epoch": 9.515214027849407,
|
|
"grad_norm": 1.8122637271881104,
|
|
"learning_rate": 4.899330607141927e-06,
|
|
"loss": 3.0239,
|
|
"step": 36900
|
|
},
|
|
{
|
|
"epoch": 9.592573491490459,
|
|
"grad_norm": 1.5085257291793823,
|
|
"learning_rate": 4.117938165811476e-06,
|
|
"loss": 3.0226,
|
|
"step": 37200
|
|
},
|
|
{
|
|
"epoch": 9.669932955131511,
|
|
"grad_norm": 1.8228789567947388,
|
|
"learning_rate": 3.336545724481025e-06,
|
|
"loss": 3.0286,
|
|
"step": 37500
|
|
},
|
|
{
|
|
"epoch": 9.747292418772563,
|
|
"grad_norm": 1.5136455297470093,
|
|
"learning_rate": 2.5551532831505747e-06,
|
|
"loss": 3.0184,
|
|
"step": 37800
|
|
},
|
|
{
|
|
"epoch": 9.824651882413615,
|
|
"grad_norm": 1.7498648166656494,
|
|
"learning_rate": 1.7737608418201238e-06,
|
|
"loss": 3.002,
|
|
"step": 38100
|
|
},
|
|
{
|
|
"epoch": 9.902011346054667,
|
|
"grad_norm": 1.625130534172058,
|
|
"learning_rate": 9.923684004896727e-07,
|
|
"loss": 3.0423,
|
|
"step": 38400
|
|
},
|
|
{
|
|
"epoch": 9.97937080969572,
|
|
"grad_norm": 1.782974362373352,
|
|
"learning_rate": 2.1097595915922174e-07,
|
|
"loss": 3.023,
|
|
"step": 38700
|
|
}
|
|
],
|
|
"logging_steps": 300,
|
|
"max_steps": 38780,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 10,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.20181986164736e+16,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|