{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2436, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012315270935960591, "grad_norm": 36.7600685768779, "learning_rate": 0.0, "loss": 4.157936096191406, "step": 1 }, { "epoch": 0.0024630541871921183, "grad_norm": 37.664654386111934, "learning_rate": 4.098360655737705e-08, "loss": 3.8494455814361572, "step": 2 }, { "epoch": 0.003694581280788177, "grad_norm": 38.23654519991739, "learning_rate": 8.19672131147541e-08, "loss": 3.7497382164001465, "step": 3 }, { "epoch": 0.0049261083743842365, "grad_norm": 49.1212230676838, "learning_rate": 1.2295081967213116e-07, "loss": 4.874395847320557, "step": 4 }, { "epoch": 0.006157635467980296, "grad_norm": 51.23013396325368, "learning_rate": 1.639344262295082e-07, "loss": 5.729328155517578, "step": 5 }, { "epoch": 0.007389162561576354, "grad_norm": 33.06662236870545, "learning_rate": 2.0491803278688524e-07, "loss": 3.968146324157715, "step": 6 }, { "epoch": 0.008620689655172414, "grad_norm": 33.94224964860029, "learning_rate": 2.459016393442623e-07, "loss": 4.092198848724365, "step": 7 }, { "epoch": 0.009852216748768473, "grad_norm": 28.585037517248036, "learning_rate": 2.8688524590163937e-07, "loss": 3.4101109504699707, "step": 8 }, { "epoch": 0.011083743842364532, "grad_norm": 39.512646004891735, "learning_rate": 3.278688524590164e-07, "loss": 4.387180805206299, "step": 9 }, { "epoch": 0.012315270935960592, "grad_norm": 29.487139965581328, "learning_rate": 3.6885245901639347e-07, "loss": 3.4985814094543457, "step": 10 }, { "epoch": 0.013546798029556651, "grad_norm": 35.1254398727907, "learning_rate": 4.0983606557377047e-07, "loss": 5.157108306884766, "step": 11 }, { "epoch": 0.014778325123152709, "grad_norm": 33.7037580376338, "learning_rate": 4.508196721311476e-07, "loss": 4.057161808013916, "step": 12 }, { "epoch": 0.01600985221674877, "grad_norm": 35.136997816960864, "learning_rate": 4.918032786885246e-07, "loss": 4.237695693969727, "step": 13 }, { "epoch": 0.017241379310344827, "grad_norm": 39.34259468640213, "learning_rate": 5.327868852459017e-07, "loss": 4.635364532470703, "step": 14 }, { "epoch": 0.01847290640394089, "grad_norm": 33.5811322334086, "learning_rate": 5.737704918032787e-07, "loss": 3.3291709423065186, "step": 15 }, { "epoch": 0.019704433497536946, "grad_norm": 33.93459885987163, "learning_rate": 6.147540983606558e-07, "loss": 3.8693442344665527, "step": 16 }, { "epoch": 0.020935960591133004, "grad_norm": 25.605142057165235, "learning_rate": 6.557377049180328e-07, "loss": 3.4419002532958984, "step": 17 }, { "epoch": 0.022167487684729065, "grad_norm": 33.566059151369195, "learning_rate": 6.967213114754098e-07, "loss": 3.8446784019470215, "step": 18 }, { "epoch": 0.023399014778325122, "grad_norm": 29.72848721122937, "learning_rate": 7.377049180327869e-07, "loss": 3.5930001735687256, "step": 19 }, { "epoch": 0.024630541871921183, "grad_norm": 26.393927957123275, "learning_rate": 7.78688524590164e-07, "loss": 3.638699531555176, "step": 20 }, { "epoch": 0.02586206896551724, "grad_norm": 26.06446386508918, "learning_rate": 8.196721311475409e-07, "loss": 3.6789143085479736, "step": 21 }, { "epoch": 0.027093596059113302, "grad_norm": 35.2733178056508, "learning_rate": 8.606557377049181e-07, "loss": 3.959703207015991, "step": 22 }, { "epoch": 0.02832512315270936, "grad_norm": 33.03896583989334, "learning_rate": 9.016393442622952e-07, "loss": 3.8822054862976074, "step": 23 }, { "epoch": 0.029556650246305417, "grad_norm": 33.57337166473473, "learning_rate": 9.426229508196721e-07, "loss": 3.8448376655578613, "step": 24 }, { "epoch": 0.03078817733990148, "grad_norm": 20.141759958099808, "learning_rate": 9.836065573770493e-07, "loss": 3.372765064239502, "step": 25 }, { "epoch": 0.03201970443349754, "grad_norm": 23.420906015149534, "learning_rate": 1.0245901639344263e-06, "loss": 3.4989559650421143, "step": 26 }, { "epoch": 0.0332512315270936, "grad_norm": 33.133583346249836, "learning_rate": 1.0655737704918034e-06, "loss": 3.6318516731262207, "step": 27 }, { "epoch": 0.034482758620689655, "grad_norm": 18.99907077955952, "learning_rate": 1.1065573770491804e-06, "loss": 3.351621627807617, "step": 28 }, { "epoch": 0.03571428571428571, "grad_norm": 18.353082575411992, "learning_rate": 1.1475409836065575e-06, "loss": 3.1978442668914795, "step": 29 }, { "epoch": 0.03694581280788178, "grad_norm": 26.628518248775677, "learning_rate": 1.1885245901639345e-06, "loss": 4.033670902252197, "step": 30 }, { "epoch": 0.038177339901477834, "grad_norm": 16.452853960671934, "learning_rate": 1.2295081967213116e-06, "loss": 3.626315116882324, "step": 31 }, { "epoch": 0.03940886699507389, "grad_norm": 16.372280561150735, "learning_rate": 1.2704918032786886e-06, "loss": 3.385767936706543, "step": 32 }, { "epoch": 0.04064039408866995, "grad_norm": 23.073122100098054, "learning_rate": 1.3114754098360657e-06, "loss": 3.946913719177246, "step": 33 }, { "epoch": 0.04187192118226601, "grad_norm": 11.580002792760054, "learning_rate": 1.352459016393443e-06, "loss": 3.3034565448760986, "step": 34 }, { "epoch": 0.04310344827586207, "grad_norm": 17.961230909917667, "learning_rate": 1.3934426229508196e-06, "loss": 3.2368359565734863, "step": 35 }, { "epoch": 0.04433497536945813, "grad_norm": 11.543206406321579, "learning_rate": 1.4344262295081968e-06, "loss": 3.728569984436035, "step": 36 }, { "epoch": 0.04556650246305419, "grad_norm": 14.762221765187595, "learning_rate": 1.4754098360655739e-06, "loss": 3.3756117820739746, "step": 37 }, { "epoch": 0.046798029556650245, "grad_norm": 13.981113216433073, "learning_rate": 1.516393442622951e-06, "loss": 3.399596691131592, "step": 38 }, { "epoch": 0.0480295566502463, "grad_norm": 24.184372796013783, "learning_rate": 1.557377049180328e-06, "loss": 4.209182262420654, "step": 39 }, { "epoch": 0.04926108374384237, "grad_norm": 11.628888477605962, "learning_rate": 1.5983606557377053e-06, "loss": 2.797691822052002, "step": 40 }, { "epoch": 0.050492610837438424, "grad_norm": 16.948512477650098, "learning_rate": 1.6393442622950819e-06, "loss": 3.630617141723633, "step": 41 }, { "epoch": 0.05172413793103448, "grad_norm": 14.186312302659116, "learning_rate": 1.6803278688524592e-06, "loss": 3.182535171508789, "step": 42 }, { "epoch": 0.05295566502463054, "grad_norm": 13.666441097834594, "learning_rate": 1.7213114754098362e-06, "loss": 3.554767370223999, "step": 43 }, { "epoch": 0.054187192118226604, "grad_norm": 16.91458664100256, "learning_rate": 1.7622950819672133e-06, "loss": 3.675961494445801, "step": 44 }, { "epoch": 0.05541871921182266, "grad_norm": 16.161861225550066, "learning_rate": 1.8032786885245903e-06, "loss": 3.346269369125366, "step": 45 }, { "epoch": 0.05665024630541872, "grad_norm": 14.040742605132769, "learning_rate": 1.8442622950819674e-06, "loss": 3.4892683029174805, "step": 46 }, { "epoch": 0.05788177339901478, "grad_norm": 14.981644166015332, "learning_rate": 1.8852459016393442e-06, "loss": 3.3602352142333984, "step": 47 }, { "epoch": 0.059113300492610835, "grad_norm": 9.346123052417639, "learning_rate": 1.9262295081967215e-06, "loss": 3.301713228225708, "step": 48 }, { "epoch": 0.0603448275862069, "grad_norm": 22.6894652203607, "learning_rate": 1.9672131147540985e-06, "loss": 3.7745046615600586, "step": 49 }, { "epoch": 0.06157635467980296, "grad_norm": 8.465817304604528, "learning_rate": 2.0081967213114756e-06, "loss": 3.0452070236206055, "step": 50 }, { "epoch": 0.06280788177339902, "grad_norm": 20.560185363485036, "learning_rate": 2.0491803278688526e-06, "loss": 3.7955079078674316, "step": 51 }, { "epoch": 0.06403940886699508, "grad_norm": 8.75621229547506, "learning_rate": 2.0901639344262297e-06, "loss": 3.1644039154052734, "step": 52 }, { "epoch": 0.06527093596059114, "grad_norm": 13.679443353464602, "learning_rate": 2.1311475409836067e-06, "loss": 3.2459874153137207, "step": 53 }, { "epoch": 0.0665024630541872, "grad_norm": 12.278683741598382, "learning_rate": 2.1721311475409838e-06, "loss": 3.61742901802063, "step": 54 }, { "epoch": 0.06773399014778325, "grad_norm": 12.717536959646948, "learning_rate": 2.213114754098361e-06, "loss": 3.3136467933654785, "step": 55 }, { "epoch": 0.06896551724137931, "grad_norm": 15.543240982145285, "learning_rate": 2.254098360655738e-06, "loss": 3.272696018218994, "step": 56 }, { "epoch": 0.07019704433497537, "grad_norm": 13.101250342680272, "learning_rate": 2.295081967213115e-06, "loss": 3.041365385055542, "step": 57 }, { "epoch": 0.07142857142857142, "grad_norm": 11.7077150462335, "learning_rate": 2.336065573770492e-06, "loss": 3.309293746948242, "step": 58 }, { "epoch": 0.07266009852216748, "grad_norm": 26.32874973946408, "learning_rate": 2.377049180327869e-06, "loss": 3.4676990509033203, "step": 59 }, { "epoch": 0.07389162561576355, "grad_norm": 16.588748060840203, "learning_rate": 2.418032786885246e-06, "loss": 2.8236446380615234, "step": 60 }, { "epoch": 0.07512315270935961, "grad_norm": 8.18040938852151, "learning_rate": 2.459016393442623e-06, "loss": 2.716705083847046, "step": 61 }, { "epoch": 0.07635467980295567, "grad_norm": 20.07190390154421, "learning_rate": 2.5e-06, "loss": 2.5590922832489014, "step": 62 }, { "epoch": 0.07758620689655173, "grad_norm": 11.418876796774995, "learning_rate": 2.5409836065573773e-06, "loss": 2.6987993717193604, "step": 63 }, { "epoch": 0.07881773399014778, "grad_norm": 13.315536498724418, "learning_rate": 2.5819672131147543e-06, "loss": 4.340274810791016, "step": 64 }, { "epoch": 0.08004926108374384, "grad_norm": 17.075484530853824, "learning_rate": 2.6229508196721314e-06, "loss": 4.166017532348633, "step": 65 }, { "epoch": 0.0812807881773399, "grad_norm": 9.586520693266204, "learning_rate": 2.6639344262295084e-06, "loss": 2.664743185043335, "step": 66 }, { "epoch": 0.08251231527093596, "grad_norm": 11.154276667212649, "learning_rate": 2.704918032786886e-06, "loss": 3.4285409450531006, "step": 67 }, { "epoch": 0.08374384236453201, "grad_norm": 23.203683210215114, "learning_rate": 2.745901639344263e-06, "loss": 2.613044023513794, "step": 68 }, { "epoch": 0.08497536945812807, "grad_norm": 13.748249566024421, "learning_rate": 2.786885245901639e-06, "loss": 3.1923232078552246, "step": 69 }, { "epoch": 0.08620689655172414, "grad_norm": 23.6456335605133, "learning_rate": 2.8278688524590166e-06, "loss": 3.881509780883789, "step": 70 }, { "epoch": 0.0874384236453202, "grad_norm": 12.242314523228817, "learning_rate": 2.8688524590163937e-06, "loss": 3.3872318267822266, "step": 71 }, { "epoch": 0.08866995073891626, "grad_norm": 10.174962303917177, "learning_rate": 2.9098360655737707e-06, "loss": 3.1114461421966553, "step": 72 }, { "epoch": 0.08990147783251232, "grad_norm": 9.979115596445391, "learning_rate": 2.9508196721311478e-06, "loss": 3.182547092437744, "step": 73 }, { "epoch": 0.09113300492610837, "grad_norm": 10.437140873327547, "learning_rate": 2.991803278688525e-06, "loss": 3.488222599029541, "step": 74 }, { "epoch": 0.09236453201970443, "grad_norm": 9.422729886318432, "learning_rate": 3.032786885245902e-06, "loss": 3.0836119651794434, "step": 75 }, { "epoch": 0.09359605911330049, "grad_norm": 9.576987414129725, "learning_rate": 3.073770491803279e-06, "loss": 2.965284824371338, "step": 76 }, { "epoch": 0.09482758620689655, "grad_norm": 9.051063368959207, "learning_rate": 3.114754098360656e-06, "loss": 3.0366950035095215, "step": 77 }, { "epoch": 0.0960591133004926, "grad_norm": 19.769081445901076, "learning_rate": 3.155737704918033e-06, "loss": 3.7336153984069824, "step": 78 }, { "epoch": 0.09729064039408868, "grad_norm": 17.150697728192082, "learning_rate": 3.1967213114754105e-06, "loss": 3.3801069259643555, "step": 79 }, { "epoch": 0.09852216748768473, "grad_norm": 11.029522805215215, "learning_rate": 3.2377049180327876e-06, "loss": 3.1140761375427246, "step": 80 }, { "epoch": 0.09975369458128079, "grad_norm": 9.099280236883942, "learning_rate": 3.2786885245901638e-06, "loss": 3.1199679374694824, "step": 81 }, { "epoch": 0.10098522167487685, "grad_norm": 10.894555994753386, "learning_rate": 3.3196721311475413e-06, "loss": 2.919370651245117, "step": 82 }, { "epoch": 0.1022167487684729, "grad_norm": 10.246835888516838, "learning_rate": 3.3606557377049183e-06, "loss": 3.0058987140655518, "step": 83 }, { "epoch": 0.10344827586206896, "grad_norm": 8.315907792605513, "learning_rate": 3.4016393442622954e-06, "loss": 3.201812744140625, "step": 84 }, { "epoch": 0.10467980295566502, "grad_norm": 10.55746200109404, "learning_rate": 3.4426229508196724e-06, "loss": 2.8387913703918457, "step": 85 }, { "epoch": 0.10591133004926108, "grad_norm": 23.69077930997652, "learning_rate": 3.4836065573770495e-06, "loss": 3.565217971801758, "step": 86 }, { "epoch": 0.10714285714285714, "grad_norm": 17.752023971892026, "learning_rate": 3.5245901639344265e-06, "loss": 3.563566207885742, "step": 87 }, { "epoch": 0.10837438423645321, "grad_norm": 7.328374103560201, "learning_rate": 3.5655737704918036e-06, "loss": 3.3282840251922607, "step": 88 }, { "epoch": 0.10960591133004927, "grad_norm": 9.307632619059875, "learning_rate": 3.6065573770491806e-06, "loss": 2.693999767303467, "step": 89 }, { "epoch": 0.11083743842364532, "grad_norm": 9.537047052971076, "learning_rate": 3.6475409836065577e-06, "loss": 3.0820372104644775, "step": 90 }, { "epoch": 0.11206896551724138, "grad_norm": 11.895652602739977, "learning_rate": 3.6885245901639347e-06, "loss": 2.5853302478790283, "step": 91 }, { "epoch": 0.11330049261083744, "grad_norm": 19.909007675751152, "learning_rate": 3.729508196721312e-06, "loss": 3.622239589691162, "step": 92 }, { "epoch": 0.1145320197044335, "grad_norm": 9.562243449141407, "learning_rate": 3.7704918032786884e-06, "loss": 3.269063949584961, "step": 93 }, { "epoch": 0.11576354679802955, "grad_norm": 10.402493100303827, "learning_rate": 3.811475409836066e-06, "loss": 2.932877540588379, "step": 94 }, { "epoch": 0.11699507389162561, "grad_norm": 7.9937288583052, "learning_rate": 3.852459016393443e-06, "loss": 2.8118062019348145, "step": 95 }, { "epoch": 0.11822660098522167, "grad_norm": 12.161021036700474, "learning_rate": 3.8934426229508196e-06, "loss": 2.977217674255371, "step": 96 }, { "epoch": 0.11945812807881774, "grad_norm": 9.48055025878799, "learning_rate": 3.934426229508197e-06, "loss": 2.534318685531616, "step": 97 }, { "epoch": 0.1206896551724138, "grad_norm": 8.971246829575332, "learning_rate": 3.975409836065574e-06, "loss": 2.888187885284424, "step": 98 }, { "epoch": 0.12192118226600986, "grad_norm": 9.005963079459367, "learning_rate": 4.016393442622951e-06, "loss": 2.6558847427368164, "step": 99 }, { "epoch": 0.12315270935960591, "grad_norm": 9.651575487247985, "learning_rate": 4.057377049180329e-06, "loss": 2.707779884338379, "step": 100 }, { "epoch": 0.12438423645320197, "grad_norm": 8.8113086796363, "learning_rate": 4.098360655737705e-06, "loss": 3.2292768955230713, "step": 101 }, { "epoch": 0.12561576354679804, "grad_norm": 13.438004585842267, "learning_rate": 4.139344262295083e-06, "loss": 2.9476242065429688, "step": 102 }, { "epoch": 0.1268472906403941, "grad_norm": 9.014089316100105, "learning_rate": 4.180327868852459e-06, "loss": 2.9598989486694336, "step": 103 }, { "epoch": 0.12807881773399016, "grad_norm": 8.84790292690003, "learning_rate": 4.221311475409837e-06, "loss": 2.593669891357422, "step": 104 }, { "epoch": 0.12931034482758622, "grad_norm": 9.732549020932908, "learning_rate": 4.2622950819672135e-06, "loss": 2.884164810180664, "step": 105 }, { "epoch": 0.13054187192118227, "grad_norm": 16.843882776588455, "learning_rate": 4.30327868852459e-06, "loss": 3.091454267501831, "step": 106 }, { "epoch": 0.13177339901477833, "grad_norm": 11.588593389024608, "learning_rate": 4.3442622950819676e-06, "loss": 2.913923740386963, "step": 107 }, { "epoch": 0.1330049261083744, "grad_norm": 18.29569166468431, "learning_rate": 4.385245901639344e-06, "loss": 2.779545307159424, "step": 108 }, { "epoch": 0.13423645320197045, "grad_norm": 9.202902461418143, "learning_rate": 4.426229508196722e-06, "loss": 1.8711936473846436, "step": 109 }, { "epoch": 0.1354679802955665, "grad_norm": 13.481452134492262, "learning_rate": 4.467213114754098e-06, "loss": 2.892902374267578, "step": 110 }, { "epoch": 0.13669950738916256, "grad_norm": 12.958399723073786, "learning_rate": 4.508196721311476e-06, "loss": 3.0064496994018555, "step": 111 }, { "epoch": 0.13793103448275862, "grad_norm": 13.016721832572243, "learning_rate": 4.549180327868853e-06, "loss": 2.8515172004699707, "step": 112 }, { "epoch": 0.13916256157635468, "grad_norm": 8.374489861175874, "learning_rate": 4.59016393442623e-06, "loss": 3.2504403591156006, "step": 113 }, { "epoch": 0.14039408866995073, "grad_norm": 7.893218569270328, "learning_rate": 4.631147540983607e-06, "loss": 2.67405366897583, "step": 114 }, { "epoch": 0.1416256157635468, "grad_norm": 10.146133271952388, "learning_rate": 4.672131147540984e-06, "loss": 3.079516887664795, "step": 115 }, { "epoch": 0.14285714285714285, "grad_norm": 19.354096600007853, "learning_rate": 4.7131147540983615e-06, "loss": 2.8897287845611572, "step": 116 }, { "epoch": 0.1440886699507389, "grad_norm": 13.276953948761626, "learning_rate": 4.754098360655738e-06, "loss": 2.7275729179382324, "step": 117 }, { "epoch": 0.14532019704433496, "grad_norm": 9.682874064462416, "learning_rate": 4.795081967213115e-06, "loss": 2.9996538162231445, "step": 118 }, { "epoch": 0.14655172413793102, "grad_norm": 7.397102570298892, "learning_rate": 4.836065573770492e-06, "loss": 3.307245969772339, "step": 119 }, { "epoch": 0.1477832512315271, "grad_norm": 12.665703486872426, "learning_rate": 4.877049180327869e-06, "loss": 3.475133180618286, "step": 120 }, { "epoch": 0.14901477832512317, "grad_norm": 11.317195785901513, "learning_rate": 4.918032786885246e-06, "loss": 3.0947790145874023, "step": 121 }, { "epoch": 0.15024630541871922, "grad_norm": 7.236267930218516, "learning_rate": 4.959016393442623e-06, "loss": 2.9675135612487793, "step": 122 }, { "epoch": 0.15147783251231528, "grad_norm": 8.759893869589918, "learning_rate": 5e-06, "loss": 2.7873148918151855, "step": 123 }, { "epoch": 0.15270935960591134, "grad_norm": 10.395692764487977, "learning_rate": 5.040983606557377e-06, "loss": 3.10044264793396, "step": 124 }, { "epoch": 0.1539408866995074, "grad_norm": 10.40007835832301, "learning_rate": 5.0819672131147545e-06, "loss": 3.755798101425171, "step": 125 }, { "epoch": 0.15517241379310345, "grad_norm": 13.715148535872732, "learning_rate": 5.122950819672131e-06, "loss": 3.0117135047912598, "step": 126 }, { "epoch": 0.1564039408866995, "grad_norm": 12.668410235183005, "learning_rate": 5.163934426229509e-06, "loss": 2.944417953491211, "step": 127 }, { "epoch": 0.15763546798029557, "grad_norm": 14.317219715469237, "learning_rate": 5.204918032786885e-06, "loss": 2.672874927520752, "step": 128 }, { "epoch": 0.15886699507389163, "grad_norm": 16.489459603874575, "learning_rate": 5.245901639344263e-06, "loss": 2.7205734252929688, "step": 129 }, { "epoch": 0.16009852216748768, "grad_norm": 16.41932178225047, "learning_rate": 5.286885245901639e-06, "loss": 2.883897304534912, "step": 130 }, { "epoch": 0.16133004926108374, "grad_norm": 15.043569897203326, "learning_rate": 5.327868852459017e-06, "loss": 2.782104253768921, "step": 131 }, { "epoch": 0.1625615763546798, "grad_norm": 8.98371180872493, "learning_rate": 5.3688524590163935e-06, "loss": 2.6445870399475098, "step": 132 }, { "epoch": 0.16379310344827586, "grad_norm": 11.815392040561601, "learning_rate": 5.409836065573772e-06, "loss": 2.9319727420806885, "step": 133 }, { "epoch": 0.16502463054187191, "grad_norm": 10.152797634103624, "learning_rate": 5.4508196721311476e-06, "loss": 3.169668674468994, "step": 134 }, { "epoch": 0.16625615763546797, "grad_norm": 14.778160076043047, "learning_rate": 5.491803278688526e-06, "loss": 2.8588128089904785, "step": 135 }, { "epoch": 0.16748768472906403, "grad_norm": 10.175583728158522, "learning_rate": 5.5327868852459025e-06, "loss": 2.9894580841064453, "step": 136 }, { "epoch": 0.1687192118226601, "grad_norm": 9.056737222762985, "learning_rate": 5.573770491803278e-06, "loss": 2.5721185207366943, "step": 137 }, { "epoch": 0.16995073891625614, "grad_norm": 13.273464461148466, "learning_rate": 5.614754098360657e-06, "loss": 2.927572727203369, "step": 138 }, { "epoch": 0.17118226600985223, "grad_norm": 6.55893818610158, "learning_rate": 5.655737704918033e-06, "loss": 2.1956796646118164, "step": 139 }, { "epoch": 0.1724137931034483, "grad_norm": 29.225445444647217, "learning_rate": 5.696721311475411e-06, "loss": 2.9739363193511963, "step": 140 }, { "epoch": 0.17364532019704434, "grad_norm": 11.15274917433196, "learning_rate": 5.737704918032787e-06, "loss": 2.9413986206054688, "step": 141 }, { "epoch": 0.1748768472906404, "grad_norm": 10.26279112360335, "learning_rate": 5.778688524590165e-06, "loss": 3.267493724822998, "step": 142 }, { "epoch": 0.17610837438423646, "grad_norm": 10.574770426769376, "learning_rate": 5.8196721311475415e-06, "loss": 3.355569362640381, "step": 143 }, { "epoch": 0.17733990147783252, "grad_norm": 30.57215689151005, "learning_rate": 5.860655737704919e-06, "loss": 1.9742871522903442, "step": 144 }, { "epoch": 0.17857142857142858, "grad_norm": 12.842491765573998, "learning_rate": 5.9016393442622956e-06, "loss": 3.571032762527466, "step": 145 }, { "epoch": 0.17980295566502463, "grad_norm": 12.726974439363154, "learning_rate": 5.942622950819673e-06, "loss": 3.3115599155426025, "step": 146 }, { "epoch": 0.1810344827586207, "grad_norm": 17.55458268041124, "learning_rate": 5.98360655737705e-06, "loss": 2.781893730163574, "step": 147 }, { "epoch": 0.18226600985221675, "grad_norm": 21.115989900825127, "learning_rate": 6.024590163934426e-06, "loss": 3.5053911209106445, "step": 148 }, { "epoch": 0.1834975369458128, "grad_norm": 14.601719954400593, "learning_rate": 6.065573770491804e-06, "loss": 2.797297477722168, "step": 149 }, { "epoch": 0.18472906403940886, "grad_norm": 11.706500964440364, "learning_rate": 6.10655737704918e-06, "loss": 2.995811939239502, "step": 150 }, { "epoch": 0.18596059113300492, "grad_norm": 15.414506649569596, "learning_rate": 6.147540983606558e-06, "loss": 3.028142213821411, "step": 151 }, { "epoch": 0.18719211822660098, "grad_norm": 16.893206406115734, "learning_rate": 6.1885245901639345e-06, "loss": 3.092806816101074, "step": 152 }, { "epoch": 0.18842364532019704, "grad_norm": 15.790657692703299, "learning_rate": 6.229508196721312e-06, "loss": 3.4657726287841797, "step": 153 }, { "epoch": 0.1896551724137931, "grad_norm": 14.336314687505745, "learning_rate": 6.270491803278689e-06, "loss": 2.888990879058838, "step": 154 }, { "epoch": 0.19088669950738915, "grad_norm": 8.384597105554349, "learning_rate": 6.311475409836066e-06, "loss": 2.21640682220459, "step": 155 }, { "epoch": 0.1921182266009852, "grad_norm": 15.11144998304732, "learning_rate": 6.352459016393443e-06, "loss": 3.1153030395507812, "step": 156 }, { "epoch": 0.1933497536945813, "grad_norm": 10.552333909396582, "learning_rate": 6.393442622950821e-06, "loss": 3.5814146995544434, "step": 157 }, { "epoch": 0.19458128078817735, "grad_norm": 16.968338748229492, "learning_rate": 6.434426229508197e-06, "loss": 3.3865175247192383, "step": 158 }, { "epoch": 0.1958128078817734, "grad_norm": 18.57431273466726, "learning_rate": 6.475409836065575e-06, "loss": 3.2125191688537598, "step": 159 }, { "epoch": 0.19704433497536947, "grad_norm": 6.884951933192958, "learning_rate": 6.516393442622952e-06, "loss": 3.137500286102295, "step": 160 }, { "epoch": 0.19827586206896552, "grad_norm": 14.232532156130397, "learning_rate": 6.5573770491803276e-06, "loss": 2.63275408744812, "step": 161 }, { "epoch": 0.19950738916256158, "grad_norm": 8.457248873163048, "learning_rate": 6.598360655737706e-06, "loss": 3.1714844703674316, "step": 162 }, { "epoch": 0.20073891625615764, "grad_norm": 8.202663921028103, "learning_rate": 6.6393442622950825e-06, "loss": 2.2414371967315674, "step": 163 }, { "epoch": 0.2019704433497537, "grad_norm": 21.716160496341246, "learning_rate": 6.68032786885246e-06, "loss": 2.4281110763549805, "step": 164 }, { "epoch": 0.20320197044334976, "grad_norm": 14.06837422573523, "learning_rate": 6.721311475409837e-06, "loss": 2.6953632831573486, "step": 165 }, { "epoch": 0.2044334975369458, "grad_norm": 12.440616463990054, "learning_rate": 6.762295081967214e-06, "loss": 2.7645516395568848, "step": 166 }, { "epoch": 0.20566502463054187, "grad_norm": 9.155924284482328, "learning_rate": 6.803278688524591e-06, "loss": 2.676801919937134, "step": 167 }, { "epoch": 0.20689655172413793, "grad_norm": 18.399209140322007, "learning_rate": 6.844262295081968e-06, "loss": 3.2417163848876953, "step": 168 }, { "epoch": 0.20812807881773399, "grad_norm": 10.633235724872472, "learning_rate": 6.885245901639345e-06, "loss": 3.1967976093292236, "step": 169 }, { "epoch": 0.20935960591133004, "grad_norm": 9.001521768789516, "learning_rate": 6.926229508196722e-06, "loss": 3.4212145805358887, "step": 170 }, { "epoch": 0.2105911330049261, "grad_norm": 19.131341549460146, "learning_rate": 6.967213114754099e-06, "loss": 3.0731911659240723, "step": 171 }, { "epoch": 0.21182266009852216, "grad_norm": 24.78027708091891, "learning_rate": 7.0081967213114756e-06, "loss": 3.8659727573394775, "step": 172 }, { "epoch": 0.21305418719211822, "grad_norm": 7.256951095872975, "learning_rate": 7.049180327868853e-06, "loss": 3.036478042602539, "step": 173 }, { "epoch": 0.21428571428571427, "grad_norm": 13.753177425595323, "learning_rate": 7.09016393442623e-06, "loss": 2.489211082458496, "step": 174 }, { "epoch": 0.21551724137931033, "grad_norm": 15.568690129763258, "learning_rate": 7.131147540983607e-06, "loss": 3.8306775093078613, "step": 175 }, { "epoch": 0.21674876847290642, "grad_norm": 14.053955715138319, "learning_rate": 7.172131147540984e-06, "loss": 3.0287742614746094, "step": 176 }, { "epoch": 0.21798029556650247, "grad_norm": 7.402046078874498, "learning_rate": 7.213114754098361e-06, "loss": 2.767753839492798, "step": 177 }, { "epoch": 0.21921182266009853, "grad_norm": 7.607064770644376, "learning_rate": 7.254098360655738e-06, "loss": 2.8400726318359375, "step": 178 }, { "epoch": 0.2204433497536946, "grad_norm": 9.218463959135196, "learning_rate": 7.295081967213115e-06, "loss": 2.9013113975524902, "step": 179 }, { "epoch": 0.22167487684729065, "grad_norm": 14.207394035741054, "learning_rate": 7.336065573770492e-06, "loss": 3.1111714839935303, "step": 180 }, { "epoch": 0.2229064039408867, "grad_norm": 22.91981906121516, "learning_rate": 7.3770491803278695e-06, "loss": 2.968287229537964, "step": 181 }, { "epoch": 0.22413793103448276, "grad_norm": 25.20920899192849, "learning_rate": 7.418032786885246e-06, "loss": 3.2560596466064453, "step": 182 }, { "epoch": 0.22536945812807882, "grad_norm": 11.263908332317076, "learning_rate": 7.459016393442624e-06, "loss": 2.6196365356445312, "step": 183 }, { "epoch": 0.22660098522167488, "grad_norm": 9.253114778490854, "learning_rate": 7.500000000000001e-06, "loss": 2.48789644241333, "step": 184 }, { "epoch": 0.22783251231527094, "grad_norm": 10.894130133931592, "learning_rate": 7.540983606557377e-06, "loss": 3.492011308670044, "step": 185 }, { "epoch": 0.229064039408867, "grad_norm": 10.265317756792616, "learning_rate": 7.581967213114755e-06, "loss": 2.643688917160034, "step": 186 }, { "epoch": 0.23029556650246305, "grad_norm": 18.91537781193984, "learning_rate": 7.622950819672132e-06, "loss": 3.291731834411621, "step": 187 }, { "epoch": 0.2315270935960591, "grad_norm": 8.094549723224802, "learning_rate": 7.66393442622951e-06, "loss": 2.9554359912872314, "step": 188 }, { "epoch": 0.23275862068965517, "grad_norm": 8.032083532292669, "learning_rate": 7.704918032786886e-06, "loss": 2.634860038757324, "step": 189 }, { "epoch": 0.23399014778325122, "grad_norm": 12.421064936443088, "learning_rate": 7.745901639344263e-06, "loss": 3.505284309387207, "step": 190 }, { "epoch": 0.23522167487684728, "grad_norm": 9.73160074977933, "learning_rate": 7.786885245901639e-06, "loss": 2.8865461349487305, "step": 191 }, { "epoch": 0.23645320197044334, "grad_norm": 9.154882618515046, "learning_rate": 7.827868852459017e-06, "loss": 2.804072618484497, "step": 192 }, { "epoch": 0.2376847290640394, "grad_norm": 19.13061642741136, "learning_rate": 7.868852459016394e-06, "loss": 2.830981969833374, "step": 193 }, { "epoch": 0.23891625615763548, "grad_norm": 15.563283146640595, "learning_rate": 7.909836065573772e-06, "loss": 2.2295336723327637, "step": 194 }, { "epoch": 0.24014778325123154, "grad_norm": 12.225259694302743, "learning_rate": 7.950819672131147e-06, "loss": 2.338548183441162, "step": 195 }, { "epoch": 0.2413793103448276, "grad_norm": 9.892040827483035, "learning_rate": 7.991803278688526e-06, "loss": 3.0856008529663086, "step": 196 }, { "epoch": 0.24261083743842365, "grad_norm": 7.694617498251832, "learning_rate": 8.032786885245902e-06, "loss": 2.8032941818237305, "step": 197 }, { "epoch": 0.2438423645320197, "grad_norm": 14.517107480578428, "learning_rate": 8.073770491803279e-06, "loss": 2.793623924255371, "step": 198 }, { "epoch": 0.24507389162561577, "grad_norm": 14.257539519236145, "learning_rate": 8.114754098360657e-06, "loss": 3.316802740097046, "step": 199 }, { "epoch": 0.24630541871921183, "grad_norm": 9.345732169704513, "learning_rate": 8.155737704918034e-06, "loss": 2.7230677604675293, "step": 200 }, { "epoch": 0.24753694581280788, "grad_norm": 15.629904452590212, "learning_rate": 8.19672131147541e-06, "loss": 3.3343541622161865, "step": 201 }, { "epoch": 0.24876847290640394, "grad_norm": 15.523761255621764, "learning_rate": 8.237704918032787e-06, "loss": 2.6796741485595703, "step": 202 }, { "epoch": 0.25, "grad_norm": 19.56220339462512, "learning_rate": 8.278688524590165e-06, "loss": 3.5974526405334473, "step": 203 }, { "epoch": 0.2512315270935961, "grad_norm": 13.897070581153926, "learning_rate": 8.319672131147542e-06, "loss": 2.2697930335998535, "step": 204 }, { "epoch": 0.2524630541871921, "grad_norm": 58.73834156491825, "learning_rate": 8.360655737704919e-06, "loss": 3.692251682281494, "step": 205 }, { "epoch": 0.2536945812807882, "grad_norm": 7.38409958845656, "learning_rate": 8.401639344262295e-06, "loss": 1.9303261041641235, "step": 206 }, { "epoch": 0.25492610837438423, "grad_norm": 9.965151267955871, "learning_rate": 8.442622950819674e-06, "loss": 2.538956880569458, "step": 207 }, { "epoch": 0.2561576354679803, "grad_norm": 9.12744959101674, "learning_rate": 8.48360655737705e-06, "loss": 2.777608633041382, "step": 208 }, { "epoch": 0.25738916256157635, "grad_norm": 7.651759491423955, "learning_rate": 8.524590163934427e-06, "loss": 2.5776896476745605, "step": 209 }, { "epoch": 0.25862068965517243, "grad_norm": 7.384463920815584, "learning_rate": 8.565573770491804e-06, "loss": 2.9199795722961426, "step": 210 }, { "epoch": 0.25985221674876846, "grad_norm": 20.103355409171535, "learning_rate": 8.60655737704918e-06, "loss": 3.515129566192627, "step": 211 }, { "epoch": 0.26108374384236455, "grad_norm": 11.426838299111452, "learning_rate": 8.647540983606559e-06, "loss": 2.5549678802490234, "step": 212 }, { "epoch": 0.2623152709359606, "grad_norm": 9.257633699344172, "learning_rate": 8.688524590163935e-06, "loss": 2.769425630569458, "step": 213 }, { "epoch": 0.26354679802955666, "grad_norm": 10.532098802898833, "learning_rate": 8.729508196721312e-06, "loss": 3.369231700897217, "step": 214 }, { "epoch": 0.2647783251231527, "grad_norm": 9.351621764685488, "learning_rate": 8.770491803278688e-06, "loss": 2.942309856414795, "step": 215 }, { "epoch": 0.2660098522167488, "grad_norm": 13.925057065300786, "learning_rate": 8.811475409836067e-06, "loss": 2.7516608238220215, "step": 216 }, { "epoch": 0.2672413793103448, "grad_norm": 36.50661601809998, "learning_rate": 8.852459016393443e-06, "loss": 2.8445613384246826, "step": 217 }, { "epoch": 0.2684729064039409, "grad_norm": 22.25960453914331, "learning_rate": 8.893442622950822e-06, "loss": 2.987518787384033, "step": 218 }, { "epoch": 0.2697044334975369, "grad_norm": 16.564591915051718, "learning_rate": 8.934426229508197e-06, "loss": 3.2499587535858154, "step": 219 }, { "epoch": 0.270935960591133, "grad_norm": 17.28227853231096, "learning_rate": 8.975409836065575e-06, "loss": 2.926447868347168, "step": 220 }, { "epoch": 0.27216748768472904, "grad_norm": 11.211927116407436, "learning_rate": 9.016393442622952e-06, "loss": 2.8910017013549805, "step": 221 }, { "epoch": 0.2733990147783251, "grad_norm": 8.72596083956733, "learning_rate": 9.057377049180328e-06, "loss": 3.0613536834716797, "step": 222 }, { "epoch": 0.2746305418719212, "grad_norm": 9.803135692376356, "learning_rate": 9.098360655737707e-06, "loss": 2.829414129257202, "step": 223 }, { "epoch": 0.27586206896551724, "grad_norm": 12.92734853493422, "learning_rate": 9.139344262295083e-06, "loss": 2.7085399627685547, "step": 224 }, { "epoch": 0.2770935960591133, "grad_norm": 9.4118708856159, "learning_rate": 9.18032786885246e-06, "loss": 2.6637799739837646, "step": 225 }, { "epoch": 0.27832512315270935, "grad_norm": 18.83957093140758, "learning_rate": 9.221311475409836e-06, "loss": 2.845503807067871, "step": 226 }, { "epoch": 0.27955665024630544, "grad_norm": 13.475569415500434, "learning_rate": 9.262295081967215e-06, "loss": 2.954394817352295, "step": 227 }, { "epoch": 0.28078817733990147, "grad_norm": 8.290170639522628, "learning_rate": 9.303278688524591e-06, "loss": 2.640540838241577, "step": 228 }, { "epoch": 0.28201970443349755, "grad_norm": 11.224559700746246, "learning_rate": 9.344262295081968e-06, "loss": 2.806300163269043, "step": 229 }, { "epoch": 0.2832512315270936, "grad_norm": 7.885675569548075, "learning_rate": 9.385245901639345e-06, "loss": 2.6030101776123047, "step": 230 }, { "epoch": 0.28448275862068967, "grad_norm": 24.236973973758758, "learning_rate": 9.426229508196723e-06, "loss": 2.7991466522216797, "step": 231 }, { "epoch": 0.2857142857142857, "grad_norm": 8.845347044883379, "learning_rate": 9.4672131147541e-06, "loss": 3.106261968612671, "step": 232 }, { "epoch": 0.2869458128078818, "grad_norm": 51.821805980416265, "learning_rate": 9.508196721311476e-06, "loss": 3.2630815505981445, "step": 233 }, { "epoch": 0.2881773399014778, "grad_norm": 16.78742746550897, "learning_rate": 9.549180327868853e-06, "loss": 3.1156482696533203, "step": 234 }, { "epoch": 0.2894088669950739, "grad_norm": 13.713777073631656, "learning_rate": 9.59016393442623e-06, "loss": 3.1271071434020996, "step": 235 }, { "epoch": 0.29064039408866993, "grad_norm": 13.698738323083157, "learning_rate": 9.631147540983608e-06, "loss": 2.536348342895508, "step": 236 }, { "epoch": 0.291871921182266, "grad_norm": 15.926322663194057, "learning_rate": 9.672131147540984e-06, "loss": 2.8055825233459473, "step": 237 }, { "epoch": 0.29310344827586204, "grad_norm": 10.519363729962654, "learning_rate": 9.713114754098361e-06, "loss": 2.9949395656585693, "step": 238 }, { "epoch": 0.29433497536945813, "grad_norm": 12.579584872972768, "learning_rate": 9.754098360655738e-06, "loss": 2.125136137008667, "step": 239 }, { "epoch": 0.2955665024630542, "grad_norm": 11.391036061101172, "learning_rate": 9.795081967213116e-06, "loss": 2.830984592437744, "step": 240 }, { "epoch": 0.29679802955665024, "grad_norm": 14.46789942529014, "learning_rate": 9.836065573770493e-06, "loss": 3.2255706787109375, "step": 241 }, { "epoch": 0.29802955665024633, "grad_norm": 8.899469108078774, "learning_rate": 9.87704918032787e-06, "loss": 2.686436653137207, "step": 242 }, { "epoch": 0.29926108374384236, "grad_norm": 10.094433891654246, "learning_rate": 9.918032786885246e-06, "loss": 2.497978687286377, "step": 243 }, { "epoch": 0.30049261083743845, "grad_norm": 8.691385167763809, "learning_rate": 9.959016393442624e-06, "loss": 3.308448076248169, "step": 244 }, { "epoch": 0.3017241379310345, "grad_norm": 15.757524580227669, "learning_rate": 1e-05, "loss": 3.2378220558166504, "step": 245 }, { "epoch": 0.30295566502463056, "grad_norm": 8.671108255060687, "learning_rate": 9.999994864785605e-06, "loss": 2.4129133224487305, "step": 246 }, { "epoch": 0.3041871921182266, "grad_norm": 13.501190126023713, "learning_rate": 9.99997945915297e-06, "loss": 2.938180923461914, "step": 247 }, { "epoch": 0.3054187192118227, "grad_norm": 11.217667256673044, "learning_rate": 9.999953783133733e-06, "loss": 2.5165305137634277, "step": 248 }, { "epoch": 0.3066502463054187, "grad_norm": 7.520771962392289, "learning_rate": 9.999917836780642e-06, "loss": 3.425577163696289, "step": 249 }, { "epoch": 0.3078817733990148, "grad_norm": 13.889092280188136, "learning_rate": 9.999871620167532e-06, "loss": 2.876093626022339, "step": 250 }, { "epoch": 0.3091133004926108, "grad_norm": 7.799661481860974, "learning_rate": 9.999815133389334e-06, "loss": 2.9071428775787354, "step": 251 }, { "epoch": 0.3103448275862069, "grad_norm": 18.185225557276123, "learning_rate": 9.999748376562078e-06, "loss": 2.998086452484131, "step": 252 }, { "epoch": 0.31157635467980294, "grad_norm": 27.086825836566575, "learning_rate": 9.999671349822887e-06, "loss": 2.1193456649780273, "step": 253 }, { "epoch": 0.312807881773399, "grad_norm": 13.320934166458603, "learning_rate": 9.999584053329983e-06, "loss": 2.753380298614502, "step": 254 }, { "epoch": 0.31403940886699505, "grad_norm": 14.498031739385082, "learning_rate": 9.999486487262677e-06, "loss": 2.876704216003418, "step": 255 }, { "epoch": 0.31527093596059114, "grad_norm": 13.532410059083729, "learning_rate": 9.999378651821381e-06, "loss": 3.0882208347320557, "step": 256 }, { "epoch": 0.31650246305418717, "grad_norm": 13.700484400761207, "learning_rate": 9.999260547227599e-06, "loss": 3.155285120010376, "step": 257 }, { "epoch": 0.31773399014778325, "grad_norm": 12.6000984521867, "learning_rate": 9.999132173723923e-06, "loss": 2.7646055221557617, "step": 258 }, { "epoch": 0.31896551724137934, "grad_norm": 15.115470197004113, "learning_rate": 9.998993531574048e-06, "loss": 2.7237563133239746, "step": 259 }, { "epoch": 0.32019704433497537, "grad_norm": 20.594748113733633, "learning_rate": 9.998844621062755e-06, "loss": 3.3845739364624023, "step": 260 }, { "epoch": 0.32142857142857145, "grad_norm": 10.767576295669059, "learning_rate": 9.998685442495921e-06, "loss": 3.8065264225006104, "step": 261 }, { "epoch": 0.3226600985221675, "grad_norm": 20.754860824013544, "learning_rate": 9.998515996200508e-06, "loss": 2.8899989128112793, "step": 262 }, { "epoch": 0.32389162561576357, "grad_norm": 15.819137797930164, "learning_rate": 9.998336282524579e-06, "loss": 3.253079414367676, "step": 263 }, { "epoch": 0.3251231527093596, "grad_norm": 18.790797790728803, "learning_rate": 9.998146301837274e-06, "loss": 3.346510648727417, "step": 264 }, { "epoch": 0.3263546798029557, "grad_norm": 23.146345527241454, "learning_rate": 9.997946054528837e-06, "loss": 3.4698657989501953, "step": 265 }, { "epoch": 0.3275862068965517, "grad_norm": 14.512612088330997, "learning_rate": 9.99773554101059e-06, "loss": 3.174567699432373, "step": 266 }, { "epoch": 0.3288177339901478, "grad_norm": 12.860516080892424, "learning_rate": 9.997514761714946e-06, "loss": 2.5275719165802, "step": 267 }, { "epoch": 0.33004926108374383, "grad_norm": 9.43003857415246, "learning_rate": 9.997283717095403e-06, "loss": 2.9102673530578613, "step": 268 }, { "epoch": 0.3312807881773399, "grad_norm": 11.178249951549107, "learning_rate": 9.99704240762655e-06, "loss": 2.865558624267578, "step": 269 }, { "epoch": 0.33251231527093594, "grad_norm": 24.802063921828417, "learning_rate": 9.996790833804053e-06, "loss": 2.749305248260498, "step": 270 }, { "epoch": 0.33374384236453203, "grad_norm": 24.70724769915988, "learning_rate": 9.996528996144668e-06, "loss": 2.0590691566467285, "step": 271 }, { "epoch": 0.33497536945812806, "grad_norm": 14.115920333851845, "learning_rate": 9.996256895186234e-06, "loss": 3.0421628952026367, "step": 272 }, { "epoch": 0.33620689655172414, "grad_norm": 12.058059347872495, "learning_rate": 9.995974531487668e-06, "loss": 2.8302841186523438, "step": 273 }, { "epoch": 0.3374384236453202, "grad_norm": 12.632643288786921, "learning_rate": 9.995681905628968e-06, "loss": 2.7192673683166504, "step": 274 }, { "epoch": 0.33866995073891626, "grad_norm": 15.484122360072316, "learning_rate": 9.995379018211215e-06, "loss": 2.3330166339874268, "step": 275 }, { "epoch": 0.3399014778325123, "grad_norm": 13.2967377526589, "learning_rate": 9.995065869856566e-06, "loss": 2.5359480381011963, "step": 276 }, { "epoch": 0.3411330049261084, "grad_norm": 15.221286627267526, "learning_rate": 9.994742461208251e-06, "loss": 3.049252986907959, "step": 277 }, { "epoch": 0.34236453201970446, "grad_norm": 15.24270242699156, "learning_rate": 9.994408792930584e-06, "loss": 3.3440940380096436, "step": 278 }, { "epoch": 0.3435960591133005, "grad_norm": 14.053973379642196, "learning_rate": 9.994064865708944e-06, "loss": 3.038376808166504, "step": 279 }, { "epoch": 0.3448275862068966, "grad_norm": 22.631635572415856, "learning_rate": 9.993710680249788e-06, "loss": 3.6074423789978027, "step": 280 }, { "epoch": 0.3460591133004926, "grad_norm": 20.559687915989883, "learning_rate": 9.993346237280646e-06, "loss": 2.686741352081299, "step": 281 }, { "epoch": 0.3472906403940887, "grad_norm": 12.521946549290966, "learning_rate": 9.992971537550112e-06, "loss": 2.4198198318481445, "step": 282 }, { "epoch": 0.3485221674876847, "grad_norm": 6.138840145200369, "learning_rate": 9.992586581827853e-06, "loss": 2.8091788291931152, "step": 283 }, { "epoch": 0.3497536945812808, "grad_norm": 9.177811201919399, "learning_rate": 9.992191370904599e-06, "loss": 3.0199592113494873, "step": 284 }, { "epoch": 0.35098522167487683, "grad_norm": 11.072879739046153, "learning_rate": 9.991785905592149e-06, "loss": 2.6372945308685303, "step": 285 }, { "epoch": 0.3522167487684729, "grad_norm": 12.835701532770578, "learning_rate": 9.991370186723363e-06, "loss": 2.9127607345581055, "step": 286 }, { "epoch": 0.35344827586206895, "grad_norm": 16.621843867679726, "learning_rate": 9.990944215152166e-06, "loss": 2.464376926422119, "step": 287 }, { "epoch": 0.35467980295566504, "grad_norm": 9.777456171349527, "learning_rate": 9.990507991753535e-06, "loss": 2.8306374549865723, "step": 288 }, { "epoch": 0.35591133004926107, "grad_norm": 11.701262899932036, "learning_rate": 9.990061517423513e-06, "loss": 2.9181313514709473, "step": 289 }, { "epoch": 0.35714285714285715, "grad_norm": 12.914380903938605, "learning_rate": 9.989604793079198e-06, "loss": 3.1937739849090576, "step": 290 }, { "epoch": 0.3583743842364532, "grad_norm": 25.41280169964493, "learning_rate": 9.989137819658738e-06, "loss": 4.190927028656006, "step": 291 }, { "epoch": 0.35960591133004927, "grad_norm": 12.268585179317036, "learning_rate": 9.988660598121337e-06, "loss": 2.8343558311462402, "step": 292 }, { "epoch": 0.3608374384236453, "grad_norm": 14.508602864953724, "learning_rate": 9.988173129447251e-06, "loss": 3.741821050643921, "step": 293 }, { "epoch": 0.3620689655172414, "grad_norm": 8.935077328629724, "learning_rate": 9.98767541463778e-06, "loss": 2.484419345855713, "step": 294 }, { "epoch": 0.3633004926108374, "grad_norm": 8.195009351092525, "learning_rate": 9.987167454715277e-06, "loss": 2.671337127685547, "step": 295 }, { "epoch": 0.3645320197044335, "grad_norm": 11.197259917333458, "learning_rate": 9.986649250723129e-06, "loss": 3.118803024291992, "step": 296 }, { "epoch": 0.3657635467980296, "grad_norm": 15.270785643435941, "learning_rate": 9.986120803725776e-06, "loss": 3.10141658782959, "step": 297 }, { "epoch": 0.3669950738916256, "grad_norm": 11.19651727126236, "learning_rate": 9.985582114808693e-06, "loss": 2.7978734970092773, "step": 298 }, { "epoch": 0.3682266009852217, "grad_norm": 14.058148431334251, "learning_rate": 9.985033185078392e-06, "loss": 2.5770411491394043, "step": 299 }, { "epoch": 0.3694581280788177, "grad_norm": 9.544840021071943, "learning_rate": 9.984474015662421e-06, "loss": 3.0273873805999756, "step": 300 }, { "epoch": 0.3706896551724138, "grad_norm": 8.198220678999139, "learning_rate": 9.983904607709365e-06, "loss": 2.9202780723571777, "step": 301 }, { "epoch": 0.37192118226600984, "grad_norm": 12.107800006970532, "learning_rate": 9.983324962388835e-06, "loss": 2.9816439151763916, "step": 302 }, { "epoch": 0.3731527093596059, "grad_norm": 7.601271321831279, "learning_rate": 9.982735080891471e-06, "loss": 2.5605852603912354, "step": 303 }, { "epoch": 0.37438423645320196, "grad_norm": 13.035543237033318, "learning_rate": 9.982134964428942e-06, "loss": 2.9378490447998047, "step": 304 }, { "epoch": 0.37561576354679804, "grad_norm": 7.731680542963359, "learning_rate": 9.981524614233938e-06, "loss": 2.410521984100342, "step": 305 }, { "epoch": 0.3768472906403941, "grad_norm": 13.52353943681927, "learning_rate": 9.98090403156017e-06, "loss": 2.381927013397217, "step": 306 }, { "epoch": 0.37807881773399016, "grad_norm": 17.35628297309107, "learning_rate": 9.98027321768237e-06, "loss": 3.1156816482543945, "step": 307 }, { "epoch": 0.3793103448275862, "grad_norm": 8.977028820084396, "learning_rate": 9.97963217389628e-06, "loss": 3.2660152912139893, "step": 308 }, { "epoch": 0.3805418719211823, "grad_norm": 14.66965301106164, "learning_rate": 9.978980901518663e-06, "loss": 3.1832613945007324, "step": 309 }, { "epoch": 0.3817733990147783, "grad_norm": 27.78972817701185, "learning_rate": 9.978319401887287e-06, "loss": 2.719600200653076, "step": 310 }, { "epoch": 0.3830049261083744, "grad_norm": 10.666579101176065, "learning_rate": 9.977647676360927e-06, "loss": 2.652092456817627, "step": 311 }, { "epoch": 0.3842364532019704, "grad_norm": 8.005520537074315, "learning_rate": 9.976965726319369e-06, "loss": 2.5932788848876953, "step": 312 }, { "epoch": 0.3854679802955665, "grad_norm": 15.690472287679249, "learning_rate": 9.976273553163393e-06, "loss": 2.558863401412964, "step": 313 }, { "epoch": 0.3866995073891626, "grad_norm": 11.958180437694066, "learning_rate": 9.975571158314783e-06, "loss": 3.1973023414611816, "step": 314 }, { "epoch": 0.3879310344827586, "grad_norm": 12.749275597057334, "learning_rate": 9.974858543216319e-06, "loss": 3.286236524581909, "step": 315 }, { "epoch": 0.3891625615763547, "grad_norm": 16.985399241319477, "learning_rate": 9.974135709331774e-06, "loss": 3.5159969329833984, "step": 316 }, { "epoch": 0.39039408866995073, "grad_norm": 10.457440991240187, "learning_rate": 9.973402658145908e-06, "loss": 2.647761821746826, "step": 317 }, { "epoch": 0.3916256157635468, "grad_norm": 9.450705495020088, "learning_rate": 9.972659391164473e-06, "loss": 2.8499808311462402, "step": 318 }, { "epoch": 0.39285714285714285, "grad_norm": 10.546244474419336, "learning_rate": 9.971905909914206e-06, "loss": 2.332852840423584, "step": 319 }, { "epoch": 0.39408866995073893, "grad_norm": 10.2366500934473, "learning_rate": 9.971142215942817e-06, "loss": 2.627098560333252, "step": 320 }, { "epoch": 0.39532019704433496, "grad_norm": 6.472838949640434, "learning_rate": 9.970368310819e-06, "loss": 2.302323341369629, "step": 321 }, { "epoch": 0.39655172413793105, "grad_norm": 6.421471401290025, "learning_rate": 9.969584196132427e-06, "loss": 2.6783509254455566, "step": 322 }, { "epoch": 0.3977832512315271, "grad_norm": 12.353934861805914, "learning_rate": 9.96878987349373e-06, "loss": 2.9487061500549316, "step": 323 }, { "epoch": 0.39901477832512317, "grad_norm": 13.993445702154649, "learning_rate": 9.967985344534521e-06, "loss": 2.5883233547210693, "step": 324 }, { "epoch": 0.4002463054187192, "grad_norm": 20.380213804590188, "learning_rate": 9.96717061090737e-06, "loss": 3.125821590423584, "step": 325 }, { "epoch": 0.4014778325123153, "grad_norm": 6.812077926758059, "learning_rate": 9.966345674285808e-06, "loss": 2.829881191253662, "step": 326 }, { "epoch": 0.4027093596059113, "grad_norm": 16.808551579421827, "learning_rate": 9.965510536364329e-06, "loss": 2.5988128185272217, "step": 327 }, { "epoch": 0.4039408866995074, "grad_norm": 7.777965739175337, "learning_rate": 9.964665198858375e-06, "loss": 2.158940315246582, "step": 328 }, { "epoch": 0.4051724137931034, "grad_norm": 10.632017505369658, "learning_rate": 9.96380966350434e-06, "loss": 2.716994285583496, "step": 329 }, { "epoch": 0.4064039408866995, "grad_norm": 12.778378390552197, "learning_rate": 9.962943932059573e-06, "loss": 3.1283516883850098, "step": 330 }, { "epoch": 0.40763546798029554, "grad_norm": 12.686658918372668, "learning_rate": 9.962068006302357e-06, "loss": 3.0957908630371094, "step": 331 }, { "epoch": 0.4088669950738916, "grad_norm": 24.890731349370103, "learning_rate": 9.961181888031917e-06, "loss": 2.3027350902557373, "step": 332 }, { "epoch": 0.4100985221674877, "grad_norm": 10.45514873243925, "learning_rate": 9.960285579068419e-06, "loss": 2.956791877746582, "step": 333 }, { "epoch": 0.41133004926108374, "grad_norm": 28.23036034704062, "learning_rate": 9.959379081252958e-06, "loss": 2.5689826011657715, "step": 334 }, { "epoch": 0.4125615763546798, "grad_norm": 8.031700376672275, "learning_rate": 9.958462396447556e-06, "loss": 3.1086199283599854, "step": 335 }, { "epoch": 0.41379310344827586, "grad_norm": 15.790958589129726, "learning_rate": 9.957535526535165e-06, "loss": 3.134901285171509, "step": 336 }, { "epoch": 0.41502463054187194, "grad_norm": 12.433447054233632, "learning_rate": 9.956598473419652e-06, "loss": 2.642225742340088, "step": 337 }, { "epoch": 0.41625615763546797, "grad_norm": 9.36121478561991, "learning_rate": 9.95565123902581e-06, "loss": 2.828200340270996, "step": 338 }, { "epoch": 0.41748768472906406, "grad_norm": 14.194698913635616, "learning_rate": 9.954693825299333e-06, "loss": 2.751354217529297, "step": 339 }, { "epoch": 0.4187192118226601, "grad_norm": 13.475276856352862, "learning_rate": 9.953726234206835e-06, "loss": 2.818434715270996, "step": 340 }, { "epoch": 0.41995073891625617, "grad_norm": 14.017642174434487, "learning_rate": 9.95274846773583e-06, "loss": 2.8631365299224854, "step": 341 }, { "epoch": 0.4211822660098522, "grad_norm": 37.92442284518435, "learning_rate": 9.951760527894733e-06, "loss": 2.387998580932617, "step": 342 }, { "epoch": 0.4224137931034483, "grad_norm": 8.636388354492292, "learning_rate": 9.950762416712862e-06, "loss": 2.366614580154419, "step": 343 }, { "epoch": 0.4236453201970443, "grad_norm": 10.06521281831273, "learning_rate": 9.949754136240416e-06, "loss": 2.4502060413360596, "step": 344 }, { "epoch": 0.4248768472906404, "grad_norm": 12.481723752818217, "learning_rate": 9.948735688548496e-06, "loss": 2.47091007232666, "step": 345 }, { "epoch": 0.42610837438423643, "grad_norm": 8.973793469902368, "learning_rate": 9.947707075729076e-06, "loss": 3.0400021076202393, "step": 346 }, { "epoch": 0.4273399014778325, "grad_norm": 10.331950331735893, "learning_rate": 9.946668299895017e-06, "loss": 2.622288227081299, "step": 347 }, { "epoch": 0.42857142857142855, "grad_norm": 22.195871941281137, "learning_rate": 9.945619363180054e-06, "loss": 3.3773419857025146, "step": 348 }, { "epoch": 0.42980295566502463, "grad_norm": 19.575310687428036, "learning_rate": 9.944560267738792e-06, "loss": 3.279005527496338, "step": 349 }, { "epoch": 0.43103448275862066, "grad_norm": 11.204766296525598, "learning_rate": 9.943491015746704e-06, "loss": 2.8206255435943604, "step": 350 }, { "epoch": 0.43226600985221675, "grad_norm": 19.31443626404287, "learning_rate": 9.942411609400127e-06, "loss": 3.312700033187866, "step": 351 }, { "epoch": 0.43349753694581283, "grad_norm": 12.40959825169754, "learning_rate": 9.941322050916251e-06, "loss": 2.580315113067627, "step": 352 }, { "epoch": 0.43472906403940886, "grad_norm": 18.26867922192619, "learning_rate": 9.940222342533126e-06, "loss": 2.8339614868164062, "step": 353 }, { "epoch": 0.43596059113300495, "grad_norm": 15.240586085653998, "learning_rate": 9.939112486509644e-06, "loss": 2.582752227783203, "step": 354 }, { "epoch": 0.437192118226601, "grad_norm": 14.054810279727889, "learning_rate": 9.937992485125547e-06, "loss": 2.9355309009552, "step": 355 }, { "epoch": 0.43842364532019706, "grad_norm": 7.204056413186231, "learning_rate": 9.936862340681412e-06, "loss": 2.796612024307251, "step": 356 }, { "epoch": 0.4396551724137931, "grad_norm": 5.797127744814052, "learning_rate": 9.935722055498655e-06, "loss": 2.6307716369628906, "step": 357 }, { "epoch": 0.4408866995073892, "grad_norm": 8.742348132173227, "learning_rate": 9.934571631919518e-06, "loss": 2.8603620529174805, "step": 358 }, { "epoch": 0.4421182266009852, "grad_norm": 12.186262361276388, "learning_rate": 9.933411072307071e-06, "loss": 3.1397266387939453, "step": 359 }, { "epoch": 0.4433497536945813, "grad_norm": 8.973047578523662, "learning_rate": 9.9322403790452e-06, "loss": 2.5362772941589355, "step": 360 }, { "epoch": 0.4445812807881773, "grad_norm": 17.982816499460725, "learning_rate": 9.931059554538613e-06, "loss": 2.7547712326049805, "step": 361 }, { "epoch": 0.4458128078817734, "grad_norm": 15.389405107024809, "learning_rate": 9.929868601212822e-06, "loss": 3.144801139831543, "step": 362 }, { "epoch": 0.44704433497536944, "grad_norm": 16.343273720769005, "learning_rate": 9.928667521514149e-06, "loss": 2.600550889968872, "step": 363 }, { "epoch": 0.4482758620689655, "grad_norm": 11.532249256759682, "learning_rate": 9.927456317909711e-06, "loss": 2.176116704940796, "step": 364 }, { "epoch": 0.44950738916256155, "grad_norm": 25.088404612293182, "learning_rate": 9.92623499288743e-06, "loss": 3.1918365955352783, "step": 365 }, { "epoch": 0.45073891625615764, "grad_norm": 12.864077493891681, "learning_rate": 9.92500354895601e-06, "loss": 2.6937577724456787, "step": 366 }, { "epoch": 0.45197044334975367, "grad_norm": 29.27990733585633, "learning_rate": 9.92376198864494e-06, "loss": 3.6490774154663086, "step": 367 }, { "epoch": 0.45320197044334976, "grad_norm": 7.620954232577737, "learning_rate": 9.922510314504493e-06, "loss": 3.0342392921447754, "step": 368 }, { "epoch": 0.4544334975369458, "grad_norm": 14.562498240608573, "learning_rate": 9.921248529105716e-06, "loss": 3.175008773803711, "step": 369 }, { "epoch": 0.45566502463054187, "grad_norm": 9.096092875139751, "learning_rate": 9.919976635040425e-06, "loss": 1.9000710248947144, "step": 370 }, { "epoch": 0.45689655172413796, "grad_norm": 19.30965262540543, "learning_rate": 9.918694634921195e-06, "loss": 3.5248589515686035, "step": 371 }, { "epoch": 0.458128078817734, "grad_norm": 10.529945298812061, "learning_rate": 9.91740253138137e-06, "loss": 2.869842529296875, "step": 372 }, { "epoch": 0.45935960591133007, "grad_norm": 10.698638706211932, "learning_rate": 9.916100327075038e-06, "loss": 1.9380724430084229, "step": 373 }, { "epoch": 0.4605911330049261, "grad_norm": 17.707591147238283, "learning_rate": 9.914788024677039e-06, "loss": 2.2112460136413574, "step": 374 }, { "epoch": 0.4618226600985222, "grad_norm": 10.065846050311237, "learning_rate": 9.913465626882954e-06, "loss": 3.1283068656921387, "step": 375 }, { "epoch": 0.4630541871921182, "grad_norm": 25.33369677490011, "learning_rate": 9.912133136409103e-06, "loss": 2.692117929458618, "step": 376 }, { "epoch": 0.4642857142857143, "grad_norm": 57.3231139544447, "learning_rate": 9.910790555992536e-06, "loss": 3.047241687774658, "step": 377 }, { "epoch": 0.46551724137931033, "grad_norm": 11.840834448379393, "learning_rate": 9.909437888391025e-06, "loss": 3.0103232860565186, "step": 378 }, { "epoch": 0.4667487684729064, "grad_norm": 15.056907160003684, "learning_rate": 9.908075136383068e-06, "loss": 2.8296966552734375, "step": 379 }, { "epoch": 0.46798029556650245, "grad_norm": 8.534626696858023, "learning_rate": 9.906702302767876e-06, "loss": 2.818819999694824, "step": 380 }, { "epoch": 0.46921182266009853, "grad_norm": 29.849300222390532, "learning_rate": 9.905319390365364e-06, "loss": 3.6281867027282715, "step": 381 }, { "epoch": 0.47044334975369456, "grad_norm": 17.161390821083423, "learning_rate": 9.903926402016153e-06, "loss": 2.7123236656188965, "step": 382 }, { "epoch": 0.47167487684729065, "grad_norm": 13.097065098778378, "learning_rate": 9.902523340581562e-06, "loss": 2.69736909866333, "step": 383 }, { "epoch": 0.4729064039408867, "grad_norm": 11.269340257234004, "learning_rate": 9.901110208943599e-06, "loss": 3.088184118270874, "step": 384 }, { "epoch": 0.47413793103448276, "grad_norm": 6.6950707947616745, "learning_rate": 9.899687010004956e-06, "loss": 2.606736183166504, "step": 385 }, { "epoch": 0.4753694581280788, "grad_norm": 10.297903581299613, "learning_rate": 9.898253746689007e-06, "loss": 2.684105157852173, "step": 386 }, { "epoch": 0.4766009852216749, "grad_norm": 15.82478266058562, "learning_rate": 9.896810421939797e-06, "loss": 2.8739280700683594, "step": 387 }, { "epoch": 0.47783251231527096, "grad_norm": 8.284309924074774, "learning_rate": 9.895357038722043e-06, "loss": 2.835542917251587, "step": 388 }, { "epoch": 0.479064039408867, "grad_norm": 15.854123121769446, "learning_rate": 9.893893600021112e-06, "loss": 2.855287551879883, "step": 389 }, { "epoch": 0.4802955665024631, "grad_norm": 7.88725535997062, "learning_rate": 9.892420108843038e-06, "loss": 2.8026838302612305, "step": 390 }, { "epoch": 0.4815270935960591, "grad_norm": 11.000709518913423, "learning_rate": 9.890936568214493e-06, "loss": 3.1150124073028564, "step": 391 }, { "epoch": 0.4827586206896552, "grad_norm": 13.588584372243895, "learning_rate": 9.889442981182802e-06, "loss": 2.578108072280884, "step": 392 }, { "epoch": 0.4839901477832512, "grad_norm": 16.34748858179715, "learning_rate": 9.88793935081592e-06, "loss": 2.7470006942749023, "step": 393 }, { "epoch": 0.4852216748768473, "grad_norm": 10.809579161505546, "learning_rate": 9.88642568020243e-06, "loss": 2.9015283584594727, "step": 394 }, { "epoch": 0.48645320197044334, "grad_norm": 13.55439142286002, "learning_rate": 9.884901972451542e-06, "loss": 3.79250431060791, "step": 395 }, { "epoch": 0.4876847290640394, "grad_norm": 8.909988613184693, "learning_rate": 9.883368230693082e-06, "loss": 3.0748767852783203, "step": 396 }, { "epoch": 0.48891625615763545, "grad_norm": 13.412610776910293, "learning_rate": 9.881824458077491e-06, "loss": 2.822726011276245, "step": 397 }, { "epoch": 0.49014778325123154, "grad_norm": 11.426335338698937, "learning_rate": 9.880270657775806e-06, "loss": 2.7966151237487793, "step": 398 }, { "epoch": 0.49137931034482757, "grad_norm": 10.55324948832395, "learning_rate": 9.878706832979668e-06, "loss": 2.8517651557922363, "step": 399 }, { "epoch": 0.49261083743842365, "grad_norm": 11.070058186972197, "learning_rate": 9.877132986901306e-06, "loss": 2.7754080295562744, "step": 400 }, { "epoch": 0.4938423645320197, "grad_norm": 8.886322673700336, "learning_rate": 9.875549122773536e-06, "loss": 2.9478702545166016, "step": 401 }, { "epoch": 0.49507389162561577, "grad_norm": 9.759021404672636, "learning_rate": 9.87395524384975e-06, "loss": 2.9535412788391113, "step": 402 }, { "epoch": 0.4963054187192118, "grad_norm": 22.265516010081125, "learning_rate": 9.872351353403912e-06, "loss": 3.415161609649658, "step": 403 }, { "epoch": 0.4975369458128079, "grad_norm": 10.3371436402533, "learning_rate": 9.870737454730552e-06, "loss": 2.573082447052002, "step": 404 }, { "epoch": 0.4987684729064039, "grad_norm": 14.615736501967937, "learning_rate": 9.869113551144754e-06, "loss": 2.4743850231170654, "step": 405 }, { "epoch": 0.5, "grad_norm": 10.275697391044838, "learning_rate": 9.867479645982158e-06, "loss": 2.6644279956817627, "step": 406 }, { "epoch": 0.5012315270935961, "grad_norm": 7.731558128938727, "learning_rate": 9.865835742598942e-06, "loss": 2.7798032760620117, "step": 407 }, { "epoch": 0.5024630541871922, "grad_norm": 28.59542346400597, "learning_rate": 9.864181844371828e-06, "loss": 3.939884662628174, "step": 408 }, { "epoch": 0.5036945812807881, "grad_norm": 21.07739414791098, "learning_rate": 9.86251795469806e-06, "loss": 2.8093104362487793, "step": 409 }, { "epoch": 0.5049261083743842, "grad_norm": 8.961555424981583, "learning_rate": 9.860844076995416e-06, "loss": 2.1494715213775635, "step": 410 }, { "epoch": 0.5061576354679803, "grad_norm": 21.200756727942377, "learning_rate": 9.85916021470218e-06, "loss": 2.964136838912964, "step": 411 }, { "epoch": 0.5073891625615764, "grad_norm": 11.020672835034468, "learning_rate": 9.857466371277152e-06, "loss": 2.641287088394165, "step": 412 }, { "epoch": 0.5086206896551724, "grad_norm": 9.8391871787113, "learning_rate": 9.85576255019963e-06, "loss": 2.454512357711792, "step": 413 }, { "epoch": 0.5098522167487685, "grad_norm": 9.302782088404763, "learning_rate": 9.85404875496941e-06, "loss": 2.4566071033477783, "step": 414 }, { "epoch": 0.5110837438423645, "grad_norm": 12.209048739605382, "learning_rate": 9.852324989106772e-06, "loss": 2.7254204750061035, "step": 415 }, { "epoch": 0.5123152709359606, "grad_norm": 17.193015982984093, "learning_rate": 9.850591256152483e-06, "loss": 2.743382215499878, "step": 416 }, { "epoch": 0.5135467980295566, "grad_norm": 31.54989094640885, "learning_rate": 9.848847559667774e-06, "loss": 3.376046657562256, "step": 417 }, { "epoch": 0.5147783251231527, "grad_norm": 11.734812553622533, "learning_rate": 9.847093903234351e-06, "loss": 2.73980975151062, "step": 418 }, { "epoch": 0.5160098522167488, "grad_norm": 8.164256099521083, "learning_rate": 9.845330290454373e-06, "loss": 2.7565903663635254, "step": 419 }, { "epoch": 0.5172413793103449, "grad_norm": 9.178438912949575, "learning_rate": 9.843556724950454e-06, "loss": 2.9061315059661865, "step": 420 }, { "epoch": 0.5184729064039408, "grad_norm": 18.23493245534027, "learning_rate": 9.841773210365646e-06, "loss": 3.1584839820861816, "step": 421 }, { "epoch": 0.5197044334975369, "grad_norm": 13.406138718704618, "learning_rate": 9.839979750363443e-06, "loss": 3.300762176513672, "step": 422 }, { "epoch": 0.520935960591133, "grad_norm": 16.907140017416133, "learning_rate": 9.838176348627768e-06, "loss": 2.5202269554138184, "step": 423 }, { "epoch": 0.5221674876847291, "grad_norm": 14.800436222535966, "learning_rate": 9.83636300886296e-06, "loss": 3.9240634441375732, "step": 424 }, { "epoch": 0.5233990147783252, "grad_norm": 13.058319822050642, "learning_rate": 9.834539734793774e-06, "loss": 3.1783556938171387, "step": 425 }, { "epoch": 0.5246305418719212, "grad_norm": 9.577210971277129, "learning_rate": 9.832706530165372e-06, "loss": 2.787106513977051, "step": 426 }, { "epoch": 0.5258620689655172, "grad_norm": 17.432663310497652, "learning_rate": 9.830863398743313e-06, "loss": 3.270280599594116, "step": 427 }, { "epoch": 0.5270935960591133, "grad_norm": 13.065514198679326, "learning_rate": 9.829010344313548e-06, "loss": 3.0135059356689453, "step": 428 }, { "epoch": 0.5283251231527094, "grad_norm": 12.9248393025633, "learning_rate": 9.82714737068241e-06, "loss": 2.989795207977295, "step": 429 }, { "epoch": 0.5295566502463054, "grad_norm": 15.64315185844485, "learning_rate": 9.825274481676605e-06, "loss": 2.5208187103271484, "step": 430 }, { "epoch": 0.5307881773399015, "grad_norm": 11.452591471364267, "learning_rate": 9.82339168114321e-06, "loss": 3.1890928745269775, "step": 431 }, { "epoch": 0.5320197044334976, "grad_norm": 11.650610381993676, "learning_rate": 9.821498972949657e-06, "loss": 3.0655789375305176, "step": 432 }, { "epoch": 0.5332512315270936, "grad_norm": 7.7840344730355335, "learning_rate": 9.81959636098373e-06, "loss": 2.611284017562866, "step": 433 }, { "epoch": 0.5344827586206896, "grad_norm": 8.93478095027874, "learning_rate": 9.817683849153561e-06, "loss": 2.863576889038086, "step": 434 }, { "epoch": 0.5357142857142857, "grad_norm": 10.52062689285789, "learning_rate": 9.815761441387609e-06, "loss": 2.6186623573303223, "step": 435 }, { "epoch": 0.5369458128078818, "grad_norm": 6.68274047677578, "learning_rate": 9.813829141634666e-06, "loss": 1.3848458528518677, "step": 436 }, { "epoch": 0.5381773399014779, "grad_norm": 9.593848866659638, "learning_rate": 9.811886953863841e-06, "loss": 3.00791597366333, "step": 437 }, { "epoch": 0.5394088669950738, "grad_norm": 7.8032629730941565, "learning_rate": 9.809934882064555e-06, "loss": 2.8431854248046875, "step": 438 }, { "epoch": 0.5406403940886699, "grad_norm": 10.324361743530943, "learning_rate": 9.807972930246531e-06, "loss": 2.3595449924468994, "step": 439 }, { "epoch": 0.541871921182266, "grad_norm": 15.306323140698186, "learning_rate": 9.806001102439789e-06, "loss": 2.55434250831604, "step": 440 }, { "epoch": 0.5431034482758621, "grad_norm": 23.37582741202724, "learning_rate": 9.804019402694627e-06, "loss": 2.4509990215301514, "step": 441 }, { "epoch": 0.5443349753694581, "grad_norm": 9.38267743442567, "learning_rate": 9.802027835081628e-06, "loss": 2.825401782989502, "step": 442 }, { "epoch": 0.5455665024630542, "grad_norm": 10.449224530160473, "learning_rate": 9.800026403691643e-06, "loss": 2.7315573692321777, "step": 443 }, { "epoch": 0.5467980295566502, "grad_norm": 22.900410887080454, "learning_rate": 9.798015112635786e-06, "loss": 3.1359333992004395, "step": 444 }, { "epoch": 0.5480295566502463, "grad_norm": 9.839888483337905, "learning_rate": 9.795993966045418e-06, "loss": 3.2884740829467773, "step": 445 }, { "epoch": 0.5492610837438424, "grad_norm": 9.35231433219537, "learning_rate": 9.793962968072149e-06, "loss": 2.8281359672546387, "step": 446 }, { "epoch": 0.5504926108374384, "grad_norm": 6.698793862232108, "learning_rate": 9.791922122887823e-06, "loss": 2.633974313735962, "step": 447 }, { "epoch": 0.5517241379310345, "grad_norm": 8.317360049933578, "learning_rate": 9.78987143468451e-06, "loss": 2.1651690006256104, "step": 448 }, { "epoch": 0.5529556650246306, "grad_norm": 11.511312923842238, "learning_rate": 9.7878109076745e-06, "loss": 3.011908531188965, "step": 449 }, { "epoch": 0.5541871921182266, "grad_norm": 15.627130212627556, "learning_rate": 9.785740546090293e-06, "loss": 3.121683359146118, "step": 450 }, { "epoch": 0.5554187192118226, "grad_norm": 14.263261857694998, "learning_rate": 9.783660354184589e-06, "loss": 2.9901375770568848, "step": 451 }, { "epoch": 0.5566502463054187, "grad_norm": 15.230602091833177, "learning_rate": 9.78157033623028e-06, "loss": 3.1121528148651123, "step": 452 }, { "epoch": 0.5578817733990148, "grad_norm": 22.32110731618789, "learning_rate": 9.779470496520442e-06, "loss": 2.9811508655548096, "step": 453 }, { "epoch": 0.5591133004926109, "grad_norm": 11.801131103021726, "learning_rate": 9.777360839368327e-06, "loss": 2.8219947814941406, "step": 454 }, { "epoch": 0.5603448275862069, "grad_norm": 10.166506753796495, "learning_rate": 9.77524136910735e-06, "loss": 2.870987892150879, "step": 455 }, { "epoch": 0.5615763546798029, "grad_norm": 9.413959781223877, "learning_rate": 9.773112090091084e-06, "loss": 3.1902365684509277, "step": 456 }, { "epoch": 0.562807881773399, "grad_norm": 12.723571043561764, "learning_rate": 9.770973006693256e-06, "loss": 3.3052220344543457, "step": 457 }, { "epoch": 0.5640394088669951, "grad_norm": 14.337077670753716, "learning_rate": 9.76882412330772e-06, "loss": 2.3376049995422363, "step": 458 }, { "epoch": 0.5652709359605911, "grad_norm": 10.245935627064924, "learning_rate": 9.766665444348472e-06, "loss": 2.8364970684051514, "step": 459 }, { "epoch": 0.5665024630541872, "grad_norm": 18.308636912090915, "learning_rate": 9.76449697424962e-06, "loss": 2.582505702972412, "step": 460 }, { "epoch": 0.5677339901477833, "grad_norm": 8.927255205757533, "learning_rate": 9.76231871746539e-06, "loss": 2.485147476196289, "step": 461 }, { "epoch": 0.5689655172413793, "grad_norm": 11.356171958036413, "learning_rate": 9.760130678470106e-06, "loss": 3.0910027027130127, "step": 462 }, { "epoch": 0.5701970443349754, "grad_norm": 10.937354765360512, "learning_rate": 9.757932861758188e-06, "loss": 3.3621506690979004, "step": 463 }, { "epoch": 0.5714285714285714, "grad_norm": 11.222097055926637, "learning_rate": 9.755725271844142e-06, "loss": 2.8310019969940186, "step": 464 }, { "epoch": 0.5726600985221675, "grad_norm": 30.527175863167063, "learning_rate": 9.753507913262548e-06, "loss": 2.797703742980957, "step": 465 }, { "epoch": 0.5738916256157636, "grad_norm": 15.045285480872131, "learning_rate": 9.751280790568047e-06, "loss": 2.6609878540039062, "step": 466 }, { "epoch": 0.5751231527093597, "grad_norm": 13.871081363987201, "learning_rate": 9.749043908335343e-06, "loss": 2.778043508529663, "step": 467 }, { "epoch": 0.5763546798029556, "grad_norm": 13.771545893500338, "learning_rate": 9.74679727115918e-06, "loss": 2.8315014839172363, "step": 468 }, { "epoch": 0.5775862068965517, "grad_norm": 19.916341772532764, "learning_rate": 9.744540883654348e-06, "loss": 3.3902840614318848, "step": 469 }, { "epoch": 0.5788177339901478, "grad_norm": 22.648986055714484, "learning_rate": 9.742274750455659e-06, "loss": 3.53080153465271, "step": 470 }, { "epoch": 0.5800492610837439, "grad_norm": 23.493391135041467, "learning_rate": 9.739998876217943e-06, "loss": 2.270110845565796, "step": 471 }, { "epoch": 0.5812807881773399, "grad_norm": 12.049204240060057, "learning_rate": 9.737713265616043e-06, "loss": 2.7059872150421143, "step": 472 }, { "epoch": 0.5825123152709359, "grad_norm": 20.2953123538445, "learning_rate": 9.735417923344798e-06, "loss": 4.328514575958252, "step": 473 }, { "epoch": 0.583743842364532, "grad_norm": 14.790979425207205, "learning_rate": 9.73311285411904e-06, "loss": 3.2155938148498535, "step": 474 }, { "epoch": 0.5849753694581281, "grad_norm": 35.79655633932577, "learning_rate": 9.730798062673575e-06, "loss": 2.277022361755371, "step": 475 }, { "epoch": 0.5862068965517241, "grad_norm": 10.760493401180613, "learning_rate": 9.728473553763186e-06, "loss": 2.794111490249634, "step": 476 }, { "epoch": 0.5874384236453202, "grad_norm": 7.877057642797786, "learning_rate": 9.726139332162613e-06, "loss": 3.00388765335083, "step": 477 }, { "epoch": 0.5886699507389163, "grad_norm": 10.974644270731439, "learning_rate": 9.723795402666549e-06, "loss": 2.5355563163757324, "step": 478 }, { "epoch": 0.5899014778325123, "grad_norm": 22.285874447386394, "learning_rate": 9.721441770089621e-06, "loss": 3.2441415786743164, "step": 479 }, { "epoch": 0.5911330049261084, "grad_norm": 13.333764613863938, "learning_rate": 9.719078439266399e-06, "loss": 2.826803207397461, "step": 480 }, { "epoch": 0.5923645320197044, "grad_norm": 6.843940415955184, "learning_rate": 9.716705415051362e-06, "loss": 2.5396804809570312, "step": 481 }, { "epoch": 0.5935960591133005, "grad_norm": 23.860174795633608, "learning_rate": 9.714322702318908e-06, "loss": 2.85546875, "step": 482 }, { "epoch": 0.5948275862068966, "grad_norm": 12.255473790019064, "learning_rate": 9.711930305963333e-06, "loss": 3.217014789581299, "step": 483 }, { "epoch": 0.5960591133004927, "grad_norm": 8.15967079186392, "learning_rate": 9.70952823089882e-06, "loss": 2.781094551086426, "step": 484 }, { "epoch": 0.5972906403940886, "grad_norm": 11.942750739396006, "learning_rate": 9.707116482059447e-06, "loss": 2.617154121398926, "step": 485 }, { "epoch": 0.5985221674876847, "grad_norm": 15.243819163950327, "learning_rate": 9.704695064399143e-06, "loss": 2.601886510848999, "step": 486 }, { "epoch": 0.5997536945812808, "grad_norm": 27.321867153996244, "learning_rate": 9.702263982891712e-06, "loss": 2.9616146087646484, "step": 487 }, { "epoch": 0.6009852216748769, "grad_norm": 9.511966390540264, "learning_rate": 9.699823242530803e-06, "loss": 2.8881943225860596, "step": 488 }, { "epoch": 0.6022167487684729, "grad_norm": 9.673073669047454, "learning_rate": 9.697372848329905e-06, "loss": 2.6718311309814453, "step": 489 }, { "epoch": 0.603448275862069, "grad_norm": 12.946431548834504, "learning_rate": 9.69491280532234e-06, "loss": 2.959104537963867, "step": 490 }, { "epoch": 0.604679802955665, "grad_norm": 13.919071872066077, "learning_rate": 9.692443118561248e-06, "loss": 2.085991621017456, "step": 491 }, { "epoch": 0.6059113300492611, "grad_norm": 168.7126461149896, "learning_rate": 9.689963793119574e-06, "loss": 4.498569488525391, "step": 492 }, { "epoch": 0.6071428571428571, "grad_norm": 12.118400731206464, "learning_rate": 9.68747483409007e-06, "loss": 2.7837424278259277, "step": 493 }, { "epoch": 0.6083743842364532, "grad_norm": 14.436749099341482, "learning_rate": 9.684976246585264e-06, "loss": 2.637524366378784, "step": 494 }, { "epoch": 0.6096059113300493, "grad_norm": 12.923969042105849, "learning_rate": 9.682468035737475e-06, "loss": 2.765727996826172, "step": 495 }, { "epoch": 0.6108374384236454, "grad_norm": 12.957696638033102, "learning_rate": 9.679950206698782e-06, "loss": 2.825129270553589, "step": 496 }, { "epoch": 0.6120689655172413, "grad_norm": 12.328586386653942, "learning_rate": 9.677422764641021e-06, "loss": 2.733224630355835, "step": 497 }, { "epoch": 0.6133004926108374, "grad_norm": 10.367355913707218, "learning_rate": 9.674885714755773e-06, "loss": 3.6287670135498047, "step": 498 }, { "epoch": 0.6145320197044335, "grad_norm": 8.212604152981882, "learning_rate": 9.672339062254359e-06, "loss": 2.38788104057312, "step": 499 }, { "epoch": 0.6157635467980296, "grad_norm": 13.545719741820621, "learning_rate": 9.66978281236782e-06, "loss": 2.942269802093506, "step": 500 }, { "epoch": 0.6169950738916257, "grad_norm": 12.748449735511594, "learning_rate": 9.667216970346916e-06, "loss": 2.4100990295410156, "step": 501 }, { "epoch": 0.6182266009852216, "grad_norm": 15.669540249604715, "learning_rate": 9.6646415414621e-06, "loss": 2.3959155082702637, "step": 502 }, { "epoch": 0.6194581280788177, "grad_norm": 7.949797631449559, "learning_rate": 9.662056531003528e-06, "loss": 2.93027925491333, "step": 503 }, { "epoch": 0.6206896551724138, "grad_norm": 10.116460165226645, "learning_rate": 9.659461944281035e-06, "loss": 3.164715528488159, "step": 504 }, { "epoch": 0.6219211822660099, "grad_norm": 16.218136964088803, "learning_rate": 9.656857786624119e-06, "loss": 2.634587287902832, "step": 505 }, { "epoch": 0.6231527093596059, "grad_norm": 10.922060482445831, "learning_rate": 9.654244063381948e-06, "loss": 3.5667788982391357, "step": 506 }, { "epoch": 0.624384236453202, "grad_norm": 8.542161812174806, "learning_rate": 9.651620779923332e-06, "loss": 2.9383740425109863, "step": 507 }, { "epoch": 0.625615763546798, "grad_norm": 10.347829866523263, "learning_rate": 9.648987941636719e-06, "loss": 2.7658987045288086, "step": 508 }, { "epoch": 0.6268472906403941, "grad_norm": 8.548905747003822, "learning_rate": 9.646345553930187e-06, "loss": 3.3089890480041504, "step": 509 }, { "epoch": 0.6280788177339901, "grad_norm": 6.487031716645425, "learning_rate": 9.643693622231426e-06, "loss": 2.6208066940307617, "step": 510 }, { "epoch": 0.6293103448275862, "grad_norm": 8.110412464341984, "learning_rate": 9.64103215198773e-06, "loss": 2.7099995613098145, "step": 511 }, { "epoch": 0.6305418719211823, "grad_norm": 14.245396567085763, "learning_rate": 9.638361148665989e-06, "loss": 2.894531488418579, "step": 512 }, { "epoch": 0.6317733990147784, "grad_norm": 11.657856176430656, "learning_rate": 9.63568061775267e-06, "loss": 3.1289191246032715, "step": 513 }, { "epoch": 0.6330049261083743, "grad_norm": 14.82098703249081, "learning_rate": 9.632990564753817e-06, "loss": 2.954707145690918, "step": 514 }, { "epoch": 0.6342364532019704, "grad_norm": 6.808305322372754, "learning_rate": 9.630290995195028e-06, "loss": 2.93411922454834, "step": 515 }, { "epoch": 0.6354679802955665, "grad_norm": 7.276364027378903, "learning_rate": 9.62758191462145e-06, "loss": 2.637021541595459, "step": 516 }, { "epoch": 0.6366995073891626, "grad_norm": 13.898029887698447, "learning_rate": 9.624863328597767e-06, "loss": 3.020066261291504, "step": 517 }, { "epoch": 0.6379310344827587, "grad_norm": 24.08793299798331, "learning_rate": 9.622135242708188e-06, "loss": 2.5983335971832275, "step": 518 }, { "epoch": 0.6391625615763546, "grad_norm": 13.609628946959008, "learning_rate": 9.619397662556434e-06, "loss": 2.714207410812378, "step": 519 }, { "epoch": 0.6403940886699507, "grad_norm": 8.67874834351866, "learning_rate": 9.616650593765733e-06, "loss": 2.8505520820617676, "step": 520 }, { "epoch": 0.6416256157635468, "grad_norm": 8.300798802306481, "learning_rate": 9.613894041978795e-06, "loss": 2.8081271648406982, "step": 521 }, { "epoch": 0.6428571428571429, "grad_norm": 10.020203888067801, "learning_rate": 9.611128012857818e-06, "loss": 3.106411933898926, "step": 522 }, { "epoch": 0.6440886699507389, "grad_norm": 9.32846194404547, "learning_rate": 9.60835251208446e-06, "loss": 3.087594985961914, "step": 523 }, { "epoch": 0.645320197044335, "grad_norm": 15.30312860694116, "learning_rate": 9.60556754535984e-06, "loss": 2.7104361057281494, "step": 524 }, { "epoch": 0.646551724137931, "grad_norm": 14.847900307580543, "learning_rate": 9.602773118404518e-06, "loss": 2.8562324047088623, "step": 525 }, { "epoch": 0.6477832512315271, "grad_norm": 8.874728218475076, "learning_rate": 9.599969236958485e-06, "loss": 3.282554864883423, "step": 526 }, { "epoch": 0.6490147783251231, "grad_norm": 8.797844640723032, "learning_rate": 9.597155906781154e-06, "loss": 2.623101234436035, "step": 527 }, { "epoch": 0.6502463054187192, "grad_norm": 9.863712955626877, "learning_rate": 9.59433313365135e-06, "loss": 2.889674663543701, "step": 528 }, { "epoch": 0.6514778325123153, "grad_norm": 10.895399946836921, "learning_rate": 9.591500923367287e-06, "loss": 2.787289619445801, "step": 529 }, { "epoch": 0.6527093596059114, "grad_norm": 10.227588231836696, "learning_rate": 9.58865928174657e-06, "loss": 2.879824161529541, "step": 530 }, { "epoch": 0.6539408866995073, "grad_norm": 8.869590002729453, "learning_rate": 9.585808214626173e-06, "loss": 2.967193126678467, "step": 531 }, { "epoch": 0.6551724137931034, "grad_norm": 8.822784237769133, "learning_rate": 9.582947727862433e-06, "loss": 3.1004772186279297, "step": 532 }, { "epoch": 0.6564039408866995, "grad_norm": 13.346747444504954, "learning_rate": 9.580077827331038e-06, "loss": 2.69935941696167, "step": 533 }, { "epoch": 0.6576354679802956, "grad_norm": 13.781647523739567, "learning_rate": 9.577198518927005e-06, "loss": 3.2806637287139893, "step": 534 }, { "epoch": 0.6588669950738916, "grad_norm": 17.336818625260154, "learning_rate": 9.574309808564682e-06, "loss": 3.050356149673462, "step": 535 }, { "epoch": 0.6600985221674877, "grad_norm": 9.311777076008125, "learning_rate": 9.57141170217773e-06, "loss": 2.8415322303771973, "step": 536 }, { "epoch": 0.6613300492610837, "grad_norm": 12.410317292425518, "learning_rate": 9.568504205719106e-06, "loss": 2.5309085845947266, "step": 537 }, { "epoch": 0.6625615763546798, "grad_norm": 15.225443304522335, "learning_rate": 9.565587325161056e-06, "loss": 3.5695877075195312, "step": 538 }, { "epoch": 0.6637931034482759, "grad_norm": 9.562550097283651, "learning_rate": 9.562661066495108e-06, "loss": 2.7938594818115234, "step": 539 }, { "epoch": 0.6650246305418719, "grad_norm": 8.825138850911314, "learning_rate": 9.559725435732042e-06, "loss": 2.8548948764801025, "step": 540 }, { "epoch": 0.666256157635468, "grad_norm": 10.262300101456184, "learning_rate": 9.556780438901899e-06, "loss": 3.054051399230957, "step": 541 }, { "epoch": 0.6674876847290641, "grad_norm": 26.545357662435233, "learning_rate": 9.553826082053951e-06, "loss": 3.566359281539917, "step": 542 }, { "epoch": 0.6687192118226601, "grad_norm": 12.751257760928588, "learning_rate": 9.550862371256705e-06, "loss": 2.8619909286499023, "step": 543 }, { "epoch": 0.6699507389162561, "grad_norm": 14.522375958962538, "learning_rate": 9.547889312597877e-06, "loss": 3.0177836418151855, "step": 544 }, { "epoch": 0.6711822660098522, "grad_norm": 21.356139863129055, "learning_rate": 9.544906912184383e-06, "loss": 1.9943304061889648, "step": 545 }, { "epoch": 0.6724137931034483, "grad_norm": 5.562548029921876, "learning_rate": 9.541915176142326e-06, "loss": 2.650038957595825, "step": 546 }, { "epoch": 0.6736453201970444, "grad_norm": 12.716408540810125, "learning_rate": 9.538914110616995e-06, "loss": 2.826953411102295, "step": 547 }, { "epoch": 0.6748768472906403, "grad_norm": 9.963475586190201, "learning_rate": 9.53590372177283e-06, "loss": 2.770202159881592, "step": 548 }, { "epoch": 0.6761083743842364, "grad_norm": 32.875675817649174, "learning_rate": 9.532884015793432e-06, "loss": 2.0859670639038086, "step": 549 }, { "epoch": 0.6773399014778325, "grad_norm": 11.983581363761447, "learning_rate": 9.529854998881534e-06, "loss": 2.7557499408721924, "step": 550 }, { "epoch": 0.6785714285714286, "grad_norm": 13.15410482971192, "learning_rate": 9.526816677258995e-06, "loss": 2.710692882537842, "step": 551 }, { "epoch": 0.6798029556650246, "grad_norm": 9.416519545873685, "learning_rate": 9.523769057166791e-06, "loss": 3.055102825164795, "step": 552 }, { "epoch": 0.6810344827586207, "grad_norm": 11.60625904359093, "learning_rate": 9.520712144864997e-06, "loss": 2.606031894683838, "step": 553 }, { "epoch": 0.6822660098522167, "grad_norm": 12.067258837088112, "learning_rate": 9.517645946632766e-06, "loss": 2.9099555015563965, "step": 554 }, { "epoch": 0.6834975369458128, "grad_norm": 10.888483887311708, "learning_rate": 9.514570468768338e-06, "loss": 2.7148189544677734, "step": 555 }, { "epoch": 0.6847290640394089, "grad_norm": 15.652077873544759, "learning_rate": 9.511485717589006e-06, "loss": 2.528857707977295, "step": 556 }, { "epoch": 0.6859605911330049, "grad_norm": 12.750166049911234, "learning_rate": 9.508391699431114e-06, "loss": 2.814006805419922, "step": 557 }, { "epoch": 0.687192118226601, "grad_norm": 12.187355034460829, "learning_rate": 9.50528842065004e-06, "loss": 3.3046352863311768, "step": 558 }, { "epoch": 0.6884236453201971, "grad_norm": 12.182964964248615, "learning_rate": 9.502175887620188e-06, "loss": 3.1519320011138916, "step": 559 }, { "epoch": 0.6896551724137931, "grad_norm": 26.00958255437091, "learning_rate": 9.499054106734963e-06, "loss": 2.2819509506225586, "step": 560 }, { "epoch": 0.6908866995073891, "grad_norm": 10.437408285902773, "learning_rate": 9.495923084406773e-06, "loss": 2.7894287109375, "step": 561 }, { "epoch": 0.6921182266009852, "grad_norm": 27.469926449959043, "learning_rate": 9.492782827067006e-06, "loss": 3.233968734741211, "step": 562 }, { "epoch": 0.6933497536945813, "grad_norm": 19.246363086379436, "learning_rate": 9.48963334116602e-06, "loss": 2.594421863555908, "step": 563 }, { "epoch": 0.6945812807881774, "grad_norm": 11.788384104886402, "learning_rate": 9.486474633173129e-06, "loss": 3.181318759918213, "step": 564 }, { "epoch": 0.6958128078817734, "grad_norm": 10.754721829366346, "learning_rate": 9.48330670957659e-06, "loss": 3.2115392684936523, "step": 565 }, { "epoch": 0.6970443349753694, "grad_norm": 12.089226690676854, "learning_rate": 9.480129576883592e-06, "loss": 2.408634901046753, "step": 566 }, { "epoch": 0.6982758620689655, "grad_norm": 13.370163003636199, "learning_rate": 9.476943241620233e-06, "loss": 2.9304041862487793, "step": 567 }, { "epoch": 0.6995073891625616, "grad_norm": 23.52604617683973, "learning_rate": 9.473747710331524e-06, "loss": 2.75127911567688, "step": 568 }, { "epoch": 0.7007389162561576, "grad_norm": 33.407245089515435, "learning_rate": 9.470542989581357e-06, "loss": 3.3793530464172363, "step": 569 }, { "epoch": 0.7019704433497537, "grad_norm": 8.494714152681327, "learning_rate": 9.467329085952505e-06, "loss": 3.001579999923706, "step": 570 }, { "epoch": 0.7032019704433498, "grad_norm": 12.457476112208125, "learning_rate": 9.464106006046602e-06, "loss": 2.063443422317505, "step": 571 }, { "epoch": 0.7044334975369458, "grad_norm": 11.893453239405563, "learning_rate": 9.460873756484128e-06, "loss": 3.079399585723877, "step": 572 }, { "epoch": 0.7056650246305419, "grad_norm": 17.600286095390665, "learning_rate": 9.457632343904404e-06, "loss": 2.6499621868133545, "step": 573 }, { "epoch": 0.7068965517241379, "grad_norm": 11.052824766544509, "learning_rate": 9.454381774965567e-06, "loss": 2.848517656326294, "step": 574 }, { "epoch": 0.708128078817734, "grad_norm": 11.779141171142625, "learning_rate": 9.451122056344564e-06, "loss": 2.936286687850952, "step": 575 }, { "epoch": 0.7093596059113301, "grad_norm": 12.447965784800195, "learning_rate": 9.44785319473714e-06, "loss": 2.315443515777588, "step": 576 }, { "epoch": 0.7105911330049262, "grad_norm": 13.488894073216153, "learning_rate": 9.444575196857814e-06, "loss": 3.121138334274292, "step": 577 }, { "epoch": 0.7118226600985221, "grad_norm": 15.155327825693226, "learning_rate": 9.441288069439876e-06, "loss": 3.326282501220703, "step": 578 }, { "epoch": 0.7130541871921182, "grad_norm": 12.463167654535278, "learning_rate": 9.437991819235366e-06, "loss": 2.8816466331481934, "step": 579 }, { "epoch": 0.7142857142857143, "grad_norm": 14.769356931380226, "learning_rate": 9.434686453015067e-06, "loss": 3.6819610595703125, "step": 580 }, { "epoch": 0.7155172413793104, "grad_norm": 33.4724384154282, "learning_rate": 9.431371977568483e-06, "loss": 2.904045343399048, "step": 581 }, { "epoch": 0.7167487684729064, "grad_norm": 8.623967512206425, "learning_rate": 9.428048399703831e-06, "loss": 3.5356435775756836, "step": 582 }, { "epoch": 0.7179802955665024, "grad_norm": 11.543651581364673, "learning_rate": 9.424715726248027e-06, "loss": 2.4456870555877686, "step": 583 }, { "epoch": 0.7192118226600985, "grad_norm": 6.392692599853808, "learning_rate": 9.421373964046665e-06, "loss": 2.5000674724578857, "step": 584 }, { "epoch": 0.7204433497536946, "grad_norm": 14.327212598984625, "learning_rate": 9.418023119964012e-06, "loss": 2.856738567352295, "step": 585 }, { "epoch": 0.7216748768472906, "grad_norm": 6.593431351524387, "learning_rate": 9.414663200882991e-06, "loss": 2.623438835144043, "step": 586 }, { "epoch": 0.7229064039408867, "grad_norm": 21.188129548487396, "learning_rate": 9.411294213705162e-06, "loss": 2.987426996231079, "step": 587 }, { "epoch": 0.7241379310344828, "grad_norm": 16.308054128010806, "learning_rate": 9.407916165350713e-06, "loss": 2.8868589401245117, "step": 588 }, { "epoch": 0.7253694581280788, "grad_norm": 5.6345787753710965, "learning_rate": 9.404529062758447e-06, "loss": 2.878659725189209, "step": 589 }, { "epoch": 0.7266009852216748, "grad_norm": 21.624096395043555, "learning_rate": 9.401132912885764e-06, "loss": 3.197636127471924, "step": 590 }, { "epoch": 0.7278325123152709, "grad_norm": 28.674970274616843, "learning_rate": 9.397727722708643e-06, "loss": 2.8974030017852783, "step": 591 }, { "epoch": 0.729064039408867, "grad_norm": 14.603582651571138, "learning_rate": 9.39431349922164e-06, "loss": 2.558945894241333, "step": 592 }, { "epoch": 0.7302955665024631, "grad_norm": 6.004290408591086, "learning_rate": 9.390890249437863e-06, "loss": 1.0518803596496582, "step": 593 }, { "epoch": 0.7315270935960592, "grad_norm": 16.62422153547852, "learning_rate": 9.38745798038896e-06, "loss": 3.5599231719970703, "step": 594 }, { "epoch": 0.7327586206896551, "grad_norm": 9.731487783525235, "learning_rate": 9.384016699125102e-06, "loss": 3.1517539024353027, "step": 595 }, { "epoch": 0.7339901477832512, "grad_norm": 10.319265754066222, "learning_rate": 9.380566412714982e-06, "loss": 2.809019088745117, "step": 596 }, { "epoch": 0.7352216748768473, "grad_norm": 14.675772943073882, "learning_rate": 9.377107128245782e-06, "loss": 3.2317776679992676, "step": 597 }, { "epoch": 0.7364532019704434, "grad_norm": 15.494293767128655, "learning_rate": 9.373638852823166e-06, "loss": 2.7792513370513916, "step": 598 }, { "epoch": 0.7376847290640394, "grad_norm": 17.02704136876628, "learning_rate": 9.370161593571274e-06, "loss": 2.75253963470459, "step": 599 }, { "epoch": 0.7389162561576355, "grad_norm": 14.987899586174, "learning_rate": 9.36667535763269e-06, "loss": 3.381519317626953, "step": 600 }, { "epoch": 0.7401477832512315, "grad_norm": 19.24830788986111, "learning_rate": 9.363180152168448e-06, "loss": 2.62427020072937, "step": 601 }, { "epoch": 0.7413793103448276, "grad_norm": 29.185871046378647, "learning_rate": 9.359675984357992e-06, "loss": 2.4824719429016113, "step": 602 }, { "epoch": 0.7426108374384236, "grad_norm": 8.673285241589555, "learning_rate": 9.356162861399188e-06, "loss": 2.8167097568511963, "step": 603 }, { "epoch": 0.7438423645320197, "grad_norm": 15.318689439779794, "learning_rate": 9.352640790508291e-06, "loss": 2.9545063972473145, "step": 604 }, { "epoch": 0.7450738916256158, "grad_norm": 16.1719679891284, "learning_rate": 9.349109778919938e-06, "loss": 2.833635091781616, "step": 605 }, { "epoch": 0.7463054187192119, "grad_norm": 9.791828516981264, "learning_rate": 9.345569833887124e-06, "loss": 2.775730609893799, "step": 606 }, { "epoch": 0.7475369458128078, "grad_norm": 28.327643593931583, "learning_rate": 9.342020962681206e-06, "loss": 2.652602195739746, "step": 607 }, { "epoch": 0.7487684729064039, "grad_norm": 10.194351110042778, "learning_rate": 9.338463172591868e-06, "loss": 2.7008144855499268, "step": 608 }, { "epoch": 0.75, "grad_norm": 9.445868833849106, "learning_rate": 9.334896470927115e-06, "loss": 2.7525248527526855, "step": 609 }, { "epoch": 0.7512315270935961, "grad_norm": 26.640278263158898, "learning_rate": 9.331320865013257e-06, "loss": 3.446526527404785, "step": 610 }, { "epoch": 0.7524630541871922, "grad_norm": 14.322498892724218, "learning_rate": 9.327736362194899e-06, "loss": 3.0489022731781006, "step": 611 }, { "epoch": 0.7536945812807881, "grad_norm": 9.879694468014232, "learning_rate": 9.324142969834916e-06, "loss": 2.840083360671997, "step": 612 }, { "epoch": 0.7549261083743842, "grad_norm": 8.637072486896487, "learning_rate": 9.32054069531444e-06, "loss": 2.878903388977051, "step": 613 }, { "epoch": 0.7561576354679803, "grad_norm": 10.815449949874669, "learning_rate": 9.316929546032855e-06, "loss": 2.568045139312744, "step": 614 }, { "epoch": 0.7573891625615764, "grad_norm": 18.206411357576574, "learning_rate": 9.313309529407773e-06, "loss": 2.8981618881225586, "step": 615 }, { "epoch": 0.7586206896551724, "grad_norm": 14.515670827099761, "learning_rate": 9.309680652875015e-06, "loss": 3.3486928939819336, "step": 616 }, { "epoch": 0.7598522167487685, "grad_norm": 10.208627841304171, "learning_rate": 9.306042923888607e-06, "loss": 3.1101677417755127, "step": 617 }, { "epoch": 0.7610837438423645, "grad_norm": 9.545526159427496, "learning_rate": 9.302396349920756e-06, "loss": 2.5806779861450195, "step": 618 }, { "epoch": 0.7623152709359606, "grad_norm": 14.260459979245976, "learning_rate": 9.298740938461835e-06, "loss": 2.678412437438965, "step": 619 }, { "epoch": 0.7635467980295566, "grad_norm": 10.808443055524243, "learning_rate": 9.295076697020378e-06, "loss": 2.62287974357605, "step": 620 }, { "epoch": 0.7647783251231527, "grad_norm": 7.635004154714619, "learning_rate": 9.291403633123046e-06, "loss": 3.0267720222473145, "step": 621 }, { "epoch": 0.7660098522167488, "grad_norm": 15.707612902426492, "learning_rate": 9.287721754314629e-06, "loss": 3.147644281387329, "step": 622 }, { "epoch": 0.7672413793103449, "grad_norm": 14.526297785533162, "learning_rate": 9.284031068158023e-06, "loss": 3.159574031829834, "step": 623 }, { "epoch": 0.7684729064039408, "grad_norm": 13.384426615670701, "learning_rate": 9.280331582234212e-06, "loss": 2.6432247161865234, "step": 624 }, { "epoch": 0.7697044334975369, "grad_norm": 14.835270706650137, "learning_rate": 9.27662330414226e-06, "loss": 3.2058279514312744, "step": 625 }, { "epoch": 0.770935960591133, "grad_norm": 10.18160016154191, "learning_rate": 9.272906241499285e-06, "loss": 2.787260055541992, "step": 626 }, { "epoch": 0.7721674876847291, "grad_norm": 13.10691777443293, "learning_rate": 9.269180401940455e-06, "loss": 2.5751729011535645, "step": 627 }, { "epoch": 0.7733990147783252, "grad_norm": 31.695378978025254, "learning_rate": 9.265445793118962e-06, "loss": 2.7433929443359375, "step": 628 }, { "epoch": 0.7746305418719212, "grad_norm": 14.739647225699887, "learning_rate": 9.261702422706014e-06, "loss": 2.771510124206543, "step": 629 }, { "epoch": 0.7758620689655172, "grad_norm": 10.064291707891675, "learning_rate": 9.257950298390815e-06, "loss": 2.873830795288086, "step": 630 }, { "epoch": 0.7770935960591133, "grad_norm": 11.389694880244464, "learning_rate": 9.254189427880548e-06, "loss": 2.7849340438842773, "step": 631 }, { "epoch": 0.7783251231527094, "grad_norm": 9.049096315314397, "learning_rate": 9.250419818900366e-06, "loss": 3.1721668243408203, "step": 632 }, { "epoch": 0.7795566502463054, "grad_norm": 10.167539529464127, "learning_rate": 9.24664147919337e-06, "loss": 2.7493605613708496, "step": 633 }, { "epoch": 0.7807881773399015, "grad_norm": 16.15312048584227, "learning_rate": 9.242854416520591e-06, "loss": 2.470233917236328, "step": 634 }, { "epoch": 0.7820197044334976, "grad_norm": 11.446898989077285, "learning_rate": 9.239058638660983e-06, "loss": 2.7109014987945557, "step": 635 }, { "epoch": 0.7832512315270936, "grad_norm": 15.265461277758774, "learning_rate": 9.235254153411394e-06, "loss": 3.0344791412353516, "step": 636 }, { "epoch": 0.7844827586206896, "grad_norm": 12.820354961892846, "learning_rate": 9.231440968586572e-06, "loss": 2.381561279296875, "step": 637 }, { "epoch": 0.7857142857142857, "grad_norm": 11.033746075983524, "learning_rate": 9.227619092019116e-06, "loss": 1.716524362564087, "step": 638 }, { "epoch": 0.7869458128078818, "grad_norm": 36.36927433118522, "learning_rate": 9.223788531559495e-06, "loss": 2.591820240020752, "step": 639 }, { "epoch": 0.7881773399014779, "grad_norm": 22.998289773218893, "learning_rate": 9.219949295076006e-06, "loss": 3.0194711685180664, "step": 640 }, { "epoch": 0.7894088669950738, "grad_norm": 9.82623401522864, "learning_rate": 9.216101390454771e-06, "loss": 2.852489471435547, "step": 641 }, { "epoch": 0.7906403940886699, "grad_norm": 16.052245879830704, "learning_rate": 9.212244825599714e-06, "loss": 3.1419005393981934, "step": 642 }, { "epoch": 0.791871921182266, "grad_norm": 7.825862600095094, "learning_rate": 9.208379608432552e-06, "loss": 2.8307576179504395, "step": 643 }, { "epoch": 0.7931034482758621, "grad_norm": 8.143984458879574, "learning_rate": 9.204505746892772e-06, "loss": 2.581083297729492, "step": 644 }, { "epoch": 0.7943349753694581, "grad_norm": 18.48744043986469, "learning_rate": 9.200623248937619e-06, "loss": 2.868973731994629, "step": 645 }, { "epoch": 0.7955665024630542, "grad_norm": 8.257209013058233, "learning_rate": 9.196732122542073e-06, "loss": 2.8063859939575195, "step": 646 }, { "epoch": 0.7967980295566502, "grad_norm": 12.8457758247775, "learning_rate": 9.192832375698845e-06, "loss": 2.990504264831543, "step": 647 }, { "epoch": 0.7980295566502463, "grad_norm": 15.29216631759892, "learning_rate": 9.18892401641835e-06, "loss": 2.390320301055908, "step": 648 }, { "epoch": 0.7992610837438424, "grad_norm": 10.724837816433517, "learning_rate": 9.185007052728689e-06, "loss": 2.671368360519409, "step": 649 }, { "epoch": 0.8004926108374384, "grad_norm": 34.65249876179552, "learning_rate": 9.181081492675645e-06, "loss": 3.259225845336914, "step": 650 }, { "epoch": 0.8017241379310345, "grad_norm": 15.454469742488547, "learning_rate": 9.177147344322651e-06, "loss": 2.6810710430145264, "step": 651 }, { "epoch": 0.8029556650246306, "grad_norm": 11.530365704888945, "learning_rate": 9.173204615750792e-06, "loss": 2.833371162414551, "step": 652 }, { "epoch": 0.8041871921182266, "grad_norm": 16.732932575361076, "learning_rate": 9.169253315058764e-06, "loss": 2.3488945960998535, "step": 653 }, { "epoch": 0.8054187192118226, "grad_norm": 9.726564803680413, "learning_rate": 9.165293450362882e-06, "loss": 2.609282970428467, "step": 654 }, { "epoch": 0.8066502463054187, "grad_norm": 7.091881545178562, "learning_rate": 9.161325029797044e-06, "loss": 2.536142587661743, "step": 655 }, { "epoch": 0.8078817733990148, "grad_norm": 9.986592341017682, "learning_rate": 9.157348061512728e-06, "loss": 2.7175073623657227, "step": 656 }, { "epoch": 0.8091133004926109, "grad_norm": 8.682128121343633, "learning_rate": 9.153362553678967e-06, "loss": 2.99211049079895, "step": 657 }, { "epoch": 0.8103448275862069, "grad_norm": 9.322932294885456, "learning_rate": 9.149368514482337e-06, "loss": 2.9390807151794434, "step": 658 }, { "epoch": 0.8115763546798029, "grad_norm": 18.322306761451276, "learning_rate": 9.145365952126937e-06, "loss": 3.0422894954681396, "step": 659 }, { "epoch": 0.812807881773399, "grad_norm": 13.085537087984829, "learning_rate": 9.141354874834372e-06, "loss": 3.0573301315307617, "step": 660 }, { "epoch": 0.8140394088669951, "grad_norm": 11.125925990068074, "learning_rate": 9.13733529084374e-06, "loss": 2.5086781978607178, "step": 661 }, { "epoch": 0.8152709359605911, "grad_norm": 12.865460326379043, "learning_rate": 9.13330720841161e-06, "loss": 2.858813762664795, "step": 662 }, { "epoch": 0.8165024630541872, "grad_norm": 16.68197454357427, "learning_rate": 9.129270635812013e-06, "loss": 2.6715052127838135, "step": 663 }, { "epoch": 0.8177339901477833, "grad_norm": 8.328828299636488, "learning_rate": 9.125225581336408e-06, "loss": 3.18508243560791, "step": 664 }, { "epoch": 0.8189655172413793, "grad_norm": 12.129831350250795, "learning_rate": 9.12117205329369e-06, "loss": 3.0426509380340576, "step": 665 }, { "epoch": 0.8201970443349754, "grad_norm": 10.31532455027376, "learning_rate": 9.11711006001015e-06, "loss": 2.8654000759124756, "step": 666 }, { "epoch": 0.8214285714285714, "grad_norm": 22.312769944556898, "learning_rate": 9.113039609829472e-06, "loss": 3.141207695007324, "step": 667 }, { "epoch": 0.8226600985221675, "grad_norm": 9.864189257198062, "learning_rate": 9.108960711112709e-06, "loss": 2.3188462257385254, "step": 668 }, { "epoch": 0.8238916256157636, "grad_norm": 7.227847497482275, "learning_rate": 9.104873372238269e-06, "loss": 2.785968542098999, "step": 669 }, { "epoch": 0.8251231527093597, "grad_norm": 11.651688072805056, "learning_rate": 9.100777601601896e-06, "loss": 3.0693092346191406, "step": 670 }, { "epoch": 0.8263546798029556, "grad_norm": 14.359029220301974, "learning_rate": 9.096673407616656e-06, "loss": 3.038943290710449, "step": 671 }, { "epoch": 0.8275862068965517, "grad_norm": 11.367718044029667, "learning_rate": 9.092560798712913e-06, "loss": 3.259847640991211, "step": 672 }, { "epoch": 0.8288177339901478, "grad_norm": 7.44988788267686, "learning_rate": 9.08843978333832e-06, "loss": 2.8227295875549316, "step": 673 }, { "epoch": 0.8300492610837439, "grad_norm": 11.316814915640423, "learning_rate": 9.084310369957795e-06, "loss": 3.373309850692749, "step": 674 }, { "epoch": 0.8312807881773399, "grad_norm": 8.828902957926932, "learning_rate": 9.08017256705351e-06, "loss": 3.2833662033081055, "step": 675 }, { "epoch": 0.8325123152709359, "grad_norm": 26.42438693311499, "learning_rate": 9.076026383124863e-06, "loss": 2.7175965309143066, "step": 676 }, { "epoch": 0.833743842364532, "grad_norm": 15.34429558424053, "learning_rate": 9.071871826688472e-06, "loss": 2.594611167907715, "step": 677 }, { "epoch": 0.8349753694581281, "grad_norm": 23.79233069504134, "learning_rate": 9.067708906278155e-06, "loss": 2.8605175018310547, "step": 678 }, { "epoch": 0.8362068965517241, "grad_norm": 16.81935056764866, "learning_rate": 9.063537630444903e-06, "loss": 2.1438748836517334, "step": 679 }, { "epoch": 0.8374384236453202, "grad_norm": 10.888612008792562, "learning_rate": 9.05935800775688e-06, "loss": 2.8170299530029297, "step": 680 }, { "epoch": 0.8386699507389163, "grad_norm": 14.167748893628115, "learning_rate": 9.055170046799386e-06, "loss": 1.7328954935073853, "step": 681 }, { "epoch": 0.8399014778325123, "grad_norm": 9.011227940975711, "learning_rate": 9.050973756174852e-06, "loss": 2.8324766159057617, "step": 682 }, { "epoch": 0.8411330049261084, "grad_norm": 10.1469630150836, "learning_rate": 9.046769144502818e-06, "loss": 2.805690288543701, "step": 683 }, { "epoch": 0.8423645320197044, "grad_norm": 18.955236663194235, "learning_rate": 9.04255622041992e-06, "loss": 2.1270194053649902, "step": 684 }, { "epoch": 0.8435960591133005, "grad_norm": 15.32094380068091, "learning_rate": 9.038334992579863e-06, "loss": 2.8757829666137695, "step": 685 }, { "epoch": 0.8448275862068966, "grad_norm": 11.38695715200097, "learning_rate": 9.034105469653412e-06, "loss": 2.84549617767334, "step": 686 }, { "epoch": 0.8460591133004927, "grad_norm": 9.897557814234148, "learning_rate": 9.029867660328369e-06, "loss": 2.4058642387390137, "step": 687 }, { "epoch": 0.8472906403940886, "grad_norm": 11.793589267069729, "learning_rate": 9.025621573309559e-06, "loss": 3.2583184242248535, "step": 688 }, { "epoch": 0.8485221674876847, "grad_norm": 16.425935376287054, "learning_rate": 9.021367217318808e-06, "loss": 2.951143264770508, "step": 689 }, { "epoch": 0.8497536945812808, "grad_norm": 23.876213749579968, "learning_rate": 9.017104601094927e-06, "loss": 3.0142836570739746, "step": 690 }, { "epoch": 0.8509852216748769, "grad_norm": 6.8041557155789345, "learning_rate": 9.012833733393697e-06, "loss": 2.7629013061523438, "step": 691 }, { "epoch": 0.8522167487684729, "grad_norm": 12.775266706976657, "learning_rate": 9.008554622987845e-06, "loss": 2.6153712272644043, "step": 692 }, { "epoch": 0.853448275862069, "grad_norm": 10.104362674966435, "learning_rate": 9.004267278667032e-06, "loss": 2.7227087020874023, "step": 693 }, { "epoch": 0.854679802955665, "grad_norm": 10.955806195385584, "learning_rate": 8.999971709237832e-06, "loss": 2.7320899963378906, "step": 694 }, { "epoch": 0.8559113300492611, "grad_norm": 9.04416662510961, "learning_rate": 8.99566792352371e-06, "loss": 2.4416356086730957, "step": 695 }, { "epoch": 0.8571428571428571, "grad_norm": 23.838296750423428, "learning_rate": 8.991355930365013e-06, "loss": 3.251642942428589, "step": 696 }, { "epoch": 0.8583743842364532, "grad_norm": 46.67562045008053, "learning_rate": 8.987035738618943e-06, "loss": 2.9292666912078857, "step": 697 }, { "epoch": 0.8596059113300493, "grad_norm": 16.120654552226135, "learning_rate": 8.982707357159549e-06, "loss": 2.804452896118164, "step": 698 }, { "epoch": 0.8608374384236454, "grad_norm": 9.903594099304835, "learning_rate": 8.978370794877691e-06, "loss": 2.4997687339782715, "step": 699 }, { "epoch": 0.8620689655172413, "grad_norm": 43.24532276513338, "learning_rate": 8.974026060681044e-06, "loss": 2.459716558456421, "step": 700 }, { "epoch": 0.8633004926108374, "grad_norm": 6.407514764745252, "learning_rate": 8.969673163494063e-06, "loss": 2.57291316986084, "step": 701 }, { "epoch": 0.8645320197044335, "grad_norm": 9.925965111489338, "learning_rate": 8.965312112257973e-06, "loss": 2.6452269554138184, "step": 702 }, { "epoch": 0.8657635467980296, "grad_norm": 15.666974346483006, "learning_rate": 8.960942915930749e-06, "loss": 2.4361040592193604, "step": 703 }, { "epoch": 0.8669950738916257, "grad_norm": 12.205200732214369, "learning_rate": 8.956565583487092e-06, "loss": 2.819046974182129, "step": 704 }, { "epoch": 0.8682266009852216, "grad_norm": 23.813445037945687, "learning_rate": 8.952180123918419e-06, "loss": 3.536510944366455, "step": 705 }, { "epoch": 0.8694581280788177, "grad_norm": 19.455220333084014, "learning_rate": 8.94778654623284e-06, "loss": 3.340855121612549, "step": 706 }, { "epoch": 0.8706896551724138, "grad_norm": 15.988003472296347, "learning_rate": 8.94338485945514e-06, "loss": 2.7881288528442383, "step": 707 }, { "epoch": 0.8719211822660099, "grad_norm": 18.44911045759373, "learning_rate": 8.938975072626762e-06, "loss": 3.119422197341919, "step": 708 }, { "epoch": 0.8731527093596059, "grad_norm": 18.233236078041163, "learning_rate": 8.934557194805787e-06, "loss": 2.694553852081299, "step": 709 }, { "epoch": 0.874384236453202, "grad_norm": 13.897466836595251, "learning_rate": 8.930131235066914e-06, "loss": 2.7162301540374756, "step": 710 }, { "epoch": 0.875615763546798, "grad_norm": 9.86969530883223, "learning_rate": 8.925697202501442e-06, "loss": 2.4017574787139893, "step": 711 }, { "epoch": 0.8768472906403941, "grad_norm": 22.07024366462836, "learning_rate": 8.92125510621726e-06, "loss": 2.491663932800293, "step": 712 }, { "epoch": 0.8780788177339901, "grad_norm": 9.704458797982127, "learning_rate": 8.916804955338807e-06, "loss": 3.09323787689209, "step": 713 }, { "epoch": 0.8793103448275862, "grad_norm": 14.245234888372442, "learning_rate": 8.91234675900708e-06, "loss": 3.0273964405059814, "step": 714 }, { "epoch": 0.8805418719211823, "grad_norm": 10.033605733175728, "learning_rate": 8.907880526379594e-06, "loss": 2.5009701251983643, "step": 715 }, { "epoch": 0.8817733990147784, "grad_norm": 14.04261929200788, "learning_rate": 8.903406266630374e-06, "loss": 2.7629752159118652, "step": 716 }, { "epoch": 0.8830049261083743, "grad_norm": 19.00265649950274, "learning_rate": 8.898923988949936e-06, "loss": 2.5285563468933105, "step": 717 }, { "epoch": 0.8842364532019704, "grad_norm": 11.293266358312355, "learning_rate": 8.89443370254526e-06, "loss": 2.6903738975524902, "step": 718 }, { "epoch": 0.8854679802955665, "grad_norm": 4.918527502448237, "learning_rate": 8.88993541663978e-06, "loss": 2.8083925247192383, "step": 719 }, { "epoch": 0.8866995073891626, "grad_norm": 14.900444889845339, "learning_rate": 8.885429140473361e-06, "loss": 3.0920486450195312, "step": 720 }, { "epoch": 0.8879310344827587, "grad_norm": 15.55585461742265, "learning_rate": 8.880914883302278e-06, "loss": 2.7464776039123535, "step": 721 }, { "epoch": 0.8891625615763546, "grad_norm": 28.218307852720514, "learning_rate": 8.876392654399208e-06, "loss": 2.7022242546081543, "step": 722 }, { "epoch": 0.8903940886699507, "grad_norm": 7.9907639594026385, "learning_rate": 8.871862463053193e-06, "loss": 3.202090263366699, "step": 723 }, { "epoch": 0.8916256157635468, "grad_norm": 12.370662746549176, "learning_rate": 8.867324318569637e-06, "loss": 2.792590856552124, "step": 724 }, { "epoch": 0.8928571428571429, "grad_norm": 12.485149742498526, "learning_rate": 8.862778230270276e-06, "loss": 2.8918404579162598, "step": 725 }, { "epoch": 0.8940886699507389, "grad_norm": 17.523163987955954, "learning_rate": 8.858224207493165e-06, "loss": 2.881380081176758, "step": 726 }, { "epoch": 0.895320197044335, "grad_norm": 10.929446497515306, "learning_rate": 8.85366225959266e-06, "loss": 2.7197518348693848, "step": 727 }, { "epoch": 0.896551724137931, "grad_norm": 14.58273441890301, "learning_rate": 8.849092395939388e-06, "loss": 2.8458380699157715, "step": 728 }, { "epoch": 0.8977832512315271, "grad_norm": 9.240130544994555, "learning_rate": 8.844514625920246e-06, "loss": 2.5815629959106445, "step": 729 }, { "epoch": 0.8990147783251231, "grad_norm": 12.536324929930204, "learning_rate": 8.839928958938364e-06, "loss": 2.388244867324829, "step": 730 }, { "epoch": 0.9002463054187192, "grad_norm": 9.268565736662921, "learning_rate": 8.835335404413096e-06, "loss": 2.678809404373169, "step": 731 }, { "epoch": 0.9014778325123153, "grad_norm": 13.664345931125762, "learning_rate": 8.830733971779996e-06, "loss": 3.4926984310150146, "step": 732 }, { "epoch": 0.9027093596059114, "grad_norm": 8.38741339708261, "learning_rate": 8.826124670490804e-06, "loss": 3.143955707550049, "step": 733 }, { "epoch": 0.9039408866995073, "grad_norm": 8.285169477267281, "learning_rate": 8.821507510013416e-06, "loss": 2.30763840675354, "step": 734 }, { "epoch": 0.9051724137931034, "grad_norm": 11.658087999854533, "learning_rate": 8.816882499831877e-06, "loss": 3.2019965648651123, "step": 735 }, { "epoch": 0.9064039408866995, "grad_norm": 11.03286006250671, "learning_rate": 8.812249649446357e-06, "loss": 2.5554118156433105, "step": 736 }, { "epoch": 0.9076354679802956, "grad_norm": 10.468019775536181, "learning_rate": 8.807608968373123e-06, "loss": 2.6560721397399902, "step": 737 }, { "epoch": 0.9088669950738916, "grad_norm": 21.753543318554573, "learning_rate": 8.802960466144537e-06, "loss": 3.2792091369628906, "step": 738 }, { "epoch": 0.9100985221674877, "grad_norm": 8.801113008077715, "learning_rate": 8.798304152309019e-06, "loss": 2.4306914806365967, "step": 739 }, { "epoch": 0.9113300492610837, "grad_norm": 11.427047186823343, "learning_rate": 8.793640036431036e-06, "loss": 2.791334867477417, "step": 740 }, { "epoch": 0.9125615763546798, "grad_norm": 11.78168946860072, "learning_rate": 8.788968128091084e-06, "loss": 2.8516879081726074, "step": 741 }, { "epoch": 0.9137931034482759, "grad_norm": 18.40294226204317, "learning_rate": 8.784288436885663e-06, "loss": 2.783674716949463, "step": 742 }, { "epoch": 0.9150246305418719, "grad_norm": 9.042045966372719, "learning_rate": 8.779600972427257e-06, "loss": 2.538564443588257, "step": 743 }, { "epoch": 0.916256157635468, "grad_norm": 21.11608056647587, "learning_rate": 8.774905744344326e-06, "loss": 2.603914260864258, "step": 744 }, { "epoch": 0.9174876847290641, "grad_norm": 18.991966127623154, "learning_rate": 8.770202762281267e-06, "loss": 2.6232197284698486, "step": 745 }, { "epoch": 0.9187192118226601, "grad_norm": 9.533961363388334, "learning_rate": 8.765492035898406e-06, "loss": 2.586906671524048, "step": 746 }, { "epoch": 0.9199507389162561, "grad_norm": 11.702571386481814, "learning_rate": 8.760773574871985e-06, "loss": 3.019075870513916, "step": 747 }, { "epoch": 0.9211822660098522, "grad_norm": 13.549959986762131, "learning_rate": 8.756047388894123e-06, "loss": 2.6554617881774902, "step": 748 }, { "epoch": 0.9224137931034483, "grad_norm": 10.617389263376301, "learning_rate": 8.751313487672815e-06, "loss": 3.3622567653656006, "step": 749 }, { "epoch": 0.9236453201970444, "grad_norm": 15.62971817318244, "learning_rate": 8.746571880931896e-06, "loss": 2.748253345489502, "step": 750 }, { "epoch": 0.9248768472906403, "grad_norm": 10.680533586135248, "learning_rate": 8.741822578411036e-06, "loss": 3.358571767807007, "step": 751 }, { "epoch": 0.9261083743842364, "grad_norm": 8.513871800316197, "learning_rate": 8.737065589865709e-06, "loss": 2.707146167755127, "step": 752 }, { "epoch": 0.9273399014778325, "grad_norm": 15.06206429941032, "learning_rate": 8.732300925067177e-06, "loss": 2.782027006149292, "step": 753 }, { "epoch": 0.9285714285714286, "grad_norm": 13.377969237833796, "learning_rate": 8.727528593802469e-06, "loss": 2.758582830429077, "step": 754 }, { "epoch": 0.9298029556650246, "grad_norm": 12.5189792863405, "learning_rate": 8.722748605874365e-06, "loss": 2.798398971557617, "step": 755 }, { "epoch": 0.9310344827586207, "grad_norm": 7.0237993457565056, "learning_rate": 8.717960971101367e-06, "loss": 2.8893141746520996, "step": 756 }, { "epoch": 0.9322660098522167, "grad_norm": 13.108491345078546, "learning_rate": 8.71316569931769e-06, "loss": 2.8260703086853027, "step": 757 }, { "epoch": 0.9334975369458128, "grad_norm": 13.669452983841648, "learning_rate": 8.708362800373235e-06, "loss": 2.8373727798461914, "step": 758 }, { "epoch": 0.9347290640394089, "grad_norm": 9.979755254671996, "learning_rate": 8.703552284133565e-06, "loss": 2.7638840675354004, "step": 759 }, { "epoch": 0.9359605911330049, "grad_norm": 12.948663627163679, "learning_rate": 8.698734160479892e-06, "loss": 3.436288833618164, "step": 760 }, { "epoch": 0.937192118226601, "grad_norm": 11.570964225425659, "learning_rate": 8.69390843930906e-06, "loss": 2.9463398456573486, "step": 761 }, { "epoch": 0.9384236453201971, "grad_norm": 7.2963116550893945, "learning_rate": 8.68907513053351e-06, "loss": 2.8301844596862793, "step": 762 }, { "epoch": 0.9396551724137931, "grad_norm": 22.281531901716622, "learning_rate": 8.684234244081274e-06, "loss": 2.329922676086426, "step": 763 }, { "epoch": 0.9408866995073891, "grad_norm": 7.190935942786577, "learning_rate": 8.67938578989595e-06, "loss": 2.2752580642700195, "step": 764 }, { "epoch": 0.9421182266009852, "grad_norm": 15.09705330042877, "learning_rate": 8.674529777936674e-06, "loss": 2.549682378768921, "step": 765 }, { "epoch": 0.9433497536945813, "grad_norm": 12.2992067648861, "learning_rate": 8.669666218178114e-06, "loss": 2.177875518798828, "step": 766 }, { "epoch": 0.9445812807881774, "grad_norm": 17.93631082058447, "learning_rate": 8.66479512061044e-06, "loss": 3.4030704498291016, "step": 767 }, { "epoch": 0.9458128078817734, "grad_norm": 12.986753736790972, "learning_rate": 8.659916495239302e-06, "loss": 2.8890881538391113, "step": 768 }, { "epoch": 0.9470443349753694, "grad_norm": 7.80817017570662, "learning_rate": 8.655030352085816e-06, "loss": 2.6665287017822266, "step": 769 }, { "epoch": 0.9482758620689655, "grad_norm": 8.892699708308717, "learning_rate": 8.650136701186537e-06, "loss": 2.8044798374176025, "step": 770 }, { "epoch": 0.9495073891625616, "grad_norm": 12.053681412169821, "learning_rate": 8.645235552593447e-06, "loss": 2.809295654296875, "step": 771 }, { "epoch": 0.9507389162561576, "grad_norm": 9.563242350440067, "learning_rate": 8.640326916373923e-06, "loss": 2.66239070892334, "step": 772 }, { "epoch": 0.9519704433497537, "grad_norm": 11.397593157331492, "learning_rate": 8.635410802610724e-06, "loss": 3.0714645385742188, "step": 773 }, { "epoch": 0.9532019704433498, "grad_norm": 11.141014900339497, "learning_rate": 8.630487221401974e-06, "loss": 2.5254178047180176, "step": 774 }, { "epoch": 0.9544334975369458, "grad_norm": 61.411465635020065, "learning_rate": 8.625556182861126e-06, "loss": 2.4160585403442383, "step": 775 }, { "epoch": 0.9556650246305419, "grad_norm": 15.426050261321397, "learning_rate": 8.620617697116957e-06, "loss": 2.972367763519287, "step": 776 }, { "epoch": 0.9568965517241379, "grad_norm": 11.628713988566439, "learning_rate": 8.615671774313543e-06, "loss": 2.9206340312957764, "step": 777 }, { "epoch": 0.958128078817734, "grad_norm": 9.967877704713992, "learning_rate": 8.61071842461023e-06, "loss": 3.192002296447754, "step": 778 }, { "epoch": 0.9593596059113301, "grad_norm": 8.547648553030225, "learning_rate": 8.605757658181626e-06, "loss": 3.0840883255004883, "step": 779 }, { "epoch": 0.9605911330049262, "grad_norm": 16.72939304902535, "learning_rate": 8.60078948521757e-06, "loss": 3.344426155090332, "step": 780 }, { "epoch": 0.9618226600985221, "grad_norm": 14.860196885671575, "learning_rate": 8.595813915923113e-06, "loss": 2.887132406234741, "step": 781 }, { "epoch": 0.9630541871921182, "grad_norm": 16.504287008501006, "learning_rate": 8.590830960518502e-06, "loss": 2.354299306869507, "step": 782 }, { "epoch": 0.9642857142857143, "grad_norm": 14.601237072457945, "learning_rate": 8.585840629239158e-06, "loss": 2.574817657470703, "step": 783 }, { "epoch": 0.9655172413793104, "grad_norm": 13.581762855163804, "learning_rate": 8.580842932335644e-06, "loss": 2.3363120555877686, "step": 784 }, { "epoch": 0.9667487684729064, "grad_norm": 8.025263413179824, "learning_rate": 8.575837880073663e-06, "loss": 2.452828884124756, "step": 785 }, { "epoch": 0.9679802955665024, "grad_norm": 13.65572211743131, "learning_rate": 8.57082548273402e-06, "loss": 2.8182177543640137, "step": 786 }, { "epoch": 0.9692118226600985, "grad_norm": 22.799475456448384, "learning_rate": 8.565805750612607e-06, "loss": 3.2871310710906982, "step": 787 }, { "epoch": 0.9704433497536946, "grad_norm": 18.807286124868686, "learning_rate": 8.560778694020387e-06, "loss": 2.959153175354004, "step": 788 }, { "epoch": 0.9716748768472906, "grad_norm": 10.644957881123116, "learning_rate": 8.555744323283364e-06, "loss": 2.859107732772827, "step": 789 }, { "epoch": 0.9729064039408867, "grad_norm": 9.606245608690044, "learning_rate": 8.550702648742566e-06, "loss": 2.8537421226501465, "step": 790 }, { "epoch": 0.9741379310344828, "grad_norm": 11.364684038946328, "learning_rate": 8.545653680754029e-06, "loss": 2.77693772315979, "step": 791 }, { "epoch": 0.9753694581280788, "grad_norm": 14.67534992412754, "learning_rate": 8.540597429688761e-06, "loss": 2.6960999965667725, "step": 792 }, { "epoch": 0.9766009852216748, "grad_norm": 14.854511519014162, "learning_rate": 8.535533905932739e-06, "loss": 3.3942298889160156, "step": 793 }, { "epoch": 0.9778325123152709, "grad_norm": 14.090660071520212, "learning_rate": 8.530463119886871e-06, "loss": 2.8664398193359375, "step": 794 }, { "epoch": 0.979064039408867, "grad_norm": 15.427403822127253, "learning_rate": 8.525385081966992e-06, "loss": 3.023148536682129, "step": 795 }, { "epoch": 0.9802955665024631, "grad_norm": 27.257958140053717, "learning_rate": 8.520299802603826e-06, "loss": 2.7858657836914062, "step": 796 }, { "epoch": 0.9815270935960592, "grad_norm": 9.983005237782791, "learning_rate": 8.515207292242969e-06, "loss": 2.4665451049804688, "step": 797 }, { "epoch": 0.9827586206896551, "grad_norm": 11.230050254551738, "learning_rate": 8.510107561344876e-06, "loss": 2.412269115447998, "step": 798 }, { "epoch": 0.9839901477832512, "grad_norm": 18.314579409480903, "learning_rate": 8.505000620384834e-06, "loss": 3.08200740814209, "step": 799 }, { "epoch": 0.9852216748768473, "grad_norm": 12.337382000838234, "learning_rate": 8.499886479852935e-06, "loss": 2.851126194000244, "step": 800 }, { "epoch": 0.9864532019704434, "grad_norm": 16.588814488060716, "learning_rate": 8.494765150254063e-06, "loss": 2.7692008018493652, "step": 801 }, { "epoch": 0.9876847290640394, "grad_norm": 10.778667289136193, "learning_rate": 8.489636642107867e-06, "loss": 2.045649290084839, "step": 802 }, { "epoch": 0.9889162561576355, "grad_norm": 16.235817598925898, "learning_rate": 8.484500965948746e-06, "loss": 3.0901870727539062, "step": 803 }, { "epoch": 0.9901477832512315, "grad_norm": 12.772148604340376, "learning_rate": 8.479358132325815e-06, "loss": 4.652253150939941, "step": 804 }, { "epoch": 0.9913793103448276, "grad_norm": 30.743685192648066, "learning_rate": 8.474208151802898e-06, "loss": 3.992189884185791, "step": 805 }, { "epoch": 0.9926108374384236, "grad_norm": 8.73281768145785, "learning_rate": 8.469051034958496e-06, "loss": 2.7150464057922363, "step": 806 }, { "epoch": 0.9938423645320197, "grad_norm": 9.053303002827397, "learning_rate": 8.46388679238577e-06, "loss": 2.807770013809204, "step": 807 }, { "epoch": 0.9950738916256158, "grad_norm": 10.322870900342917, "learning_rate": 8.458715434692515e-06, "loss": 2.386625289916992, "step": 808 }, { "epoch": 0.9963054187192119, "grad_norm": 11.08968761753187, "learning_rate": 8.453536972501146e-06, "loss": 2.585855484008789, "step": 809 }, { "epoch": 0.9975369458128078, "grad_norm": 17.867602225530977, "learning_rate": 8.448351416448664e-06, "loss": 1.9756630659103394, "step": 810 }, { "epoch": 0.9987684729064039, "grad_norm": 10.119397987976452, "learning_rate": 8.443158777186652e-06, "loss": 2.844794511795044, "step": 811 }, { "epoch": 1.0, "grad_norm": 7.980679156666685, "learning_rate": 8.437959065381232e-06, "loss": 2.8835721015930176, "step": 812 }, { "epoch": 1.001231527093596, "grad_norm": 7.910274895398585, "learning_rate": 8.432752291713058e-06, "loss": 1.4173179864883423, "step": 813 }, { "epoch": 1.0024630541871922, "grad_norm": 11.748384071481883, "learning_rate": 8.427538466877294e-06, "loss": 1.3743655681610107, "step": 814 }, { "epoch": 1.0036945812807883, "grad_norm": 15.520903995356328, "learning_rate": 8.422317601583576e-06, "loss": 1.448968768119812, "step": 815 }, { "epoch": 1.0049261083743843, "grad_norm": 10.900297712673185, "learning_rate": 8.417089706556015e-06, "loss": 1.4555410146713257, "step": 816 }, { "epoch": 1.0061576354679802, "grad_norm": 14.944365989075473, "learning_rate": 8.411854792533154e-06, "loss": 1.3096075057983398, "step": 817 }, { "epoch": 1.0073891625615763, "grad_norm": 28.47454569698464, "learning_rate": 8.406612870267957e-06, "loss": 1.8452348709106445, "step": 818 }, { "epoch": 1.0086206896551724, "grad_norm": 15.756002610301957, "learning_rate": 8.401363950527777e-06, "loss": 1.6339285373687744, "step": 819 }, { "epoch": 1.0098522167487685, "grad_norm": 6.289340790151406, "learning_rate": 8.39610804409435e-06, "loss": 1.714133381843567, "step": 820 }, { "epoch": 1.0110837438423645, "grad_norm": 11.713574774158978, "learning_rate": 8.390845161763756e-06, "loss": 1.7810550928115845, "step": 821 }, { "epoch": 1.0123152709359606, "grad_norm": 13.688437053039554, "learning_rate": 8.385575314346408e-06, "loss": 1.2523250579833984, "step": 822 }, { "epoch": 1.0135467980295567, "grad_norm": 9.835238587520983, "learning_rate": 8.380298512667023e-06, "loss": 1.4618515968322754, "step": 823 }, { "epoch": 1.0147783251231528, "grad_norm": 12.580368500055666, "learning_rate": 8.375014767564606e-06, "loss": 1.5188508033752441, "step": 824 }, { "epoch": 1.0160098522167487, "grad_norm": 13.76649655840591, "learning_rate": 8.369724089892423e-06, "loss": 1.3847301006317139, "step": 825 }, { "epoch": 1.0172413793103448, "grad_norm": 10.435853268719002, "learning_rate": 8.364426490517978e-06, "loss": 1.2926149368286133, "step": 826 }, { "epoch": 1.0184729064039408, "grad_norm": 16.445003227804108, "learning_rate": 8.359121980322992e-06, "loss": 2.3063907623291016, "step": 827 }, { "epoch": 1.019704433497537, "grad_norm": 11.557235656795728, "learning_rate": 8.353810570203392e-06, "loss": 1.8268505334854126, "step": 828 }, { "epoch": 1.020935960591133, "grad_norm": 14.632274264873946, "learning_rate": 8.34849227106926e-06, "loss": 1.7018903493881226, "step": 829 }, { "epoch": 1.022167487684729, "grad_norm": 11.600489411721503, "learning_rate": 8.343167093844847e-06, "loss": 1.228044867515564, "step": 830 }, { "epoch": 1.0233990147783252, "grad_norm": 16.088239405853525, "learning_rate": 8.337835049468517e-06, "loss": 1.8953372240066528, "step": 831 }, { "epoch": 1.0246305418719213, "grad_norm": 18.96191614490354, "learning_rate": 8.332496148892748e-06, "loss": 2.2595765590667725, "step": 832 }, { "epoch": 1.0258620689655173, "grad_norm": 15.40920733163635, "learning_rate": 8.327150403084105e-06, "loss": 1.9772108793258667, "step": 833 }, { "epoch": 1.0270935960591132, "grad_norm": 13.682030994380478, "learning_rate": 8.321797823023201e-06, "loss": 1.6397690773010254, "step": 834 }, { "epoch": 1.0283251231527093, "grad_norm": 15.155038881668695, "learning_rate": 8.3164384197047e-06, "loss": 1.8092628717422485, "step": 835 }, { "epoch": 1.0295566502463054, "grad_norm": 11.138568264810678, "learning_rate": 8.311072204137272e-06, "loss": 1.4974594116210938, "step": 836 }, { "epoch": 1.0307881773399015, "grad_norm": 12.21109867389211, "learning_rate": 8.305699187343586e-06, "loss": 1.6198664903640747, "step": 837 }, { "epoch": 1.0320197044334976, "grad_norm": 15.324750685835358, "learning_rate": 8.300319380360278e-06, "loss": 1.3746960163116455, "step": 838 }, { "epoch": 1.0332512315270936, "grad_norm": 7.824249576144248, "learning_rate": 8.294932794237936e-06, "loss": 1.6171293258666992, "step": 839 }, { "epoch": 1.0344827586206897, "grad_norm": 8.892333167572344, "learning_rate": 8.289539440041066e-06, "loss": 1.569738507270813, "step": 840 }, { "epoch": 1.0357142857142858, "grad_norm": 11.852198048161208, "learning_rate": 8.284139328848083e-06, "loss": 1.2823517322540283, "step": 841 }, { "epoch": 1.0369458128078817, "grad_norm": 8.261136034676777, "learning_rate": 8.278732471751275e-06, "loss": 1.646303415298462, "step": 842 }, { "epoch": 1.0381773399014778, "grad_norm": 10.756475200770923, "learning_rate": 8.273318879856794e-06, "loss": 1.1557375192642212, "step": 843 }, { "epoch": 1.0394088669950738, "grad_norm": 11.706598803766697, "learning_rate": 8.26789856428462e-06, "loss": 1.8793773651123047, "step": 844 }, { "epoch": 1.04064039408867, "grad_norm": 12.96726521358098, "learning_rate": 8.262471536168547e-06, "loss": 1.8577170372009277, "step": 845 }, { "epoch": 1.041871921182266, "grad_norm": 9.437922676603566, "learning_rate": 8.257037806656156e-06, "loss": 1.6104650497436523, "step": 846 }, { "epoch": 1.043103448275862, "grad_norm": 9.578661144979, "learning_rate": 8.251597386908791e-06, "loss": 1.5425922870635986, "step": 847 }, { "epoch": 1.0443349753694582, "grad_norm": 20.263987667471525, "learning_rate": 8.246150288101544e-06, "loss": 1.681383728981018, "step": 848 }, { "epoch": 1.0455665024630543, "grad_norm": 13.601576634163374, "learning_rate": 8.240696521423221e-06, "loss": 1.7646219730377197, "step": 849 }, { "epoch": 1.0467980295566504, "grad_norm": 7.679649660703675, "learning_rate": 8.23523609807633e-06, "loss": 1.445223331451416, "step": 850 }, { "epoch": 1.0480295566502462, "grad_norm": 14.66829985016366, "learning_rate": 8.229769029277044e-06, "loss": 0.9492518901824951, "step": 851 }, { "epoch": 1.0492610837438423, "grad_norm": 10.487758371701569, "learning_rate": 8.224295326255194e-06, "loss": 1.33433997631073, "step": 852 }, { "epoch": 1.0504926108374384, "grad_norm": 10.533804685248148, "learning_rate": 8.218815000254233e-06, "loss": 1.712221384048462, "step": 853 }, { "epoch": 1.0517241379310345, "grad_norm": 9.208819021387981, "learning_rate": 8.213328062531223e-06, "loss": 2.256254196166992, "step": 854 }, { "epoch": 1.0529556650246306, "grad_norm": 20.4330836347585, "learning_rate": 8.207834524356804e-06, "loss": 1.1827871799468994, "step": 855 }, { "epoch": 1.0541871921182266, "grad_norm": 16.459676535775454, "learning_rate": 8.202334397015173e-06, "loss": 1.831944465637207, "step": 856 }, { "epoch": 1.0554187192118227, "grad_norm": 9.540607740889314, "learning_rate": 8.196827691804066e-06, "loss": 1.4239716529846191, "step": 857 }, { "epoch": 1.0566502463054188, "grad_norm": 8.826612392912715, "learning_rate": 8.191314420034728e-06, "loss": 1.4468379020690918, "step": 858 }, { "epoch": 1.0578817733990147, "grad_norm": 11.710928299860754, "learning_rate": 8.185794593031889e-06, "loss": 1.5082018375396729, "step": 859 }, { "epoch": 1.0591133004926108, "grad_norm": 11.098469341339896, "learning_rate": 8.180268222133748e-06, "loss": 1.7838118076324463, "step": 860 }, { "epoch": 1.0603448275862069, "grad_norm": 14.517325254327519, "learning_rate": 8.174735318691946e-06, "loss": 2.0072226524353027, "step": 861 }, { "epoch": 1.061576354679803, "grad_norm": 15.816554295123568, "learning_rate": 8.16919589407154e-06, "loss": 1.521295189857483, "step": 862 }, { "epoch": 1.062807881773399, "grad_norm": 10.07588615463877, "learning_rate": 8.163649959650983e-06, "loss": 1.790357232093811, "step": 863 }, { "epoch": 1.064039408866995, "grad_norm": 12.92318973646725, "learning_rate": 8.1580975268221e-06, "loss": 1.602294683456421, "step": 864 }, { "epoch": 1.0652709359605912, "grad_norm": 16.86268483373184, "learning_rate": 8.152538606990065e-06, "loss": 1.4220796823501587, "step": 865 }, { "epoch": 1.0665024630541873, "grad_norm": 8.194415784575718, "learning_rate": 8.146973211573378e-06, "loss": 1.5728261470794678, "step": 866 }, { "epoch": 1.0677339901477834, "grad_norm": 9.338981810977407, "learning_rate": 8.141401352003834e-06, "loss": 1.4759845733642578, "step": 867 }, { "epoch": 1.0689655172413792, "grad_norm": 13.09579029321424, "learning_rate": 8.135823039726513e-06, "loss": 1.0524405241012573, "step": 868 }, { "epoch": 1.0701970443349753, "grad_norm": 11.844876838448121, "learning_rate": 8.130238286199747e-06, "loss": 1.538460373878479, "step": 869 }, { "epoch": 1.0714285714285714, "grad_norm": 14.772231246122598, "learning_rate": 8.124647102895098e-06, "loss": 1.1455146074295044, "step": 870 }, { "epoch": 1.0726600985221675, "grad_norm": 6.428068633502984, "learning_rate": 8.119049501297336e-06, "loss": 1.5209722518920898, "step": 871 }, { "epoch": 1.0738916256157636, "grad_norm": 8.28556104097166, "learning_rate": 8.113445492904416e-06, "loss": 1.359959602355957, "step": 872 }, { "epoch": 1.0751231527093597, "grad_norm": 17.73488508571987, "learning_rate": 8.107835089227446e-06, "loss": 0.7508935928344727, "step": 873 }, { "epoch": 1.0763546798029557, "grad_norm": 11.851747710913228, "learning_rate": 8.102218301790686e-06, "loss": 1.1200660467147827, "step": 874 }, { "epoch": 1.0775862068965518, "grad_norm": 19.474238137735632, "learning_rate": 8.096595142131491e-06, "loss": 1.4502555131912231, "step": 875 }, { "epoch": 1.0788177339901477, "grad_norm": 15.231876740076657, "learning_rate": 8.090965621800317e-06, "loss": 1.4533472061157227, "step": 876 }, { "epoch": 1.0800492610837438, "grad_norm": 11.532100577512736, "learning_rate": 8.085329752360683e-06, "loss": 1.3467981815338135, "step": 877 }, { "epoch": 1.0812807881773399, "grad_norm": 13.292362259628844, "learning_rate": 8.079687545389144e-06, "loss": 1.5720915794372559, "step": 878 }, { "epoch": 1.082512315270936, "grad_norm": 9.912980730028881, "learning_rate": 8.074039012475277e-06, "loss": 0.9794504642486572, "step": 879 }, { "epoch": 1.083743842364532, "grad_norm": 13.363222552608596, "learning_rate": 8.068384165221657e-06, "loss": 1.8581080436706543, "step": 880 }, { "epoch": 1.0849753694581281, "grad_norm": 11.004102766432679, "learning_rate": 8.062723015243821e-06, "loss": 1.5307658910751343, "step": 881 }, { "epoch": 1.0862068965517242, "grad_norm": 18.014628524050508, "learning_rate": 8.05705557417026e-06, "loss": 2.7890782356262207, "step": 882 }, { "epoch": 1.0874384236453203, "grad_norm": 14.288061386453462, "learning_rate": 8.051381853642385e-06, "loss": 1.7938904762268066, "step": 883 }, { "epoch": 1.0886699507389164, "grad_norm": 10.969422494881371, "learning_rate": 8.0457018653145e-06, "loss": 1.7228388786315918, "step": 884 }, { "epoch": 1.0899014778325122, "grad_norm": 12.323796763628843, "learning_rate": 8.04001562085379e-06, "loss": 1.2761911153793335, "step": 885 }, { "epoch": 1.0911330049261083, "grad_norm": 14.027385869484647, "learning_rate": 8.034323131940288e-06, "loss": 1.2001762390136719, "step": 886 }, { "epoch": 1.0923645320197044, "grad_norm": 14.618738176876956, "learning_rate": 8.028624410266856e-06, "loss": 1.0602792501449585, "step": 887 }, { "epoch": 1.0935960591133005, "grad_norm": 11.93157233511751, "learning_rate": 8.022919467539157e-06, "loss": 1.6093053817749023, "step": 888 }, { "epoch": 1.0948275862068966, "grad_norm": 10.808992515441345, "learning_rate": 8.017208315475633e-06, "loss": 1.3845837116241455, "step": 889 }, { "epoch": 1.0960591133004927, "grad_norm": 12.467752533525676, "learning_rate": 8.011490965807479e-06, "loss": 1.170523762702942, "step": 890 }, { "epoch": 1.0972906403940887, "grad_norm": 17.336013797078692, "learning_rate": 8.005767430278619e-06, "loss": 2.2524640560150146, "step": 891 }, { "epoch": 1.0985221674876848, "grad_norm": 15.86628802074285, "learning_rate": 8.00003772064569e-06, "loss": 1.900492787361145, "step": 892 }, { "epoch": 1.0997536945812807, "grad_norm": 19.413325130840665, "learning_rate": 7.994301848678006e-06, "loss": 1.9371180534362793, "step": 893 }, { "epoch": 1.1009852216748768, "grad_norm": 4.577148785717797, "learning_rate": 7.98855982615754e-06, "loss": 0.5737314224243164, "step": 894 }, { "epoch": 1.1022167487684729, "grad_norm": 10.864604119199031, "learning_rate": 7.982811664878897e-06, "loss": 1.9806501865386963, "step": 895 }, { "epoch": 1.103448275862069, "grad_norm": 8.224536911257772, "learning_rate": 7.977057376649295e-06, "loss": 1.0362755060195923, "step": 896 }, { "epoch": 1.104679802955665, "grad_norm": 13.847190655637428, "learning_rate": 7.971296973288534e-06, "loss": 1.70633864402771, "step": 897 }, { "epoch": 1.1059113300492611, "grad_norm": 11.90483842365472, "learning_rate": 7.965530466628977e-06, "loss": 1.787100911140442, "step": 898 }, { "epoch": 1.1071428571428572, "grad_norm": 7.493522717607931, "learning_rate": 7.959757868515526e-06, "loss": 1.725630283355713, "step": 899 }, { "epoch": 1.1083743842364533, "grad_norm": 12.386314393672189, "learning_rate": 7.953979190805587e-06, "loss": 1.216347575187683, "step": 900 }, { "epoch": 1.1096059113300494, "grad_norm": 13.629660364524488, "learning_rate": 7.948194445369065e-06, "loss": 1.4683033227920532, "step": 901 }, { "epoch": 1.1108374384236452, "grad_norm": 9.487923792239608, "learning_rate": 7.942403644088319e-06, "loss": 1.1516010761260986, "step": 902 }, { "epoch": 1.1120689655172413, "grad_norm": 10.340810165841779, "learning_rate": 7.936606798858154e-06, "loss": 1.9040346145629883, "step": 903 }, { "epoch": 1.1133004926108374, "grad_norm": 10.742162155829218, "learning_rate": 7.930803921585787e-06, "loss": 1.3092480897903442, "step": 904 }, { "epoch": 1.1145320197044335, "grad_norm": 16.471340717748625, "learning_rate": 7.924995024190825e-06, "loss": 1.5384130477905273, "step": 905 }, { "epoch": 1.1157635467980296, "grad_norm": 11.414793353837775, "learning_rate": 7.91918011860524e-06, "loss": 1.537634015083313, "step": 906 }, { "epoch": 1.1169950738916257, "grad_norm": 12.176064899819426, "learning_rate": 7.91335921677335e-06, "loss": 1.7487473487854004, "step": 907 }, { "epoch": 1.1182266009852218, "grad_norm": 12.781345279460623, "learning_rate": 7.907532330651784e-06, "loss": 2.079786539077759, "step": 908 }, { "epoch": 1.1194581280788178, "grad_norm": 10.30058954805613, "learning_rate": 7.901699472209467e-06, "loss": 1.8143104314804077, "step": 909 }, { "epoch": 1.1206896551724137, "grad_norm": 15.820572235657158, "learning_rate": 7.89586065342759e-06, "loss": 1.532914161682129, "step": 910 }, { "epoch": 1.1219211822660098, "grad_norm": 26.078680608781927, "learning_rate": 7.890015886299587e-06, "loss": 1.2643623352050781, "step": 911 }, { "epoch": 1.1231527093596059, "grad_norm": 15.92927259283418, "learning_rate": 7.884165182831112e-06, "loss": 1.9245643615722656, "step": 912 }, { "epoch": 1.124384236453202, "grad_norm": 8.730585299979154, "learning_rate": 7.878308555040012e-06, "loss": 1.7177766561508179, "step": 913 }, { "epoch": 1.125615763546798, "grad_norm": 13.722962990198047, "learning_rate": 7.872446014956302e-06, "loss": 1.8152745962142944, "step": 914 }, { "epoch": 1.1268472906403941, "grad_norm": 12.040054937289696, "learning_rate": 7.86657757462214e-06, "loss": 1.1599400043487549, "step": 915 }, { "epoch": 1.1280788177339902, "grad_norm": 17.03991328119548, "learning_rate": 7.860703246091808e-06, "loss": 2.191415786743164, "step": 916 }, { "epoch": 1.1293103448275863, "grad_norm": 8.884816055359531, "learning_rate": 7.85482304143168e-06, "loss": 1.395401120185852, "step": 917 }, { "epoch": 1.1305418719211824, "grad_norm": 10.016142876641439, "learning_rate": 7.848936972720203e-06, "loss": 1.3161064386367798, "step": 918 }, { "epoch": 1.1317733990147782, "grad_norm": 10.950651931490869, "learning_rate": 7.843045052047863e-06, "loss": 1.1442368030548096, "step": 919 }, { "epoch": 1.1330049261083743, "grad_norm": 11.684566217639523, "learning_rate": 7.837147291517172e-06, "loss": 1.7718126773834229, "step": 920 }, { "epoch": 1.1342364532019704, "grad_norm": 38.19632435773612, "learning_rate": 7.831243703242636e-06, "loss": 0.8722761869430542, "step": 921 }, { "epoch": 1.1354679802955665, "grad_norm": 13.481663274756508, "learning_rate": 7.825334299350733e-06, "loss": 1.5427806377410889, "step": 922 }, { "epoch": 1.1366995073891626, "grad_norm": 12.916623808621747, "learning_rate": 7.819419091979884e-06, "loss": 1.1668936014175415, "step": 923 }, { "epoch": 1.1379310344827587, "grad_norm": 33.988394562573184, "learning_rate": 7.813498093280432e-06, "loss": 1.1266424655914307, "step": 924 }, { "epoch": 1.1391625615763548, "grad_norm": 12.20456485780647, "learning_rate": 7.807571315414616e-06, "loss": 1.493699550628662, "step": 925 }, { "epoch": 1.1403940886699506, "grad_norm": 11.501099824006364, "learning_rate": 7.801638770556547e-06, "loss": 1.6297705173492432, "step": 926 }, { "epoch": 1.1416256157635467, "grad_norm": 15.624448888450939, "learning_rate": 7.795700470892177e-06, "loss": 2.0215024948120117, "step": 927 }, { "epoch": 1.1428571428571428, "grad_norm": 16.250949070025708, "learning_rate": 7.78975642861929e-06, "loss": 1.6887433528900146, "step": 928 }, { "epoch": 1.1440886699507389, "grad_norm": 11.317008900299918, "learning_rate": 7.783806655947454e-06, "loss": 1.3021103143692017, "step": 929 }, { "epoch": 1.145320197044335, "grad_norm": 18.00432398689311, "learning_rate": 7.777851165098012e-06, "loss": 1.2565847635269165, "step": 930 }, { "epoch": 1.146551724137931, "grad_norm": 12.425268826770786, "learning_rate": 7.771889968304054e-06, "loss": 2.616732358932495, "step": 931 }, { "epoch": 1.1477832512315271, "grad_norm": 8.224670550968264, "learning_rate": 7.765923077810389e-06, "loss": 1.4130675792694092, "step": 932 }, { "epoch": 1.1490147783251232, "grad_norm": 10.969684493935905, "learning_rate": 7.759950505873523e-06, "loss": 1.4476386308670044, "step": 933 }, { "epoch": 1.1502463054187193, "grad_norm": 11.651048950094761, "learning_rate": 7.753972264761629e-06, "loss": 2.25156307220459, "step": 934 }, { "epoch": 1.1514778325123154, "grad_norm": 8.613574530576384, "learning_rate": 7.747988366754529e-06, "loss": 1.5051602125167847, "step": 935 }, { "epoch": 1.1527093596059113, "grad_norm": 7.732488282674765, "learning_rate": 7.74199882414366e-06, "loss": 1.6275739669799805, "step": 936 }, { "epoch": 1.1539408866995073, "grad_norm": 8.808852629450387, "learning_rate": 7.736003649232058e-06, "loss": 1.595947504043579, "step": 937 }, { "epoch": 1.1551724137931034, "grad_norm": 9.458208308368622, "learning_rate": 7.730002854334328e-06, "loss": 1.4467124938964844, "step": 938 }, { "epoch": 1.1564039408866995, "grad_norm": 9.214195809195965, "learning_rate": 7.723996451776615e-06, "loss": 1.2888911962509155, "step": 939 }, { "epoch": 1.1576354679802956, "grad_norm": 9.788392349003187, "learning_rate": 7.717984453896585e-06, "loss": 1.2005081176757812, "step": 940 }, { "epoch": 1.1588669950738917, "grad_norm": 13.47176609715776, "learning_rate": 7.711966873043396e-06, "loss": 1.5737872123718262, "step": 941 }, { "epoch": 1.1600985221674878, "grad_norm": 14.995704151739991, "learning_rate": 7.705943721577679e-06, "loss": 1.929309368133545, "step": 942 }, { "epoch": 1.1613300492610836, "grad_norm": 17.48600802078703, "learning_rate": 7.699915011871502e-06, "loss": 1.2395710945129395, "step": 943 }, { "epoch": 1.1625615763546797, "grad_norm": 17.02963003158409, "learning_rate": 7.693880756308349e-06, "loss": 1.5058845281600952, "step": 944 }, { "epoch": 1.1637931034482758, "grad_norm": 9.980347268918823, "learning_rate": 7.687840967283102e-06, "loss": 1.1811325550079346, "step": 945 }, { "epoch": 1.1650246305418719, "grad_norm": 10.638678008803145, "learning_rate": 7.681795657202004e-06, "loss": 1.0631262063980103, "step": 946 }, { "epoch": 1.166256157635468, "grad_norm": 13.280226823401785, "learning_rate": 7.675744838482641e-06, "loss": 1.8445112705230713, "step": 947 }, { "epoch": 1.167487684729064, "grad_norm": 14.581956189852988, "learning_rate": 7.669688523553913e-06, "loss": 0.4735199511051178, "step": 948 }, { "epoch": 1.1687192118226601, "grad_norm": 17.412681962110952, "learning_rate": 7.66362672485601e-06, "loss": 2.7862026691436768, "step": 949 }, { "epoch": 1.1699507389162562, "grad_norm": 15.996981867868751, "learning_rate": 7.657559454840386e-06, "loss": 2.1690142154693604, "step": 950 }, { "epoch": 1.1711822660098523, "grad_norm": 13.46492564795987, "learning_rate": 7.651486725969736e-06, "loss": 1.7143161296844482, "step": 951 }, { "epoch": 1.1724137931034484, "grad_norm": 60.546763405202356, "learning_rate": 7.645408550717966e-06, "loss": 1.5288606882095337, "step": 952 }, { "epoch": 1.1736453201970443, "grad_norm": 20.830833617022666, "learning_rate": 7.639324941570165e-06, "loss": 1.8929002285003662, "step": 953 }, { "epoch": 1.1748768472906403, "grad_norm": 11.758979912185547, "learning_rate": 7.633235911022592e-06, "loss": 1.5853391885757446, "step": 954 }, { "epoch": 1.1761083743842364, "grad_norm": 9.321138258104417, "learning_rate": 7.627141471582635e-06, "loss": 1.1136324405670166, "step": 955 }, { "epoch": 1.1773399014778325, "grad_norm": 12.598497007373025, "learning_rate": 7.6210416357687975e-06, "loss": 1.868667721748352, "step": 956 }, { "epoch": 1.1785714285714286, "grad_norm": 18.119098704002848, "learning_rate": 7.614936416110668e-06, "loss": 1.5594688653945923, "step": 957 }, { "epoch": 1.1798029556650247, "grad_norm": 12.510268205050629, "learning_rate": 7.6088258251488845e-06, "loss": 2.3145830631256104, "step": 958 }, { "epoch": 1.1810344827586208, "grad_norm": 21.45877658729593, "learning_rate": 7.6027098754351306e-06, "loss": 1.1473604440689087, "step": 959 }, { "epoch": 1.1822660098522166, "grad_norm": 14.411977842812997, "learning_rate": 7.596588579532087e-06, "loss": 2.2835638523101807, "step": 960 }, { "epoch": 1.1834975369458127, "grad_norm": 10.612962818159787, "learning_rate": 7.590461950013424e-06, "loss": 1.8787577152252197, "step": 961 }, { "epoch": 1.1847290640394088, "grad_norm": 14.448843378652771, "learning_rate": 7.584329999463763e-06, "loss": 2.114804983139038, "step": 962 }, { "epoch": 1.185960591133005, "grad_norm": 18.66312529631292, "learning_rate": 7.578192740478656e-06, "loss": 1.288927435874939, "step": 963 }, { "epoch": 1.187192118226601, "grad_norm": 13.413800953526167, "learning_rate": 7.572050185664558e-06, "loss": 1.929607629776001, "step": 964 }, { "epoch": 1.188423645320197, "grad_norm": 33.30553598268168, "learning_rate": 7.565902347638806e-06, "loss": 0.5397343039512634, "step": 965 }, { "epoch": 1.1896551724137931, "grad_norm": 22.357001178408265, "learning_rate": 7.559749239029584e-06, "loss": 1.1908174753189087, "step": 966 }, { "epoch": 1.1908866995073892, "grad_norm": 12.645033432851402, "learning_rate": 7.553590872475909e-06, "loss": 1.624518632888794, "step": 967 }, { "epoch": 1.1921182266009853, "grad_norm": 7.88579724345472, "learning_rate": 7.547427260627586e-06, "loss": 1.3011376857757568, "step": 968 }, { "epoch": 1.1933497536945814, "grad_norm": 12.668296763355277, "learning_rate": 7.541258416145212e-06, "loss": 1.2930490970611572, "step": 969 }, { "epoch": 1.1945812807881773, "grad_norm": 13.656364437533624, "learning_rate": 7.535084351700117e-06, "loss": 1.34272038936615, "step": 970 }, { "epoch": 1.1958128078817734, "grad_norm": 7.953764967047039, "learning_rate": 7.528905079974358e-06, "loss": 1.2804269790649414, "step": 971 }, { "epoch": 1.1970443349753694, "grad_norm": 30.30009152991955, "learning_rate": 7.522720613660691e-06, "loss": 1.7138396501541138, "step": 972 }, { "epoch": 1.1982758620689655, "grad_norm": 11.304720421109014, "learning_rate": 7.5165309654625405e-06, "loss": 1.7358574867248535, "step": 973 }, { "epoch": 1.1995073891625616, "grad_norm": 12.764936977199811, "learning_rate": 7.510336148093975e-06, "loss": 1.0514552593231201, "step": 974 }, { "epoch": 1.2007389162561577, "grad_norm": 13.712017805285841, "learning_rate": 7.504136174279679e-06, "loss": 1.7314313650131226, "step": 975 }, { "epoch": 1.2019704433497538, "grad_norm": 10.549295388514395, "learning_rate": 7.4979310567549315e-06, "loss": 1.0069202184677124, "step": 976 }, { "epoch": 1.2032019704433496, "grad_norm": 11.995004609846932, "learning_rate": 7.491720808265576e-06, "loss": 1.1851680278778076, "step": 977 }, { "epoch": 1.2044334975369457, "grad_norm": 9.145447142909285, "learning_rate": 7.485505441567995e-06, "loss": 1.355776309967041, "step": 978 }, { "epoch": 1.2056650246305418, "grad_norm": 12.426586307445273, "learning_rate": 7.4792849694290846e-06, "loss": 1.5034677982330322, "step": 979 }, { "epoch": 1.206896551724138, "grad_norm": 10.349726791509415, "learning_rate": 7.473059404626229e-06, "loss": 1.9321900606155396, "step": 980 }, { "epoch": 1.208128078817734, "grad_norm": 15.998756607416226, "learning_rate": 7.466828759947271e-06, "loss": 1.4899095296859741, "step": 981 }, { "epoch": 1.20935960591133, "grad_norm": 9.148483453369403, "learning_rate": 7.46059304819049e-06, "loss": 1.9984737634658813, "step": 982 }, { "epoch": 1.2105911330049262, "grad_norm": 14.110455851158502, "learning_rate": 7.454352282164572e-06, "loss": 1.7756625413894653, "step": 983 }, { "epoch": 1.2118226600985222, "grad_norm": 14.856359846911952, "learning_rate": 7.448106474688588e-06, "loss": 1.47117018699646, "step": 984 }, { "epoch": 1.2130541871921183, "grad_norm": 11.010014718420686, "learning_rate": 7.441855638591958e-06, "loss": 1.3485603332519531, "step": 985 }, { "epoch": 1.2142857142857142, "grad_norm": 9.111669104291623, "learning_rate": 7.435599786714438e-06, "loss": 1.3982055187225342, "step": 986 }, { "epoch": 1.2155172413793103, "grad_norm": 8.494506145789243, "learning_rate": 7.429338931906085e-06, "loss": 1.4942795038223267, "step": 987 }, { "epoch": 1.2167487684729064, "grad_norm": 10.475857134873458, "learning_rate": 7.423073087027228e-06, "loss": 2.227587938308716, "step": 988 }, { "epoch": 1.2179802955665024, "grad_norm": 14.131512244457296, "learning_rate": 7.416802264948455e-06, "loss": 1.523234486579895, "step": 989 }, { "epoch": 1.2192118226600985, "grad_norm": 26.011485441346537, "learning_rate": 7.410526478550568e-06, "loss": 3.9873814582824707, "step": 990 }, { "epoch": 1.2204433497536946, "grad_norm": 8.306933788704631, "learning_rate": 7.404245740724573e-06, "loss": 1.279615044593811, "step": 991 }, { "epoch": 1.2216748768472907, "grad_norm": 9.109406755351628, "learning_rate": 7.3979600643716435e-06, "loss": 0.9347010850906372, "step": 992 }, { "epoch": 1.2229064039408868, "grad_norm": 8.57513677802596, "learning_rate": 7.391669462403096e-06, "loss": 1.9017002582550049, "step": 993 }, { "epoch": 1.2241379310344827, "grad_norm": 10.325069084719962, "learning_rate": 7.385373947740369e-06, "loss": 1.7247897386550903, "step": 994 }, { "epoch": 1.2253694581280787, "grad_norm": 13.648497855444653, "learning_rate": 7.379073533314988e-06, "loss": 0.7111251950263977, "step": 995 }, { "epoch": 1.2266009852216748, "grad_norm": 10.812707758109589, "learning_rate": 7.372768232068544e-06, "loss": 0.9086591601371765, "step": 996 }, { "epoch": 1.227832512315271, "grad_norm": 11.1413160950967, "learning_rate": 7.366458056952668e-06, "loss": 1.6426423788070679, "step": 997 }, { "epoch": 1.229064039408867, "grad_norm": 19.358982299314505, "learning_rate": 7.360143020929e-06, "loss": 1.2501566410064697, "step": 998 }, { "epoch": 1.230295566502463, "grad_norm": 15.35154457763416, "learning_rate": 7.353823136969167e-06, "loss": 2.263824939727783, "step": 999 }, { "epoch": 1.2315270935960592, "grad_norm": 15.502037939673096, "learning_rate": 7.34749841805475e-06, "loss": 1.3503868579864502, "step": 1000 }, { "epoch": 1.2327586206896552, "grad_norm": 12.387685564521446, "learning_rate": 7.341168877177267e-06, "loss": 1.2844277620315552, "step": 1001 }, { "epoch": 1.2339901477832513, "grad_norm": 21.028406448646585, "learning_rate": 7.3348345273381365e-06, "loss": 1.823725700378418, "step": 1002 }, { "epoch": 1.2352216748768472, "grad_norm": 12.53431965462443, "learning_rate": 7.328495381548655e-06, "loss": 1.8349339962005615, "step": 1003 }, { "epoch": 1.2364532019704433, "grad_norm": 11.75012181314542, "learning_rate": 7.322151452829972e-06, "loss": 1.431024432182312, "step": 1004 }, { "epoch": 1.2376847290640394, "grad_norm": 7.268447687614364, "learning_rate": 7.315802754213062e-06, "loss": 0.8406596183776855, "step": 1005 }, { "epoch": 1.2389162561576355, "grad_norm": 16.476664169610704, "learning_rate": 7.309449298738696e-06, "loss": 1.7037804126739502, "step": 1006 }, { "epoch": 1.2401477832512315, "grad_norm": 10.719400575974607, "learning_rate": 7.303091099457418e-06, "loss": 1.4264461994171143, "step": 1007 }, { "epoch": 1.2413793103448276, "grad_norm": 11.634717084876037, "learning_rate": 7.296728169429511e-06, "loss": 2.502678632736206, "step": 1008 }, { "epoch": 1.2426108374384237, "grad_norm": 9.436373278027489, "learning_rate": 7.290360521724984e-06, "loss": 1.5582114458084106, "step": 1009 }, { "epoch": 1.2438423645320198, "grad_norm": 10.373164591549747, "learning_rate": 7.283988169423526e-06, "loss": 1.494875192642212, "step": 1010 }, { "epoch": 1.2450738916256157, "grad_norm": 13.031187040858585, "learning_rate": 7.277611125614499e-06, "loss": 1.886913776397705, "step": 1011 }, { "epoch": 1.2463054187192117, "grad_norm": 19.92471933345498, "learning_rate": 7.271229403396896e-06, "loss": 1.8913657665252686, "step": 1012 }, { "epoch": 1.2475369458128078, "grad_norm": 21.8856932814209, "learning_rate": 7.264843015879321e-06, "loss": 1.1614234447479248, "step": 1013 }, { "epoch": 1.248768472906404, "grad_norm": 11.581317439717322, "learning_rate": 7.258451976179967e-06, "loss": 1.6838147640228271, "step": 1014 }, { "epoch": 1.25, "grad_norm": 14.274704649607155, "learning_rate": 7.25205629742657e-06, "loss": 1.1039239168167114, "step": 1015 }, { "epoch": 1.251231527093596, "grad_norm": 10.222730157124893, "learning_rate": 7.245655992756406e-06, "loss": 1.519346833229065, "step": 1016 }, { "epoch": 1.2524630541871922, "grad_norm": 8.325249693832719, "learning_rate": 7.2392510753162516e-06, "loss": 1.0175197124481201, "step": 1017 }, { "epoch": 1.2536945812807883, "grad_norm": 12.766382857494223, "learning_rate": 7.232841558262354e-06, "loss": 0.9778202772140503, "step": 1018 }, { "epoch": 1.2549261083743843, "grad_norm": 17.499343558391605, "learning_rate": 7.226427454760412e-06, "loss": 1.8379024267196655, "step": 1019 }, { "epoch": 1.2561576354679804, "grad_norm": 11.150234617545141, "learning_rate": 7.2200087779855435e-06, "loss": 1.8412721157073975, "step": 1020 }, { "epoch": 1.2573891625615763, "grad_norm": 8.992400726896724, "learning_rate": 7.213585541122261e-06, "loss": 1.8508501052856445, "step": 1021 }, { "epoch": 1.2586206896551724, "grad_norm": 12.44309006439825, "learning_rate": 7.207157757364445e-06, "loss": 1.3070871829986572, "step": 1022 }, { "epoch": 1.2598522167487685, "grad_norm": 12.840031276685824, "learning_rate": 7.200725439915314e-06, "loss": 2.1278223991394043, "step": 1023 }, { "epoch": 1.2610837438423645, "grad_norm": 8.633495704921142, "learning_rate": 7.194288601987398e-06, "loss": 1.0636892318725586, "step": 1024 }, { "epoch": 1.2623152709359606, "grad_norm": 10.874767223460788, "learning_rate": 7.187847256802518e-06, "loss": 1.7365200519561768, "step": 1025 }, { "epoch": 1.2635467980295567, "grad_norm": 12.21472476387578, "learning_rate": 7.181401417591746e-06, "loss": 1.792116403579712, "step": 1026 }, { "epoch": 1.2647783251231526, "grad_norm": 8.787411821208611, "learning_rate": 7.174951097595389e-06, "loss": 1.3348667621612549, "step": 1027 }, { "epoch": 1.2660098522167487, "grad_norm": 17.72872801553084, "learning_rate": 7.168496310062959e-06, "loss": 1.677919626235962, "step": 1028 }, { "epoch": 1.2672413793103448, "grad_norm": 13.283913596324016, "learning_rate": 7.162037068253141e-06, "loss": 1.1518199443817139, "step": 1029 }, { "epoch": 1.2684729064039408, "grad_norm": 7.98681967422814, "learning_rate": 7.155573385433772e-06, "loss": 2.1126716136932373, "step": 1030 }, { "epoch": 1.269704433497537, "grad_norm": 11.20695829302969, "learning_rate": 7.149105274881815e-06, "loss": 1.3222094774246216, "step": 1031 }, { "epoch": 1.270935960591133, "grad_norm": 9.408024877970139, "learning_rate": 7.1426327498833174e-06, "loss": 0.8843763470649719, "step": 1032 }, { "epoch": 1.272167487684729, "grad_norm": 18.111033872908873, "learning_rate": 7.136155823733405e-06, "loss": 1.3091545104980469, "step": 1033 }, { "epoch": 1.2733990147783252, "grad_norm": 11.598349915801498, "learning_rate": 7.129674509736237e-06, "loss": 1.4408364295959473, "step": 1034 }, { "epoch": 1.2746305418719213, "grad_norm": 17.074081488403696, "learning_rate": 7.12318882120499e-06, "loss": 1.330906867980957, "step": 1035 }, { "epoch": 1.2758620689655173, "grad_norm": 11.931439673872655, "learning_rate": 7.116698771461825e-06, "loss": 1.9561724662780762, "step": 1036 }, { "epoch": 1.2770935960591134, "grad_norm": 14.506364150634404, "learning_rate": 7.110204373837857e-06, "loss": 2.185842275619507, "step": 1037 }, { "epoch": 1.2783251231527093, "grad_norm": 8.783423067272876, "learning_rate": 7.1037056416731395e-06, "loss": 1.724360466003418, "step": 1038 }, { "epoch": 1.2795566502463054, "grad_norm": 10.548795738669158, "learning_rate": 7.097202588316625e-06, "loss": 1.179841160774231, "step": 1039 }, { "epoch": 1.2807881773399015, "grad_norm": 14.968187776502731, "learning_rate": 7.090695227126141e-06, "loss": 1.6783604621887207, "step": 1040 }, { "epoch": 1.2820197044334976, "grad_norm": 10.70366989067169, "learning_rate": 7.084183571468368e-06, "loss": 1.761925220489502, "step": 1041 }, { "epoch": 1.2832512315270936, "grad_norm": 12.9020971876039, "learning_rate": 7.077667634718801e-06, "loss": 0.9297729134559631, "step": 1042 }, { "epoch": 1.2844827586206897, "grad_norm": 12.446847341840494, "learning_rate": 7.071147430261738e-06, "loss": 1.6091060638427734, "step": 1043 }, { "epoch": 1.2857142857142856, "grad_norm": 8.238449521430923, "learning_rate": 7.064622971490234e-06, "loss": 1.280853509902954, "step": 1044 }, { "epoch": 1.2869458128078817, "grad_norm": 10.190528956891907, "learning_rate": 7.058094271806091e-06, "loss": 2.4095635414123535, "step": 1045 }, { "epoch": 1.2881773399014778, "grad_norm": 12.210698142217534, "learning_rate": 7.051561344619814e-06, "loss": 1.7969441413879395, "step": 1046 }, { "epoch": 1.2894088669950738, "grad_norm": 8.968258930303262, "learning_rate": 7.045024203350598e-06, "loss": 2.4331698417663574, "step": 1047 }, { "epoch": 1.29064039408867, "grad_norm": 9.034111830970843, "learning_rate": 7.0384828614262905e-06, "loss": 1.336733341217041, "step": 1048 }, { "epoch": 1.291871921182266, "grad_norm": 9.358643506315515, "learning_rate": 7.031937332283367e-06, "loss": 1.2959213256835938, "step": 1049 }, { "epoch": 1.293103448275862, "grad_norm": 15.177096960870495, "learning_rate": 7.025387629366912e-06, "loss": 1.0095289945602417, "step": 1050 }, { "epoch": 1.2943349753694582, "grad_norm": 8.708668143059782, "learning_rate": 7.018833766130571e-06, "loss": 1.8314733505249023, "step": 1051 }, { "epoch": 1.2955665024630543, "grad_norm": 12.10925693324793, "learning_rate": 7.012275756036544e-06, "loss": 1.121436595916748, "step": 1052 }, { "epoch": 1.2967980295566504, "grad_norm": 20.569530418297486, "learning_rate": 7.0057136125555456e-06, "loss": 1.5652289390563965, "step": 1053 }, { "epoch": 1.2980295566502464, "grad_norm": 14.018717429311812, "learning_rate": 6.999147349166779e-06, "loss": 1.1146215200424194, "step": 1054 }, { "epoch": 1.2992610837438423, "grad_norm": 17.232932273490494, "learning_rate": 6.9925769793579165e-06, "loss": 2.400024175643921, "step": 1055 }, { "epoch": 1.3004926108374384, "grad_norm": 11.12761938883381, "learning_rate": 6.986002516625058e-06, "loss": 1.7114648818969727, "step": 1056 }, { "epoch": 1.3017241379310345, "grad_norm": 10.072038004871871, "learning_rate": 6.979423974472714e-06, "loss": 1.5338797569274902, "step": 1057 }, { "epoch": 1.3029556650246306, "grad_norm": 8.812025010262357, "learning_rate": 6.972841366413777e-06, "loss": 1.078460931777954, "step": 1058 }, { "epoch": 1.3041871921182266, "grad_norm": 11.356722343645167, "learning_rate": 6.966254705969484e-06, "loss": 1.5467915534973145, "step": 1059 }, { "epoch": 1.3054187192118227, "grad_norm": 14.67705148794403, "learning_rate": 6.959664006669404e-06, "loss": 1.2715568542480469, "step": 1060 }, { "epoch": 1.3066502463054186, "grad_norm": 8.890913561904203, "learning_rate": 6.953069282051397e-06, "loss": 1.887066125869751, "step": 1061 }, { "epoch": 1.3078817733990147, "grad_norm": 10.182269397064065, "learning_rate": 6.946470545661593e-06, "loss": 1.419116497039795, "step": 1062 }, { "epoch": 1.3091133004926108, "grad_norm": 8.361662711059678, "learning_rate": 6.939867811054365e-06, "loss": 1.3843079805374146, "step": 1063 }, { "epoch": 1.3103448275862069, "grad_norm": 27.704350160970165, "learning_rate": 6.9332610917922915e-06, "loss": 2.5894885063171387, "step": 1064 }, { "epoch": 1.311576354679803, "grad_norm": 16.17688431061018, "learning_rate": 6.9266504014461425e-06, "loss": 1.6600944995880127, "step": 1065 }, { "epoch": 1.312807881773399, "grad_norm": 18.474330510936614, "learning_rate": 6.920035753594845e-06, "loss": 1.7698057889938354, "step": 1066 }, { "epoch": 1.314039408866995, "grad_norm": 9.914676123570585, "learning_rate": 6.913417161825449e-06, "loss": 1.5610848665237427, "step": 1067 }, { "epoch": 1.3152709359605912, "grad_norm": 8.489359998020161, "learning_rate": 6.906794639733114e-06, "loss": 1.6380643844604492, "step": 1068 }, { "epoch": 1.3165024630541873, "grad_norm": 8.9532327938231, "learning_rate": 6.900168200921065e-06, "loss": 1.390014410018921, "step": 1069 }, { "epoch": 1.3177339901477834, "grad_norm": 10.45013795003969, "learning_rate": 6.893537859000576e-06, "loss": 1.6589158773422241, "step": 1070 }, { "epoch": 1.3189655172413794, "grad_norm": 12.436644147912617, "learning_rate": 6.886903627590938e-06, "loss": 1.5524673461914062, "step": 1071 }, { "epoch": 1.3201970443349753, "grad_norm": 12.240484798983633, "learning_rate": 6.880265520319434e-06, "loss": 2.0204474925994873, "step": 1072 }, { "epoch": 1.3214285714285714, "grad_norm": 10.928634620934101, "learning_rate": 6.8736235508213024e-06, "loss": 1.7947957515716553, "step": 1073 }, { "epoch": 1.3226600985221675, "grad_norm": 12.192004015491179, "learning_rate": 6.866977732739719e-06, "loss": 1.6154756546020508, "step": 1074 }, { "epoch": 1.3238916256157636, "grad_norm": 10.239608872921218, "learning_rate": 6.860328079725764e-06, "loss": 1.419677734375, "step": 1075 }, { "epoch": 1.3251231527093597, "grad_norm": 11.490298083513249, "learning_rate": 6.853674605438395e-06, "loss": 2.2221052646636963, "step": 1076 }, { "epoch": 1.3263546798029557, "grad_norm": 10.796599749157496, "learning_rate": 6.84701732354442e-06, "loss": 1.6474840641021729, "step": 1077 }, { "epoch": 1.3275862068965516, "grad_norm": 16.05723789346112, "learning_rate": 6.840356247718466e-06, "loss": 2.035231828689575, "step": 1078 }, { "epoch": 1.3288177339901477, "grad_norm": 12.127949373836048, "learning_rate": 6.8336913916429515e-06, "loss": 1.5675947666168213, "step": 1079 }, { "epoch": 1.3300492610837438, "grad_norm": 12.561351822867852, "learning_rate": 6.827022769008068e-06, "loss": 1.2241394519805908, "step": 1080 }, { "epoch": 1.3312807881773399, "grad_norm": 10.606640209072971, "learning_rate": 6.820350393511732e-06, "loss": 1.3507403135299683, "step": 1081 }, { "epoch": 1.332512315270936, "grad_norm": 23.44696719245062, "learning_rate": 6.81367427885958e-06, "loss": 2.256551504135132, "step": 1082 }, { "epoch": 1.333743842364532, "grad_norm": 17.90054749002111, "learning_rate": 6.806994438764922e-06, "loss": 1.6412163972854614, "step": 1083 }, { "epoch": 1.3349753694581281, "grad_norm": 10.747816339677435, "learning_rate": 6.8003108869487225e-06, "loss": 1.500988483428955, "step": 1084 }, { "epoch": 1.3362068965517242, "grad_norm": 8.86240548184895, "learning_rate": 6.79362363713957e-06, "loss": 1.4661070108413696, "step": 1085 }, { "epoch": 1.3374384236453203, "grad_norm": 9.325455271074935, "learning_rate": 6.786932703073648e-06, "loss": 1.42755126953125, "step": 1086 }, { "epoch": 1.3386699507389164, "grad_norm": 14.863538954404982, "learning_rate": 6.780238098494711e-06, "loss": 1.165806531906128, "step": 1087 }, { "epoch": 1.3399014778325122, "grad_norm": 21.9332846077213, "learning_rate": 6.773539837154051e-06, "loss": 1.3795387744903564, "step": 1088 }, { "epoch": 1.3411330049261083, "grad_norm": 15.064922882268542, "learning_rate": 6.766837932810468e-06, "loss": 1.3203850984573364, "step": 1089 }, { "epoch": 1.3423645320197044, "grad_norm": 12.791071147567429, "learning_rate": 6.7601323992302525e-06, "loss": 1.645883321762085, "step": 1090 }, { "epoch": 1.3435960591133005, "grad_norm": 8.072143933965927, "learning_rate": 6.7534232501871425e-06, "loss": 1.6904821395874023, "step": 1091 }, { "epoch": 1.3448275862068966, "grad_norm": 8.711589751937055, "learning_rate": 6.7467104994623066e-06, "loss": 1.332162618637085, "step": 1092 }, { "epoch": 1.3460591133004927, "grad_norm": 9.451447429997234, "learning_rate": 6.7399941608443096e-06, "loss": 1.4389145374298096, "step": 1093 }, { "epoch": 1.3472906403940887, "grad_norm": 7.323937666452591, "learning_rate": 6.733274248129089e-06, "loss": 1.6597908735275269, "step": 1094 }, { "epoch": 1.3485221674876846, "grad_norm": 17.883843051775344, "learning_rate": 6.72655077511992e-06, "loss": 0.9520257711410522, "step": 1095 }, { "epoch": 1.3497536945812807, "grad_norm": 11.223594087909252, "learning_rate": 6.719823755627393e-06, "loss": 1.4488117694854736, "step": 1096 }, { "epoch": 1.3509852216748768, "grad_norm": 7.977177991617555, "learning_rate": 6.713093203469384e-06, "loss": 1.5133984088897705, "step": 1097 }, { "epoch": 1.3522167487684729, "grad_norm": 8.682066451366055, "learning_rate": 6.7063591324710234e-06, "loss": 1.846522569656372, "step": 1098 }, { "epoch": 1.353448275862069, "grad_norm": 12.792486675857687, "learning_rate": 6.6996215564646705e-06, "loss": 0.9724826812744141, "step": 1099 }, { "epoch": 1.354679802955665, "grad_norm": 11.989074062954435, "learning_rate": 6.692880489289885e-06, "loss": 1.24728262424469, "step": 1100 }, { "epoch": 1.3559113300492611, "grad_norm": 22.734635359059652, "learning_rate": 6.686135944793395e-06, "loss": 1.5332872867584229, "step": 1101 }, { "epoch": 1.3571428571428572, "grad_norm": 11.645074036110657, "learning_rate": 6.679387936829076e-06, "loss": 1.5978163480758667, "step": 1102 }, { "epoch": 1.3583743842364533, "grad_norm": 9.223736434919791, "learning_rate": 6.672636479257912e-06, "loss": 2.05710506439209, "step": 1103 }, { "epoch": 1.3596059113300494, "grad_norm": 11.48041589458668, "learning_rate": 6.665881585947981e-06, "loss": 1.667812466621399, "step": 1104 }, { "epoch": 1.3608374384236452, "grad_norm": 18.141176793209265, "learning_rate": 6.659123270774406e-06, "loss": 1.3053381443023682, "step": 1105 }, { "epoch": 1.3620689655172413, "grad_norm": 11.11014263526773, "learning_rate": 6.652361547619352e-06, "loss": 1.5228716135025024, "step": 1106 }, { "epoch": 1.3633004926108374, "grad_norm": 11.869708221541034, "learning_rate": 6.645596430371976e-06, "loss": 1.3818378448486328, "step": 1107 }, { "epoch": 1.3645320197044335, "grad_norm": 11.298030039811758, "learning_rate": 6.6388279329284065e-06, "loss": 1.217841386795044, "step": 1108 }, { "epoch": 1.3657635467980296, "grad_norm": 21.11595250544298, "learning_rate": 6.632056069191723e-06, "loss": 1.4309210777282715, "step": 1109 }, { "epoch": 1.3669950738916257, "grad_norm": 13.7021684816084, "learning_rate": 6.6252808530719095e-06, "loss": 1.3015059232711792, "step": 1110 }, { "epoch": 1.3682266009852218, "grad_norm": 11.973457349226296, "learning_rate": 6.618502298485844e-06, "loss": 1.2734256982803345, "step": 1111 }, { "epoch": 1.3694581280788176, "grad_norm": 15.830227785424638, "learning_rate": 6.611720419357257e-06, "loss": 1.907172441482544, "step": 1112 }, { "epoch": 1.3706896551724137, "grad_norm": 10.756653422484252, "learning_rate": 6.604935229616711e-06, "loss": 1.1207606792449951, "step": 1113 }, { "epoch": 1.3719211822660098, "grad_norm": 12.736281126843005, "learning_rate": 6.598146743201568e-06, "loss": 2.3231239318847656, "step": 1114 }, { "epoch": 1.3731527093596059, "grad_norm": 11.597483205953116, "learning_rate": 6.5913549740559606e-06, "loss": 1.1395865678787231, "step": 1115 }, { "epoch": 1.374384236453202, "grad_norm": 14.754486017260728, "learning_rate": 6.584559936130763e-06, "loss": 3.1981747150421143, "step": 1116 }, { "epoch": 1.375615763546798, "grad_norm": 12.874438415282308, "learning_rate": 6.57776164338357e-06, "loss": 1.7495319843292236, "step": 1117 }, { "epoch": 1.3768472906403941, "grad_norm": 12.611228408009778, "learning_rate": 6.570960109778655e-06, "loss": 1.3304778337478638, "step": 1118 }, { "epoch": 1.3780788177339902, "grad_norm": 11.84441441686591, "learning_rate": 6.564155349286952e-06, "loss": 1.6510775089263916, "step": 1119 }, { "epoch": 1.3793103448275863, "grad_norm": 13.996316648052032, "learning_rate": 6.557347375886022e-06, "loss": 1.3382967710494995, "step": 1120 }, { "epoch": 1.3805418719211824, "grad_norm": 11.351524045305764, "learning_rate": 6.550536203560028e-06, "loss": 1.418992042541504, "step": 1121 }, { "epoch": 1.3817733990147782, "grad_norm": 16.848897992260934, "learning_rate": 6.543721846299701e-06, "loss": 1.4815843105316162, "step": 1122 }, { "epoch": 1.3830049261083743, "grad_norm": 13.42654012333122, "learning_rate": 6.536904318102314e-06, "loss": 0.9823303818702698, "step": 1123 }, { "epoch": 1.3842364532019704, "grad_norm": 11.039715301984293, "learning_rate": 6.530083632971658e-06, "loss": 1.4959704875946045, "step": 1124 }, { "epoch": 1.3854679802955665, "grad_norm": 13.499332863560449, "learning_rate": 6.523259804918001e-06, "loss": 1.3141142129898071, "step": 1125 }, { "epoch": 1.3866995073891626, "grad_norm": 18.762617405218773, "learning_rate": 6.516432847958074e-06, "loss": 1.60225248336792, "step": 1126 }, { "epoch": 1.3879310344827587, "grad_norm": 12.76800599324204, "learning_rate": 6.509602776115029e-06, "loss": 1.7774362564086914, "step": 1127 }, { "epoch": 1.3891625615763548, "grad_norm": 14.80003777651342, "learning_rate": 6.502769603418423e-06, "loss": 1.3750693798065186, "step": 1128 }, { "epoch": 1.3903940886699506, "grad_norm": 12.846839874270263, "learning_rate": 6.4959333439041775e-06, "loss": 1.0850452184677124, "step": 1129 }, { "epoch": 1.3916256157635467, "grad_norm": 17.175837709461415, "learning_rate": 6.489094011614553e-06, "loss": 1.7440909147262573, "step": 1130 }, { "epoch": 1.3928571428571428, "grad_norm": 8.34120026588026, "learning_rate": 6.482251620598129e-06, "loss": 1.5904752016067505, "step": 1131 }, { "epoch": 1.3940886699507389, "grad_norm": 10.398946422121055, "learning_rate": 6.47540618490976e-06, "loss": 1.4864649772644043, "step": 1132 }, { "epoch": 1.395320197044335, "grad_norm": 16.449380414530893, "learning_rate": 6.4685577186105595e-06, "loss": 1.3869491815567017, "step": 1133 }, { "epoch": 1.396551724137931, "grad_norm": 11.708541771363075, "learning_rate": 6.461706235767866e-06, "loss": 1.1635327339172363, "step": 1134 }, { "epoch": 1.3977832512315271, "grad_norm": 6.616557203492817, "learning_rate": 6.45485175045521e-06, "loss": 1.4063032865524292, "step": 1135 }, { "epoch": 1.3990147783251232, "grad_norm": 26.794737362449215, "learning_rate": 6.447994276752293e-06, "loss": 2.2259998321533203, "step": 1136 }, { "epoch": 1.4002463054187193, "grad_norm": 10.511853223185177, "learning_rate": 6.441133828744954e-06, "loss": 1.2302110195159912, "step": 1137 }, { "epoch": 1.4014778325123154, "grad_norm": 10.658533095355526, "learning_rate": 6.434270420525144e-06, "loss": 1.2579622268676758, "step": 1138 }, { "epoch": 1.4027093596059113, "grad_norm": 18.972607390940905, "learning_rate": 6.427404066190889e-06, "loss": 1.6761397123336792, "step": 1139 }, { "epoch": 1.4039408866995073, "grad_norm": 12.172946298049014, "learning_rate": 6.4205347798462704e-06, "loss": 1.3933346271514893, "step": 1140 }, { "epoch": 1.4051724137931034, "grad_norm": 13.681043588339055, "learning_rate": 6.413662575601391e-06, "loss": 1.9914003610610962, "step": 1141 }, { "epoch": 1.4064039408866995, "grad_norm": 16.934291210588032, "learning_rate": 6.406787467572348e-06, "loss": 1.9921746253967285, "step": 1142 }, { "epoch": 1.4076354679802956, "grad_norm": 18.5006822922468, "learning_rate": 6.3999094698812055e-06, "loss": 1.6050479412078857, "step": 1143 }, { "epoch": 1.4088669950738917, "grad_norm": 12.333046745730567, "learning_rate": 6.393028596655958e-06, "loss": 1.7796251773834229, "step": 1144 }, { "epoch": 1.4100985221674878, "grad_norm": 18.731485023409682, "learning_rate": 6.386144862030508e-06, "loss": 1.7936886548995972, "step": 1145 }, { "epoch": 1.4113300492610836, "grad_norm": 18.37593149730845, "learning_rate": 6.37925828014464e-06, "loss": 1.9030745029449463, "step": 1146 }, { "epoch": 1.4125615763546797, "grad_norm": 11.93678536094984, "learning_rate": 6.3723688651439806e-06, "loss": 1.4446496963500977, "step": 1147 }, { "epoch": 1.4137931034482758, "grad_norm": 13.469356839829612, "learning_rate": 6.365476631179982e-06, "loss": 1.5683763027191162, "step": 1148 }, { "epoch": 1.4150246305418719, "grad_norm": 8.488203520402504, "learning_rate": 6.358581592409881e-06, "loss": 1.4594917297363281, "step": 1149 }, { "epoch": 1.416256157635468, "grad_norm": 25.588676453436552, "learning_rate": 6.351683762996681e-06, "loss": 2.1706323623657227, "step": 1150 }, { "epoch": 1.417487684729064, "grad_norm": 11.810343655960159, "learning_rate": 6.344783157109114e-06, "loss": 1.835425853729248, "step": 1151 }, { "epoch": 1.4187192118226601, "grad_norm": 10.711102782202751, "learning_rate": 6.337879788921615e-06, "loss": 1.1789867877960205, "step": 1152 }, { "epoch": 1.4199507389162562, "grad_norm": 28.404082710690172, "learning_rate": 6.3309736726142965e-06, "loss": 1.9750418663024902, "step": 1153 }, { "epoch": 1.4211822660098523, "grad_norm": 14.02852797567233, "learning_rate": 6.324064822372913e-06, "loss": 1.4960027933120728, "step": 1154 }, { "epoch": 1.4224137931034484, "grad_norm": 20.199397968799044, "learning_rate": 6.317153252388834e-06, "loss": 1.12904691696167, "step": 1155 }, { "epoch": 1.4236453201970443, "grad_norm": 10.534543863605384, "learning_rate": 6.31023897685902e-06, "loss": 1.30333411693573, "step": 1156 }, { "epoch": 1.4248768472906403, "grad_norm": 15.66714236524435, "learning_rate": 6.303322009985984e-06, "loss": 2.5257434844970703, "step": 1157 }, { "epoch": 1.4261083743842364, "grad_norm": 18.065303617570866, "learning_rate": 6.296402365977767e-06, "loss": 0.9684423208236694, "step": 1158 }, { "epoch": 1.4273399014778325, "grad_norm": 12.376925974972115, "learning_rate": 6.289480059047915e-06, "loss": 1.457876443862915, "step": 1159 }, { "epoch": 1.4285714285714286, "grad_norm": 9.05985921030025, "learning_rate": 6.282555103415438e-06, "loss": 1.5206713676452637, "step": 1160 }, { "epoch": 1.4298029556650247, "grad_norm": 14.712390356925216, "learning_rate": 6.27562751330479e-06, "loss": 1.680644154548645, "step": 1161 }, { "epoch": 1.4310344827586206, "grad_norm": 9.786932196785434, "learning_rate": 6.268697302945835e-06, "loss": 1.3704997301101685, "step": 1162 }, { "epoch": 1.4322660098522166, "grad_norm": 9.786888328650228, "learning_rate": 6.261764486573816e-06, "loss": 1.3250343799591064, "step": 1163 }, { "epoch": 1.4334975369458127, "grad_norm": 15.544106160026582, "learning_rate": 6.254829078429336e-06, "loss": 1.8659427165985107, "step": 1164 }, { "epoch": 1.4347290640394088, "grad_norm": 21.077430430000046, "learning_rate": 6.247891092758319e-06, "loss": 2.043597936630249, "step": 1165 }, { "epoch": 1.435960591133005, "grad_norm": 12.476492579798414, "learning_rate": 6.24095054381198e-06, "loss": 1.5634403228759766, "step": 1166 }, { "epoch": 1.437192118226601, "grad_norm": 11.790373846414154, "learning_rate": 6.2340074458468014e-06, "loss": 1.1179373264312744, "step": 1167 }, { "epoch": 1.438423645320197, "grad_norm": 13.094422813370427, "learning_rate": 6.227061813124504e-06, "loss": 0.8013179302215576, "step": 1168 }, { "epoch": 1.4396551724137931, "grad_norm": 9.010286032120458, "learning_rate": 6.220113659912012e-06, "loss": 1.3435392379760742, "step": 1169 }, { "epoch": 1.4408866995073892, "grad_norm": 8.308881028265468, "learning_rate": 6.213163000481428e-06, "loss": 1.39387845993042, "step": 1170 }, { "epoch": 1.4421182266009853, "grad_norm": 8.499060752632088, "learning_rate": 6.206209849110001e-06, "loss": 1.760462760925293, "step": 1171 }, { "epoch": 1.4433497536945814, "grad_norm": 13.348998095152654, "learning_rate": 6.1992542200801035e-06, "loss": 1.0812432765960693, "step": 1172 }, { "epoch": 1.4445812807881773, "grad_norm": 9.263056193047571, "learning_rate": 6.1922961276791925e-06, "loss": 1.7997616529464722, "step": 1173 }, { "epoch": 1.4458128078817734, "grad_norm": 11.646405372699148, "learning_rate": 6.1853355861997854e-06, "loss": 1.773369550704956, "step": 1174 }, { "epoch": 1.4470443349753694, "grad_norm": 8.442523087287304, "learning_rate": 6.1783726099394324e-06, "loss": 1.9488962888717651, "step": 1175 }, { "epoch": 1.4482758620689655, "grad_norm": 13.332895782423902, "learning_rate": 6.171407213200683e-06, "loss": 1.6990149021148682, "step": 1176 }, { "epoch": 1.4495073891625616, "grad_norm": 12.609637801512664, "learning_rate": 6.164439410291061e-06, "loss": 1.4307571649551392, "step": 1177 }, { "epoch": 1.4507389162561577, "grad_norm": 8.885074358137231, "learning_rate": 6.157469215523031e-06, "loss": 1.3966443538665771, "step": 1178 }, { "epoch": 1.4519704433497536, "grad_norm": 16.606696238854166, "learning_rate": 6.150496643213969e-06, "loss": 1.2959253787994385, "step": 1179 }, { "epoch": 1.4532019704433496, "grad_norm": 16.898895754976742, "learning_rate": 6.143521707686137e-06, "loss": 1.4992142915725708, "step": 1180 }, { "epoch": 1.4544334975369457, "grad_norm": 16.69245348652636, "learning_rate": 6.136544423266651e-06, "loss": 1.8196167945861816, "step": 1181 }, { "epoch": 1.4556650246305418, "grad_norm": 16.12465629803321, "learning_rate": 6.129564804287454e-06, "loss": 1.4129021167755127, "step": 1182 }, { "epoch": 1.456896551724138, "grad_norm": 15.4451290282442, "learning_rate": 6.122582865085278e-06, "loss": 1.2009403705596924, "step": 1183 }, { "epoch": 1.458128078817734, "grad_norm": 12.682560791700617, "learning_rate": 6.115598620001627e-06, "loss": 1.698556661605835, "step": 1184 }, { "epoch": 1.45935960591133, "grad_norm": 21.414952415899087, "learning_rate": 6.108612083382739e-06, "loss": 1.5819299221038818, "step": 1185 }, { "epoch": 1.4605911330049262, "grad_norm": 10.708464197323055, "learning_rate": 6.101623269579558e-06, "loss": 1.374379277229309, "step": 1186 }, { "epoch": 1.4618226600985222, "grad_norm": 10.541290993965774, "learning_rate": 6.094632192947711e-06, "loss": 1.2765707969665527, "step": 1187 }, { "epoch": 1.4630541871921183, "grad_norm": 14.098976562454558, "learning_rate": 6.087638867847465e-06, "loss": 1.2740705013275146, "step": 1188 }, { "epoch": 1.4642857142857144, "grad_norm": 11.154362665776958, "learning_rate": 6.08064330864371e-06, "loss": 1.6713453531265259, "step": 1189 }, { "epoch": 1.4655172413793103, "grad_norm": 9.205967970627526, "learning_rate": 6.073645529705926e-06, "loss": 1.6606531143188477, "step": 1190 }, { "epoch": 1.4667487684729064, "grad_norm": 12.43504089477338, "learning_rate": 6.066645545408149e-06, "loss": 1.6029870510101318, "step": 1191 }, { "epoch": 1.4679802955665024, "grad_norm": 9.416406443647212, "learning_rate": 6.0596433701289506e-06, "loss": 1.5884819030761719, "step": 1192 }, { "epoch": 1.4692118226600985, "grad_norm": 17.434043985101933, "learning_rate": 6.052639018251394e-06, "loss": 1.060668706893921, "step": 1193 }, { "epoch": 1.4704433497536946, "grad_norm": 13.053843358479307, "learning_rate": 6.045632504163024e-06, "loss": 1.6251329183578491, "step": 1194 }, { "epoch": 1.4716748768472907, "grad_norm": 10.200397873502725, "learning_rate": 6.03862384225582e-06, "loss": 1.2369989156723022, "step": 1195 }, { "epoch": 1.4729064039408866, "grad_norm": 28.146477262288624, "learning_rate": 6.0316130469261705e-06, "loss": 1.7742527723312378, "step": 1196 }, { "epoch": 1.4741379310344827, "grad_norm": 6.380213600146285, "learning_rate": 6.024600132574855e-06, "loss": 2.166492223739624, "step": 1197 }, { "epoch": 1.4753694581280787, "grad_norm": 15.296147923549848, "learning_rate": 6.017585113606999e-06, "loss": 1.8031083345413208, "step": 1198 }, { "epoch": 1.4766009852216748, "grad_norm": 7.580109898357858, "learning_rate": 6.010568004432055e-06, "loss": 1.9966365098953247, "step": 1199 }, { "epoch": 1.477832512315271, "grad_norm": 13.138438168026589, "learning_rate": 6.0035488194637645e-06, "loss": 1.0125515460968018, "step": 1200 }, { "epoch": 1.479064039408867, "grad_norm": 16.24938270382903, "learning_rate": 5.9965275731201364e-06, "loss": 1.1396842002868652, "step": 1201 }, { "epoch": 1.480295566502463, "grad_norm": 6.579201955073294, "learning_rate": 5.9895042798234125e-06, "loss": 1.8030388355255127, "step": 1202 }, { "epoch": 1.4815270935960592, "grad_norm": 12.865016417179568, "learning_rate": 5.982478954000042e-06, "loss": 1.4132026433944702, "step": 1203 }, { "epoch": 1.4827586206896552, "grad_norm": 11.295614659779242, "learning_rate": 5.975451610080643e-06, "loss": 1.3726825714111328, "step": 1204 }, { "epoch": 1.4839901477832513, "grad_norm": 10.812781562044428, "learning_rate": 5.968422262499983e-06, "loss": 2.3436193466186523, "step": 1205 }, { "epoch": 1.4852216748768474, "grad_norm": 11.93980767439267, "learning_rate": 5.961390925696947e-06, "loss": 1.4617420434951782, "step": 1206 }, { "epoch": 1.4864532019704433, "grad_norm": 8.752972802049372, "learning_rate": 5.9543576141145035e-06, "loss": 1.8050814867019653, "step": 1207 }, { "epoch": 1.4876847290640394, "grad_norm": 11.595272230479853, "learning_rate": 5.947322342199674e-06, "loss": 1.3426543474197388, "step": 1208 }, { "epoch": 1.4889162561576355, "grad_norm": 13.910327681643947, "learning_rate": 5.940285124403517e-06, "loss": 1.6211771965026855, "step": 1209 }, { "epoch": 1.4901477832512315, "grad_norm": 10.490417163522949, "learning_rate": 5.933245975181074e-06, "loss": 2.695863723754883, "step": 1210 }, { "epoch": 1.4913793103448276, "grad_norm": 9.128292414129945, "learning_rate": 5.926204908991366e-06, "loss": 1.2743788957595825, "step": 1211 }, { "epoch": 1.4926108374384237, "grad_norm": 11.2632445422812, "learning_rate": 5.919161940297346e-06, "loss": 1.652765154838562, "step": 1212 }, { "epoch": 1.4938423645320196, "grad_norm": 7.537950882850561, "learning_rate": 5.912117083565874e-06, "loss": 1.3720670938491821, "step": 1213 }, { "epoch": 1.4950738916256157, "grad_norm": 14.216763115794095, "learning_rate": 5.905070353267692e-06, "loss": 1.222616195678711, "step": 1214 }, { "epoch": 1.4963054187192117, "grad_norm": 7.742622309976788, "learning_rate": 5.898021763877388e-06, "loss": 1.4626069068908691, "step": 1215 }, { "epoch": 1.4975369458128078, "grad_norm": 10.044815043339705, "learning_rate": 5.890971329873366e-06, "loss": 1.7813634872436523, "step": 1216 }, { "epoch": 1.498768472906404, "grad_norm": 14.537107209189347, "learning_rate": 5.883919065737827e-06, "loss": 0.5114675760269165, "step": 1217 }, { "epoch": 1.5, "grad_norm": 18.934697309871, "learning_rate": 5.876864985956722e-06, "loss": 1.6000962257385254, "step": 1218 }, { "epoch": 1.501231527093596, "grad_norm": 33.040397060632486, "learning_rate": 5.869809105019738e-06, "loss": 1.5674512386322021, "step": 1219 }, { "epoch": 1.5024630541871922, "grad_norm": 9.76563438047523, "learning_rate": 5.8627514374202596e-06, "loss": 1.7963311672210693, "step": 1220 }, { "epoch": 1.5036945812807883, "grad_norm": 10.95067481959561, "learning_rate": 5.85569199765534e-06, "loss": 1.1649596691131592, "step": 1221 }, { "epoch": 1.5049261083743843, "grad_norm": 9.927773449159055, "learning_rate": 5.848630800225678e-06, "loss": 1.140197992324829, "step": 1222 }, { "epoch": 1.5061576354679804, "grad_norm": 8.586607717080767, "learning_rate": 5.841567859635572e-06, "loss": 1.865435242652893, "step": 1223 }, { "epoch": 1.5073891625615765, "grad_norm": 11.43552738813054, "learning_rate": 5.834503190392912e-06, "loss": 1.457642912864685, "step": 1224 }, { "epoch": 1.5086206896551724, "grad_norm": 9.978595721772624, "learning_rate": 5.827436807009133e-06, "loss": 1.3783336877822876, "step": 1225 }, { "epoch": 1.5098522167487685, "grad_norm": 10.75044326200818, "learning_rate": 5.8203687239991935e-06, "loss": 1.939549207687378, "step": 1226 }, { "epoch": 1.5110837438423645, "grad_norm": 14.588582695069839, "learning_rate": 5.813298955881542e-06, "loss": 1.3607597351074219, "step": 1227 }, { "epoch": 1.5123152709359606, "grad_norm": 9.739548479278437, "learning_rate": 5.806227517178089e-06, "loss": 0.81966233253479, "step": 1228 }, { "epoch": 1.5135467980295565, "grad_norm": 7.228017183846092, "learning_rate": 5.799154422414174e-06, "loss": 0.9481602311134338, "step": 1229 }, { "epoch": 1.5147783251231526, "grad_norm": 16.162733557662186, "learning_rate": 5.79207968611854e-06, "loss": 1.3550889492034912, "step": 1230 }, { "epoch": 1.5160098522167487, "grad_norm": 10.696500057601996, "learning_rate": 5.785003322823307e-06, "loss": 2.022425889968872, "step": 1231 }, { "epoch": 1.5172413793103448, "grad_norm": 8.501680697642309, "learning_rate": 5.777925347063927e-06, "loss": 1.5649950504302979, "step": 1232 }, { "epoch": 1.5184729064039408, "grad_norm": 12.185227926920462, "learning_rate": 5.7708457733791715e-06, "loss": 1.9720977544784546, "step": 1233 }, { "epoch": 1.519704433497537, "grad_norm": 12.902985615374178, "learning_rate": 5.763764616311089e-06, "loss": 1.0029213428497314, "step": 1234 }, { "epoch": 1.520935960591133, "grad_norm": 13.23751211435566, "learning_rate": 5.756681890404987e-06, "loss": 1.8926727771759033, "step": 1235 }, { "epoch": 1.522167487684729, "grad_norm": 8.93687413398984, "learning_rate": 5.749597610209392e-06, "loss": 1.462761402130127, "step": 1236 }, { "epoch": 1.5233990147783252, "grad_norm": 10.137890971821589, "learning_rate": 5.7425117902760195e-06, "loss": 2.1467416286468506, "step": 1237 }, { "epoch": 1.5246305418719213, "grad_norm": 12.30865285718221, "learning_rate": 5.7354244451597545e-06, "loss": 1.191473364830017, "step": 1238 }, { "epoch": 1.5258620689655173, "grad_norm": 11.884477014639941, "learning_rate": 5.72833558941861e-06, "loss": 0.896723210811615, "step": 1239 }, { "epoch": 1.5270935960591134, "grad_norm": 12.439035862181441, "learning_rate": 5.721245237613704e-06, "loss": 0.8741526007652283, "step": 1240 }, { "epoch": 1.5283251231527095, "grad_norm": 11.437489612490284, "learning_rate": 5.714153404309228e-06, "loss": 1.6330994367599487, "step": 1241 }, { "epoch": 1.5295566502463054, "grad_norm": 8.493940846915361, "learning_rate": 5.707060104072415e-06, "loss": 2.2386982440948486, "step": 1242 }, { "epoch": 1.5307881773399015, "grad_norm": 15.002139823216499, "learning_rate": 5.6999653514735124e-06, "loss": 1.5266145467758179, "step": 1243 }, { "epoch": 1.5320197044334976, "grad_norm": 10.763593391596421, "learning_rate": 5.6928691610857515e-06, "loss": 1.4918262958526611, "step": 1244 }, { "epoch": 1.5332512315270936, "grad_norm": 13.978563202935332, "learning_rate": 5.685771547485312e-06, "loss": 1.241945743560791, "step": 1245 }, { "epoch": 1.5344827586206895, "grad_norm": 13.403953021065679, "learning_rate": 5.678672525251304e-06, "loss": 1.1569273471832275, "step": 1246 }, { "epoch": 1.5357142857142856, "grad_norm": 11.182023407334606, "learning_rate": 5.671572108965729e-06, "loss": 1.946014404296875, "step": 1247 }, { "epoch": 1.5369458128078817, "grad_norm": 11.304302205859694, "learning_rate": 5.664470313213448e-06, "loss": 1.8601741790771484, "step": 1248 }, { "epoch": 1.5381773399014778, "grad_norm": 16.894321658591, "learning_rate": 5.65736715258216e-06, "loss": 1.7164549827575684, "step": 1249 }, { "epoch": 1.5394088669950738, "grad_norm": 10.02548837159482, "learning_rate": 5.650262641662367e-06, "loss": 2.0459697246551514, "step": 1250 }, { "epoch": 1.54064039408867, "grad_norm": 9.37570660013781, "learning_rate": 5.643156795047343e-06, "loss": 1.4485859870910645, "step": 1251 }, { "epoch": 1.541871921182266, "grad_norm": 7.685396722064439, "learning_rate": 5.6360496273331055e-06, "loss": 1.8672525882720947, "step": 1252 }, { "epoch": 1.543103448275862, "grad_norm": 10.04870984968868, "learning_rate": 5.628941153118388e-06, "loss": 1.4309324026107788, "step": 1253 }, { "epoch": 1.5443349753694582, "grad_norm": 8.68197237847592, "learning_rate": 5.621831387004603e-06, "loss": 1.8784745931625366, "step": 1254 }, { "epoch": 1.5455665024630543, "grad_norm": 13.277977807429252, "learning_rate": 5.6147203435958246e-06, "loss": 2.109992027282715, "step": 1255 }, { "epoch": 1.5467980295566504, "grad_norm": 12.972460738003901, "learning_rate": 5.607608037498742e-06, "loss": 1.5892071723937988, "step": 1256 }, { "epoch": 1.5480295566502464, "grad_norm": 13.365650986627243, "learning_rate": 5.600494483322643e-06, "loss": 1.3583379983901978, "step": 1257 }, { "epoch": 1.5492610837438425, "grad_norm": 20.27099102357665, "learning_rate": 5.593379695679378e-06, "loss": 2.126896381378174, "step": 1258 }, { "epoch": 1.5504926108374384, "grad_norm": 17.176572909103676, "learning_rate": 5.586263689183332e-06, "loss": 1.7454299926757812, "step": 1259 }, { "epoch": 1.5517241379310345, "grad_norm": 13.916773869762237, "learning_rate": 5.5791464784513905e-06, "loss": 1.1533763408660889, "step": 1260 }, { "epoch": 1.5529556650246306, "grad_norm": 7.929553367189426, "learning_rate": 5.572028078102917e-06, "loss": 1.4818049669265747, "step": 1261 }, { "epoch": 1.5541871921182266, "grad_norm": 10.401505556673449, "learning_rate": 5.564908502759714e-06, "loss": 1.7103283405303955, "step": 1262 }, { "epoch": 1.5554187192118225, "grad_norm": 9.47500952850124, "learning_rate": 5.557787767046001e-06, "loss": 2.1653401851654053, "step": 1263 }, { "epoch": 1.5566502463054186, "grad_norm": 11.53902942298552, "learning_rate": 5.55066588558838e-06, "loss": 1.3127275705337524, "step": 1264 }, { "epoch": 1.5578817733990147, "grad_norm": 16.55540616140196, "learning_rate": 5.543542873015806e-06, "loss": 1.0865871906280518, "step": 1265 }, { "epoch": 1.5591133004926108, "grad_norm": 11.513704169835737, "learning_rate": 5.536418743959559e-06, "loss": 1.341281533241272, "step": 1266 }, { "epoch": 1.5603448275862069, "grad_norm": 13.363897307451165, "learning_rate": 5.529293513053207e-06, "loss": 1.1612720489501953, "step": 1267 }, { "epoch": 1.561576354679803, "grad_norm": 8.231595025537441, "learning_rate": 5.522167194932588e-06, "loss": 1.7491642236709595, "step": 1268 }, { "epoch": 1.562807881773399, "grad_norm": 14.714195860173573, "learning_rate": 5.515039804235772e-06, "loss": 1.8244414329528809, "step": 1269 }, { "epoch": 1.564039408866995, "grad_norm": 14.369418745397832, "learning_rate": 5.50791135560303e-06, "loss": 1.6449997425079346, "step": 1270 }, { "epoch": 1.5652709359605912, "grad_norm": 10.791840038500066, "learning_rate": 5.5007818636768055e-06, "loss": 1.258559226989746, "step": 1271 }, { "epoch": 1.5665024630541873, "grad_norm": 12.265469895779276, "learning_rate": 5.493651343101686e-06, "loss": 2.075775146484375, "step": 1272 }, { "epoch": 1.5677339901477834, "grad_norm": 33.663491606092755, "learning_rate": 5.486519808524374e-06, "loss": 1.8196138143539429, "step": 1273 }, { "epoch": 1.5689655172413794, "grad_norm": 10.504622195873791, "learning_rate": 5.479387274593653e-06, "loss": 1.129037618637085, "step": 1274 }, { "epoch": 1.5701970443349755, "grad_norm": 10.887519946570082, "learning_rate": 5.472253755960358e-06, "loss": 1.7367748022079468, "step": 1275 }, { "epoch": 1.5714285714285714, "grad_norm": 9.127598313619417, "learning_rate": 5.4651192672773475e-06, "loss": 1.9274532794952393, "step": 1276 }, { "epoch": 1.5726600985221675, "grad_norm": 17.490821839529264, "learning_rate": 5.457983823199475e-06, "loss": 1.4018654823303223, "step": 1277 }, { "epoch": 1.5738916256157636, "grad_norm": 17.899672160499332, "learning_rate": 5.450847438383555e-06, "loss": 1.383131504058838, "step": 1278 }, { "epoch": 1.5751231527093597, "grad_norm": 6.595048027752494, "learning_rate": 5.443710127488331e-06, "loss": 1.277740716934204, "step": 1279 }, { "epoch": 1.5763546798029555, "grad_norm": 9.304406142462632, "learning_rate": 5.4365719051744556e-06, "loss": 1.507627010345459, "step": 1280 }, { "epoch": 1.5775862068965516, "grad_norm": 13.383687869982538, "learning_rate": 5.429432786104446e-06, "loss": 1.609743595123291, "step": 1281 }, { "epoch": 1.5788177339901477, "grad_norm": 14.966009265010456, "learning_rate": 5.422292784942666e-06, "loss": 3.7705276012420654, "step": 1282 }, { "epoch": 1.5800492610837438, "grad_norm": 8.997880163576188, "learning_rate": 5.415151916355292e-06, "loss": 1.5003160238265991, "step": 1283 }, { "epoch": 1.5812807881773399, "grad_norm": 9.476478190888859, "learning_rate": 5.408010195010278e-06, "loss": 2.2466366291046143, "step": 1284 }, { "epoch": 1.582512315270936, "grad_norm": 7.465134227448914, "learning_rate": 5.400867635577335e-06, "loss": 1.0722277164459229, "step": 1285 }, { "epoch": 1.583743842364532, "grad_norm": 13.942249242079209, "learning_rate": 5.3937242527278885e-06, "loss": 1.3113644123077393, "step": 1286 }, { "epoch": 1.5849753694581281, "grad_norm": 14.224147707467683, "learning_rate": 5.3865800611350634e-06, "loss": 1.4688694477081299, "step": 1287 }, { "epoch": 1.5862068965517242, "grad_norm": 9.648975936769988, "learning_rate": 5.379435075473641e-06, "loss": 1.3646764755249023, "step": 1288 }, { "epoch": 1.5874384236453203, "grad_norm": 8.753285038565833, "learning_rate": 5.372289310420032e-06, "loss": 1.6248177289962769, "step": 1289 }, { "epoch": 1.5886699507389164, "grad_norm": 9.773114583134893, "learning_rate": 5.365142780652255e-06, "loss": 1.5507471561431885, "step": 1290 }, { "epoch": 1.5899014778325125, "grad_norm": 8.752822975110762, "learning_rate": 5.35799550084989e-06, "loss": 1.2866086959838867, "step": 1291 }, { "epoch": 1.5911330049261085, "grad_norm": 10.021050170312028, "learning_rate": 5.350847485694067e-06, "loss": 2.336108684539795, "step": 1292 }, { "epoch": 1.5923645320197044, "grad_norm": 11.648640054355637, "learning_rate": 5.343698749867421e-06, "loss": 1.6604368686676025, "step": 1293 }, { "epoch": 1.5935960591133005, "grad_norm": 16.28378480699955, "learning_rate": 5.336549308054066e-06, "loss": 1.2169203758239746, "step": 1294 }, { "epoch": 1.5948275862068966, "grad_norm": 14.069009000417143, "learning_rate": 5.329399174939572e-06, "loss": 1.546027421951294, "step": 1295 }, { "epoch": 1.5960591133004927, "grad_norm": 9.646944240372145, "learning_rate": 5.3222483652109235e-06, "loss": 1.1372979879379272, "step": 1296 }, { "epoch": 1.5972906403940885, "grad_norm": 10.548510904543294, "learning_rate": 5.315096893556497e-06, "loss": 1.3435921669006348, "step": 1297 }, { "epoch": 1.5985221674876846, "grad_norm": 14.79008878560828, "learning_rate": 5.307944774666029e-06, "loss": 1.522647500038147, "step": 1298 }, { "epoch": 1.5997536945812807, "grad_norm": 17.912683434114346, "learning_rate": 5.300792023230587e-06, "loss": 2.0829434394836426, "step": 1299 }, { "epoch": 1.6009852216748768, "grad_norm": 8.420566897576393, "learning_rate": 5.2936386539425325e-06, "loss": 1.761828064918518, "step": 1300 }, { "epoch": 1.6022167487684729, "grad_norm": 14.83308627903251, "learning_rate": 5.2864846814955e-06, "loss": 2.4108588695526123, "step": 1301 }, { "epoch": 1.603448275862069, "grad_norm": 7.959651684795871, "learning_rate": 5.279330120584365e-06, "loss": 1.626701831817627, "step": 1302 }, { "epoch": 1.604679802955665, "grad_norm": 15.705970904875606, "learning_rate": 5.272174985905207e-06, "loss": 1.2424887418746948, "step": 1303 }, { "epoch": 1.6059113300492611, "grad_norm": 12.239359710615943, "learning_rate": 5.2650192921552845e-06, "loss": 2.149031639099121, "step": 1304 }, { "epoch": 1.6071428571428572, "grad_norm": 10.231856507403213, "learning_rate": 5.257863054033012e-06, "loss": 2.6947379112243652, "step": 1305 }, { "epoch": 1.6083743842364533, "grad_norm": 18.838018326977505, "learning_rate": 5.25070628623791e-06, "loss": 1.665069818496704, "step": 1306 }, { "epoch": 1.6096059113300494, "grad_norm": 14.325294673284358, "learning_rate": 5.243549003470599e-06, "loss": 1.3887734413146973, "step": 1307 }, { "epoch": 1.6108374384236455, "grad_norm": 11.840772011671689, "learning_rate": 5.236391220432745e-06, "loss": 1.340559720993042, "step": 1308 }, { "epoch": 1.6120689655172413, "grad_norm": 10.400173398296557, "learning_rate": 5.229232951827054e-06, "loss": 1.1291146278381348, "step": 1309 }, { "epoch": 1.6133004926108374, "grad_norm": 11.008129364503455, "learning_rate": 5.222074212357221e-06, "loss": 1.8375647068023682, "step": 1310 }, { "epoch": 1.6145320197044335, "grad_norm": 26.174008264121436, "learning_rate": 5.2149150167279106e-06, "loss": 1.3299870491027832, "step": 1311 }, { "epoch": 1.6157635467980296, "grad_norm": 9.874671943961642, "learning_rate": 5.2077553796447254e-06, "loss": 1.1574440002441406, "step": 1312 }, { "epoch": 1.6169950738916257, "grad_norm": 9.304756709434216, "learning_rate": 5.200595315814174e-06, "loss": 1.8118785619735718, "step": 1313 }, { "epoch": 1.6182266009852215, "grad_norm": 10.54430610217864, "learning_rate": 5.19343483994364e-06, "loss": 1.333923101425171, "step": 1314 }, { "epoch": 1.6194581280788176, "grad_norm": 8.365290613104223, "learning_rate": 5.18627396674136e-06, "loss": 1.2107478380203247, "step": 1315 }, { "epoch": 1.6206896551724137, "grad_norm": 11.934365489822259, "learning_rate": 5.1791127109163734e-06, "loss": 1.662817120552063, "step": 1316 }, { "epoch": 1.6219211822660098, "grad_norm": 11.66068657995672, "learning_rate": 5.17195108717852e-06, "loss": 1.7790195941925049, "step": 1317 }, { "epoch": 1.6231527093596059, "grad_norm": 15.883414066148024, "learning_rate": 5.164789110238387e-06, "loss": 1.5893058776855469, "step": 1318 }, { "epoch": 1.624384236453202, "grad_norm": 9.631844787083402, "learning_rate": 5.15762679480729e-06, "loss": 1.256395936012268, "step": 1319 }, { "epoch": 1.625615763546798, "grad_norm": 18.80096398191795, "learning_rate": 5.150464155597239e-06, "loss": 1.3061628341674805, "step": 1320 }, { "epoch": 1.6268472906403941, "grad_norm": 8.93680164244121, "learning_rate": 5.143301207320909e-06, "loss": 1.4399319887161255, "step": 1321 }, { "epoch": 1.6280788177339902, "grad_norm": 13.559338660465917, "learning_rate": 5.136137964691609e-06, "loss": 1.2071207761764526, "step": 1322 }, { "epoch": 1.6293103448275863, "grad_norm": 15.329093630080337, "learning_rate": 5.128974442423254e-06, "loss": 2.2784008979797363, "step": 1323 }, { "epoch": 1.6305418719211824, "grad_norm": 10.677223802578135, "learning_rate": 5.121810655230336e-06, "loss": 1.3703962564468384, "step": 1324 }, { "epoch": 1.6317733990147785, "grad_norm": 7.672085033643185, "learning_rate": 5.114646617827884e-06, "loss": 0.6955282688140869, "step": 1325 }, { "epoch": 1.6330049261083743, "grad_norm": 9.372418453872616, "learning_rate": 5.107482344931448e-06, "loss": 1.5774227380752563, "step": 1326 }, { "epoch": 1.6342364532019704, "grad_norm": 7.569882170382433, "learning_rate": 5.100317851257057e-06, "loss": 1.6811349391937256, "step": 1327 }, { "epoch": 1.6354679802955665, "grad_norm": 13.234466243138659, "learning_rate": 5.093153151521196e-06, "loss": 1.563596487045288, "step": 1328 }, { "epoch": 1.6366995073891626, "grad_norm": 13.317086470459271, "learning_rate": 5.085988260440776e-06, "loss": 1.44309401512146, "step": 1329 }, { "epoch": 1.6379310344827587, "grad_norm": 12.614583983426193, "learning_rate": 5.0788231927330924e-06, "loss": 1.5392205715179443, "step": 1330 }, { "epoch": 1.6391625615763545, "grad_norm": 19.688183928504156, "learning_rate": 5.0716579631158124e-06, "loss": 0.9557719826698303, "step": 1331 }, { "epoch": 1.6403940886699506, "grad_norm": 12.748000945416605, "learning_rate": 5.064492586306931e-06, "loss": 1.1032493114471436, "step": 1332 }, { "epoch": 1.6416256157635467, "grad_norm": 14.590229259835747, "learning_rate": 5.057327077024745e-06, "loss": 1.4907091856002808, "step": 1333 }, { "epoch": 1.6428571428571428, "grad_norm": 13.569513298786392, "learning_rate": 5.050161449987828e-06, "loss": 1.4919164180755615, "step": 1334 }, { "epoch": 1.6440886699507389, "grad_norm": 17.53788627610522, "learning_rate": 5.0429957199149905e-06, "loss": 2.177396297454834, "step": 1335 }, { "epoch": 1.645320197044335, "grad_norm": 9.011039030303097, "learning_rate": 5.035829901525258e-06, "loss": 1.2386332750320435, "step": 1336 }, { "epoch": 1.646551724137931, "grad_norm": 7.326320563707851, "learning_rate": 5.028664009537835e-06, "loss": 1.2984986305236816, "step": 1337 }, { "epoch": 1.6477832512315271, "grad_norm": 8.373461994458872, "learning_rate": 5.021498058672076e-06, "loss": 1.1399617195129395, "step": 1338 }, { "epoch": 1.6490147783251232, "grad_norm": 7.295316739226097, "learning_rate": 5.014332063647462e-06, "loss": 1.9816789627075195, "step": 1339 }, { "epoch": 1.6502463054187193, "grad_norm": 7.86464342129843, "learning_rate": 5.007166039183561e-06, "loss": 1.4210541248321533, "step": 1340 }, { "epoch": 1.6514778325123154, "grad_norm": 12.713637168049194, "learning_rate": 5e-06, "loss": 1.5061390399932861, "step": 1341 }, { "epoch": 1.6527093596059115, "grad_norm": 8.899156333262312, "learning_rate": 4.99283396081644e-06, "loss": 1.4701118469238281, "step": 1342 }, { "epoch": 1.6539408866995073, "grad_norm": 10.54571567541005, "learning_rate": 4.985667936352538e-06, "loss": 1.4879779815673828, "step": 1343 }, { "epoch": 1.6551724137931034, "grad_norm": 10.432279538827562, "learning_rate": 4.978501941327926e-06, "loss": 1.51373291015625, "step": 1344 }, { "epoch": 1.6564039408866995, "grad_norm": 7.981064947021898, "learning_rate": 4.971335990462168e-06, "loss": 1.5439019203186035, "step": 1345 }, { "epoch": 1.6576354679802956, "grad_norm": 14.863181962691362, "learning_rate": 4.964170098474744e-06, "loss": 1.7145721912384033, "step": 1346 }, { "epoch": 1.6588669950738915, "grad_norm": 7.816226303611453, "learning_rate": 4.95700428008501e-06, "loss": 1.6367833614349365, "step": 1347 }, { "epoch": 1.6600985221674875, "grad_norm": 12.087333147554537, "learning_rate": 4.949838550012172e-06, "loss": 1.4300103187561035, "step": 1348 }, { "epoch": 1.6613300492610836, "grad_norm": 6.881924405292677, "learning_rate": 4.942672922975255e-06, "loss": 2.0569915771484375, "step": 1349 }, { "epoch": 1.6625615763546797, "grad_norm": 15.296469591183284, "learning_rate": 4.935507413693071e-06, "loss": 1.1028980016708374, "step": 1350 }, { "epoch": 1.6637931034482758, "grad_norm": 9.201861102909985, "learning_rate": 4.928342036884189e-06, "loss": 1.6323003768920898, "step": 1351 }, { "epoch": 1.6650246305418719, "grad_norm": 10.996157407203105, "learning_rate": 4.921176807266909e-06, "loss": 1.5050472021102905, "step": 1352 }, { "epoch": 1.666256157635468, "grad_norm": 17.127722044101333, "learning_rate": 4.914011739559225e-06, "loss": 1.3893849849700928, "step": 1353 }, { "epoch": 1.667487684729064, "grad_norm": 13.548169676262727, "learning_rate": 4.906846848478803e-06, "loss": 1.1478514671325684, "step": 1354 }, { "epoch": 1.6687192118226601, "grad_norm": 16.337726396970115, "learning_rate": 4.899682148742944e-06, "loss": 1.2397665977478027, "step": 1355 }, { "epoch": 1.6699507389162562, "grad_norm": 8.122019629920894, "learning_rate": 4.892517655068555e-06, "loss": 1.1658974885940552, "step": 1356 }, { "epoch": 1.6711822660098523, "grad_norm": 10.105771734426996, "learning_rate": 4.8853533821721175e-06, "loss": 1.7130283117294312, "step": 1357 }, { "epoch": 1.6724137931034484, "grad_norm": 10.758386009234124, "learning_rate": 4.878189344769666e-06, "loss": 0.9516315460205078, "step": 1358 }, { "epoch": 1.6736453201970445, "grad_norm": 11.103808898671073, "learning_rate": 4.871025557576747e-06, "loss": 1.143174171447754, "step": 1359 }, { "epoch": 1.6748768472906403, "grad_norm": 11.525961008953772, "learning_rate": 4.863862035308392e-06, "loss": 1.7117831707000732, "step": 1360 }, { "epoch": 1.6761083743842364, "grad_norm": 17.64687941795743, "learning_rate": 4.8566987926790946e-06, "loss": 2.507868528366089, "step": 1361 }, { "epoch": 1.6773399014778325, "grad_norm": 9.376137745201675, "learning_rate": 4.849535844402762e-06, "loss": 1.476400375366211, "step": 1362 }, { "epoch": 1.6785714285714286, "grad_norm": 8.721089378493017, "learning_rate": 4.8423732051927115e-06, "loss": 1.3162943124771118, "step": 1363 }, { "epoch": 1.6798029556650245, "grad_norm": 10.422911150427735, "learning_rate": 4.835210889761614e-06, "loss": 2.2291440963745117, "step": 1364 }, { "epoch": 1.6810344827586206, "grad_norm": 9.602624562609396, "learning_rate": 4.82804891282148e-06, "loss": 1.2231886386871338, "step": 1365 }, { "epoch": 1.6822660098522166, "grad_norm": 14.076238439157445, "learning_rate": 4.820887289083629e-06, "loss": 1.3799304962158203, "step": 1366 }, { "epoch": 1.6834975369458127, "grad_norm": 15.54796648321669, "learning_rate": 4.813726033258643e-06, "loss": 1.856811761856079, "step": 1367 }, { "epoch": 1.6847290640394088, "grad_norm": 9.64062645814171, "learning_rate": 4.80656516005636e-06, "loss": 1.5948967933654785, "step": 1368 }, { "epoch": 1.685960591133005, "grad_norm": 13.962004352631022, "learning_rate": 4.799404684185828e-06, "loss": 1.5035887956619263, "step": 1369 }, { "epoch": 1.687192118226601, "grad_norm": 11.27741103317867, "learning_rate": 4.792244620355275e-06, "loss": 1.4715675115585327, "step": 1370 }, { "epoch": 1.688423645320197, "grad_norm": 15.373869655729267, "learning_rate": 4.78508498327209e-06, "loss": 1.393894076347351, "step": 1371 }, { "epoch": 1.6896551724137931, "grad_norm": 12.537169523242483, "learning_rate": 4.777925787642781e-06, "loss": 1.8458061218261719, "step": 1372 }, { "epoch": 1.6908866995073892, "grad_norm": 12.62635000347042, "learning_rate": 4.770767048172948e-06, "loss": 1.0604429244995117, "step": 1373 }, { "epoch": 1.6921182266009853, "grad_norm": 10.74648464318841, "learning_rate": 4.7636087795672565e-06, "loss": 1.3261964321136475, "step": 1374 }, { "epoch": 1.6933497536945814, "grad_norm": 9.576848082824501, "learning_rate": 4.756450996529403e-06, "loss": 1.6243900060653687, "step": 1375 }, { "epoch": 1.6945812807881775, "grad_norm": 13.575969601291865, "learning_rate": 4.749293713762091e-06, "loss": 1.8087639808654785, "step": 1376 }, { "epoch": 1.6958128078817734, "grad_norm": 8.48685992922433, "learning_rate": 4.742136945966991e-06, "loss": 1.9180892705917358, "step": 1377 }, { "epoch": 1.6970443349753694, "grad_norm": 12.706829097920151, "learning_rate": 4.734980707844716e-06, "loss": 1.6797364950180054, "step": 1378 }, { "epoch": 1.6982758620689655, "grad_norm": 10.281614379219002, "learning_rate": 4.727825014094795e-06, "loss": 0.9649052023887634, "step": 1379 }, { "epoch": 1.6995073891625616, "grad_norm": 7.785652444986331, "learning_rate": 4.720669879415637e-06, "loss": 1.4185916185379028, "step": 1380 }, { "epoch": 1.7007389162561575, "grad_norm": 10.73836489858494, "learning_rate": 4.713515318504501e-06, "loss": 1.8681238889694214, "step": 1381 }, { "epoch": 1.7019704433497536, "grad_norm": 9.950804244952993, "learning_rate": 4.706361346057468e-06, "loss": 1.2830915451049805, "step": 1382 }, { "epoch": 1.7032019704433496, "grad_norm": 18.988866497939586, "learning_rate": 4.699207976769416e-06, "loss": 1.0888878107070923, "step": 1383 }, { "epoch": 1.7044334975369457, "grad_norm": 12.689992799691533, "learning_rate": 4.692055225333972e-06, "loss": 1.4439440965652466, "step": 1384 }, { "epoch": 1.7056650246305418, "grad_norm": 7.183191439849756, "learning_rate": 4.684903106443504e-06, "loss": 1.0282858610153198, "step": 1385 }, { "epoch": 1.706896551724138, "grad_norm": 13.261845343202891, "learning_rate": 4.677751634789078e-06, "loss": 1.6842533349990845, "step": 1386 }, { "epoch": 1.708128078817734, "grad_norm": 14.612290761713947, "learning_rate": 4.670600825060429e-06, "loss": 1.5473763942718506, "step": 1387 }, { "epoch": 1.70935960591133, "grad_norm": 19.73106165634469, "learning_rate": 4.663450691945936e-06, "loss": 1.839112401008606, "step": 1388 }, { "epoch": 1.7105911330049262, "grad_norm": 10.917539579247505, "learning_rate": 4.656301250132581e-06, "loss": 1.5349544286727905, "step": 1389 }, { "epoch": 1.7118226600985222, "grad_norm": 11.132766984186494, "learning_rate": 4.649152514305934e-06, "loss": 1.5788905620574951, "step": 1390 }, { "epoch": 1.7130541871921183, "grad_norm": 10.21681078103426, "learning_rate": 4.6420044991501104e-06, "loss": 1.4541325569152832, "step": 1391 }, { "epoch": 1.7142857142857144, "grad_norm": 9.227689699191664, "learning_rate": 4.634857219347746e-06, "loss": 1.8231902122497559, "step": 1392 }, { "epoch": 1.7155172413793105, "grad_norm": 10.500866364265818, "learning_rate": 4.627710689579968e-06, "loss": 1.6302368640899658, "step": 1393 }, { "epoch": 1.7167487684729064, "grad_norm": 17.60594188273056, "learning_rate": 4.62056492452636e-06, "loss": 1.497374415397644, "step": 1394 }, { "epoch": 1.7179802955665024, "grad_norm": 15.287585545597818, "learning_rate": 4.613419938864937e-06, "loss": 1.1390448808670044, "step": 1395 }, { "epoch": 1.7192118226600985, "grad_norm": 10.328419466218456, "learning_rate": 4.606275747272112e-06, "loss": 1.4320652484893799, "step": 1396 }, { "epoch": 1.7204433497536946, "grad_norm": 9.176084187845012, "learning_rate": 4.599132364422666e-06, "loss": 1.2651784420013428, "step": 1397 }, { "epoch": 1.7216748768472905, "grad_norm": 15.836729193949362, "learning_rate": 4.5919898049897225e-06, "loss": 1.719766616821289, "step": 1398 }, { "epoch": 1.7229064039408866, "grad_norm": 12.937422715545681, "learning_rate": 4.58484808364471e-06, "loss": 1.707594394683838, "step": 1399 }, { "epoch": 1.7241379310344827, "grad_norm": 14.730027238842638, "learning_rate": 4.5777072150573355e-06, "loss": 1.4608323574066162, "step": 1400 }, { "epoch": 1.7253694581280787, "grad_norm": 9.894706364799527, "learning_rate": 4.570567213895555e-06, "loss": 1.5542428493499756, "step": 1401 }, { "epoch": 1.7266009852216748, "grad_norm": 10.251938635324704, "learning_rate": 4.563428094825546e-06, "loss": 1.2282288074493408, "step": 1402 }, { "epoch": 1.727832512315271, "grad_norm": 12.91095594163412, "learning_rate": 4.556289872511669e-06, "loss": 1.1870850324630737, "step": 1403 }, { "epoch": 1.729064039408867, "grad_norm": 19.656749282746095, "learning_rate": 4.549152561616445e-06, "loss": 1.8125461339950562, "step": 1404 }, { "epoch": 1.730295566502463, "grad_norm": 13.055834351152246, "learning_rate": 4.542016176800527e-06, "loss": 1.4419995546340942, "step": 1405 }, { "epoch": 1.7315270935960592, "grad_norm": 12.427293973832745, "learning_rate": 4.534880732722653e-06, "loss": 1.8834543228149414, "step": 1406 }, { "epoch": 1.7327586206896552, "grad_norm": 9.308568400780414, "learning_rate": 4.527746244039644e-06, "loss": 1.120203971862793, "step": 1407 }, { "epoch": 1.7339901477832513, "grad_norm": 10.965136861668267, "learning_rate": 4.5206127254063495e-06, "loss": 0.9131630659103394, "step": 1408 }, { "epoch": 1.7352216748768474, "grad_norm": 18.40693337146411, "learning_rate": 4.513480191475627e-06, "loss": 1.86919367313385, "step": 1409 }, { "epoch": 1.7364532019704435, "grad_norm": 16.72423206220796, "learning_rate": 4.506348656898316e-06, "loss": 1.6573272943496704, "step": 1410 }, { "epoch": 1.7376847290640394, "grad_norm": 12.29145112798753, "learning_rate": 4.499218136323197e-06, "loss": 1.2864340543746948, "step": 1411 }, { "epoch": 1.7389162561576355, "grad_norm": 9.205794418080544, "learning_rate": 4.492088644396972e-06, "loss": 1.5519993305206299, "step": 1412 }, { "epoch": 1.7401477832512315, "grad_norm": 10.304423144578244, "learning_rate": 4.4849601957642295e-06, "loss": 1.7556722164154053, "step": 1413 }, { "epoch": 1.7413793103448276, "grad_norm": 12.170127229505125, "learning_rate": 4.477832805067412e-06, "loss": 1.6349589824676514, "step": 1414 }, { "epoch": 1.7426108374384235, "grad_norm": 18.04544459439354, "learning_rate": 4.470706486946797e-06, "loss": 1.3583035469055176, "step": 1415 }, { "epoch": 1.7438423645320196, "grad_norm": 16.035788014412844, "learning_rate": 4.463581256040445e-06, "loss": 1.5367932319641113, "step": 1416 }, { "epoch": 1.7450738916256157, "grad_norm": 10.971734568897116, "learning_rate": 4.456457126984196e-06, "loss": 1.5078128576278687, "step": 1417 }, { "epoch": 1.7463054187192117, "grad_norm": 8.435567334501869, "learning_rate": 4.449334114411622e-06, "loss": 1.8653573989868164, "step": 1418 }, { "epoch": 1.7475369458128078, "grad_norm": 11.511023238806931, "learning_rate": 4.4422122329539996e-06, "loss": 1.1381313800811768, "step": 1419 }, { "epoch": 1.748768472906404, "grad_norm": 9.115530827164923, "learning_rate": 4.435091497240287e-06, "loss": 1.4135184288024902, "step": 1420 }, { "epoch": 1.75, "grad_norm": 19.148242044300115, "learning_rate": 4.427971921897086e-06, "loss": 1.2186479568481445, "step": 1421 }, { "epoch": 1.751231527093596, "grad_norm": 11.735225834432583, "learning_rate": 4.420853521548611e-06, "loss": 1.3139259815216064, "step": 1422 }, { "epoch": 1.7524630541871922, "grad_norm": 9.908228964820347, "learning_rate": 4.413736310816669e-06, "loss": 2.0143887996673584, "step": 1423 }, { "epoch": 1.7536945812807883, "grad_norm": 11.72709904223931, "learning_rate": 4.4066203043206226e-06, "loss": 1.5800344944000244, "step": 1424 }, { "epoch": 1.7549261083743843, "grad_norm": 13.351525970289408, "learning_rate": 4.399505516677358e-06, "loss": 1.449183702468872, "step": 1425 }, { "epoch": 1.7561576354679804, "grad_norm": 14.449460918267059, "learning_rate": 4.3923919625012605e-06, "loss": 0.6957097053527832, "step": 1426 }, { "epoch": 1.7573891625615765, "grad_norm": 16.656517142384814, "learning_rate": 4.385279656404178e-06, "loss": 1.0665647983551025, "step": 1427 }, { "epoch": 1.7586206896551724, "grad_norm": 8.728452405950277, "learning_rate": 4.3781686129953975e-06, "loss": 1.2771016359329224, "step": 1428 }, { "epoch": 1.7598522167487685, "grad_norm": 9.380843658329356, "learning_rate": 4.371058846881614e-06, "loss": 1.4222235679626465, "step": 1429 }, { "epoch": 1.7610837438423645, "grad_norm": 18.6167744042239, "learning_rate": 4.363950372666896e-06, "loss": 2.1237497329711914, "step": 1430 }, { "epoch": 1.7623152709359606, "grad_norm": 15.81534835320748, "learning_rate": 4.356843204952657e-06, "loss": 1.3875718116760254, "step": 1431 }, { "epoch": 1.7635467980295565, "grad_norm": 11.325736932128727, "learning_rate": 4.349737358337635e-06, "loss": 1.2585203647613525, "step": 1432 }, { "epoch": 1.7647783251231526, "grad_norm": 10.890833810787267, "learning_rate": 4.3426328474178405e-06, "loss": 1.3183746337890625, "step": 1433 }, { "epoch": 1.7660098522167487, "grad_norm": 11.455742000334912, "learning_rate": 4.335529686786554e-06, "loss": 1.7174941301345825, "step": 1434 }, { "epoch": 1.7672413793103448, "grad_norm": 9.946830568051285, "learning_rate": 4.328427891034273e-06, "loss": 1.9503614902496338, "step": 1435 }, { "epoch": 1.7684729064039408, "grad_norm": 13.787149559571247, "learning_rate": 4.321327474748697e-06, "loss": 1.3797223567962646, "step": 1436 }, { "epoch": 1.769704433497537, "grad_norm": 14.935693009519694, "learning_rate": 4.3142284525146915e-06, "loss": 1.4113730192184448, "step": 1437 }, { "epoch": 1.770935960591133, "grad_norm": 11.978351079391912, "learning_rate": 4.307130838914252e-06, "loss": 2.383976697921753, "step": 1438 }, { "epoch": 1.772167487684729, "grad_norm": 10.033247535379967, "learning_rate": 4.300034648526489e-06, "loss": 1.7687448263168335, "step": 1439 }, { "epoch": 1.7733990147783252, "grad_norm": 15.25338664216219, "learning_rate": 4.292939895927587e-06, "loss": 1.5130079984664917, "step": 1440 }, { "epoch": 1.7746305418719213, "grad_norm": 16.671641040457516, "learning_rate": 4.2858465956907726e-06, "loss": 1.0863475799560547, "step": 1441 }, { "epoch": 1.7758620689655173, "grad_norm": 21.777249707868723, "learning_rate": 4.278754762386297e-06, "loss": 1.1504137516021729, "step": 1442 }, { "epoch": 1.7770935960591134, "grad_norm": 10.960123964926488, "learning_rate": 4.271664410581392e-06, "loss": 1.1227596998214722, "step": 1443 }, { "epoch": 1.7783251231527095, "grad_norm": 10.668478758892386, "learning_rate": 4.264575554840248e-06, "loss": 1.4501817226409912, "step": 1444 }, { "epoch": 1.7795566502463054, "grad_norm": 8.508770946365994, "learning_rate": 4.257488209723981e-06, "loss": 0.48442721366882324, "step": 1445 }, { "epoch": 1.7807881773399015, "grad_norm": 19.774025943442037, "learning_rate": 4.25040238979061e-06, "loss": 1.218263864517212, "step": 1446 }, { "epoch": 1.7820197044334976, "grad_norm": 11.107941835251008, "learning_rate": 4.243318109595014e-06, "loss": 1.1711516380310059, "step": 1447 }, { "epoch": 1.7832512315270936, "grad_norm": 14.393581709964357, "learning_rate": 4.2362353836889126e-06, "loss": 1.3575153350830078, "step": 1448 }, { "epoch": 1.7844827586206895, "grad_norm": 15.514668018354685, "learning_rate": 4.229154226620832e-06, "loss": 2.6967573165893555, "step": 1449 }, { "epoch": 1.7857142857142856, "grad_norm": 16.398555290477788, "learning_rate": 4.2220746529360745e-06, "loss": 2.2812700271606445, "step": 1450 }, { "epoch": 1.7869458128078817, "grad_norm": 7.44372678737394, "learning_rate": 4.2149966771766945e-06, "loss": 1.2746225595474243, "step": 1451 }, { "epoch": 1.7881773399014778, "grad_norm": 24.76309740203676, "learning_rate": 4.207920313881459e-06, "loss": 1.4866999387741089, "step": 1452 }, { "epoch": 1.7894088669950738, "grad_norm": 12.129429402231283, "learning_rate": 4.200845577585827e-06, "loss": 1.4830021858215332, "step": 1453 }, { "epoch": 1.79064039408867, "grad_norm": 14.927464924948287, "learning_rate": 4.193772482821914e-06, "loss": 2.5529747009277344, "step": 1454 }, { "epoch": 1.791871921182266, "grad_norm": 10.342903175989482, "learning_rate": 4.186701044118459e-06, "loss": 1.413874626159668, "step": 1455 }, { "epoch": 1.793103448275862, "grad_norm": 25.730295260232445, "learning_rate": 4.179631276000807e-06, "loss": 2.1567163467407227, "step": 1456 }, { "epoch": 1.7943349753694582, "grad_norm": 30.70195031797357, "learning_rate": 4.1725631929908684e-06, "loss": 1.851858139038086, "step": 1457 }, { "epoch": 1.7955665024630543, "grad_norm": 15.74317099171368, "learning_rate": 4.165496809607089e-06, "loss": 1.2765101194381714, "step": 1458 }, { "epoch": 1.7967980295566504, "grad_norm": 10.995413854030392, "learning_rate": 4.158432140364431e-06, "loss": 1.9869401454925537, "step": 1459 }, { "epoch": 1.7980295566502464, "grad_norm": 14.263851286153963, "learning_rate": 4.151369199774325e-06, "loss": 1.5319430828094482, "step": 1460 }, { "epoch": 1.7992610837438425, "grad_norm": 10.506976676212952, "learning_rate": 4.1443080023446605e-06, "loss": 1.487468957901001, "step": 1461 }, { "epoch": 1.8004926108374384, "grad_norm": 23.04137362584248, "learning_rate": 4.137248562579742e-06, "loss": 1.6152423620224, "step": 1462 }, { "epoch": 1.8017241379310345, "grad_norm": 8.431434363474125, "learning_rate": 4.130190894980262e-06, "loss": 1.5262070894241333, "step": 1463 }, { "epoch": 1.8029556650246306, "grad_norm": 9.129193697661835, "learning_rate": 4.123135014043279e-06, "loss": 1.6697289943695068, "step": 1464 }, { "epoch": 1.8041871921182266, "grad_norm": 14.310350877734502, "learning_rate": 4.116080934262175e-06, "loss": 1.470789909362793, "step": 1465 }, { "epoch": 1.8054187192118225, "grad_norm": 10.462627135626132, "learning_rate": 4.109028670126635e-06, "loss": 1.62421715259552, "step": 1466 }, { "epoch": 1.8066502463054186, "grad_norm": 9.463272161807932, "learning_rate": 4.101978236122613e-06, "loss": 2.1249561309814453, "step": 1467 }, { "epoch": 1.8078817733990147, "grad_norm": 10.291280772031216, "learning_rate": 4.094929646732309e-06, "loss": 1.3368217945098877, "step": 1468 }, { "epoch": 1.8091133004926108, "grad_norm": 13.897028873169491, "learning_rate": 4.087882916434126e-06, "loss": 0.8684915900230408, "step": 1469 }, { "epoch": 1.8103448275862069, "grad_norm": 9.114980502172534, "learning_rate": 4.080838059702656e-06, "loss": 1.6997764110565186, "step": 1470 }, { "epoch": 1.811576354679803, "grad_norm": 15.00723435129453, "learning_rate": 4.0737950910086354e-06, "loss": 0.8933043479919434, "step": 1471 }, { "epoch": 1.812807881773399, "grad_norm": 8.849165431721978, "learning_rate": 4.0667540248189265e-06, "loss": 1.689558982849121, "step": 1472 }, { "epoch": 1.814039408866995, "grad_norm": 8.28022241305891, "learning_rate": 4.059714875596486e-06, "loss": 1.797630786895752, "step": 1473 }, { "epoch": 1.8152709359605912, "grad_norm": 8.44088037241126, "learning_rate": 4.052677657800327e-06, "loss": 2.023120164871216, "step": 1474 }, { "epoch": 1.8165024630541873, "grad_norm": 13.31766346957086, "learning_rate": 4.045642385885497e-06, "loss": 1.5412349700927734, "step": 1475 }, { "epoch": 1.8177339901477834, "grad_norm": 11.713991741569846, "learning_rate": 4.038609074303055e-06, "loss": 0.786411464214325, "step": 1476 }, { "epoch": 1.8189655172413794, "grad_norm": 12.300017528117012, "learning_rate": 4.0315777375000185e-06, "loss": 1.3470659255981445, "step": 1477 }, { "epoch": 1.8201970443349755, "grad_norm": 10.149728213380525, "learning_rate": 4.02454838991936e-06, "loss": 1.3983774185180664, "step": 1478 }, { "epoch": 1.8214285714285714, "grad_norm": 8.907879387840488, "learning_rate": 4.017521045999961e-06, "loss": 1.9945271015167236, "step": 1479 }, { "epoch": 1.8226600985221675, "grad_norm": 14.485464092551117, "learning_rate": 4.0104957201765874e-06, "loss": 1.6103991270065308, "step": 1480 }, { "epoch": 1.8238916256157636, "grad_norm": 10.17521459795804, "learning_rate": 4.003472426879866e-06, "loss": 1.2794644832611084, "step": 1481 }, { "epoch": 1.8251231527093597, "grad_norm": 12.76602401465421, "learning_rate": 3.996451180536237e-06, "loss": 1.4485671520233154, "step": 1482 }, { "epoch": 1.8263546798029555, "grad_norm": 10.794290467835673, "learning_rate": 3.989431995567947e-06, "loss": 1.1264885663986206, "step": 1483 }, { "epoch": 1.8275862068965516, "grad_norm": 9.866085409894106, "learning_rate": 3.982414886393002e-06, "loss": 1.7849301099777222, "step": 1484 }, { "epoch": 1.8288177339901477, "grad_norm": 12.201702589426084, "learning_rate": 3.975399867425146e-06, "loss": 2.4955849647521973, "step": 1485 }, { "epoch": 1.8300492610837438, "grad_norm": 9.102568432625791, "learning_rate": 3.96838695307383e-06, "loss": 1.3440265655517578, "step": 1486 }, { "epoch": 1.8312807881773399, "grad_norm": 8.145548979456889, "learning_rate": 3.961376157744183e-06, "loss": 1.7565090656280518, "step": 1487 }, { "epoch": 1.832512315270936, "grad_norm": 10.525904376218351, "learning_rate": 3.954367495836978e-06, "loss": 2.086646318435669, "step": 1488 }, { "epoch": 1.833743842364532, "grad_norm": 11.110223461103494, "learning_rate": 3.947360981748607e-06, "loss": 2.0356874465942383, "step": 1489 }, { "epoch": 1.8349753694581281, "grad_norm": 18.648426152647907, "learning_rate": 3.940356629871051e-06, "loss": 1.3129501342773438, "step": 1490 }, { "epoch": 1.8362068965517242, "grad_norm": 9.730568476467749, "learning_rate": 3.933354454591851e-06, "loss": 1.468184471130371, "step": 1491 }, { "epoch": 1.8374384236453203, "grad_norm": 11.185413004826554, "learning_rate": 3.926354470294077e-06, "loss": 1.4110320806503296, "step": 1492 }, { "epoch": 1.8386699507389164, "grad_norm": 12.98897769174535, "learning_rate": 3.9193566913562915e-06, "loss": 1.0595703125, "step": 1493 }, { "epoch": 1.8399014778325125, "grad_norm": 10.530840377449582, "learning_rate": 3.912361132152537e-06, "loss": 1.628462791442871, "step": 1494 }, { "epoch": 1.8411330049261085, "grad_norm": 14.948049661995398, "learning_rate": 3.9053678070522904e-06, "loss": 1.3903121948242188, "step": 1495 }, { "epoch": 1.8423645320197044, "grad_norm": 9.309801488918017, "learning_rate": 3.898376730420442e-06, "loss": 1.6935603618621826, "step": 1496 }, { "epoch": 1.8435960591133005, "grad_norm": 12.543386647265335, "learning_rate": 3.891387916617261e-06, "loss": 1.2785383462905884, "step": 1497 }, { "epoch": 1.8448275862068966, "grad_norm": 16.302631057977127, "learning_rate": 3.884401379998375e-06, "loss": 0.9488393068313599, "step": 1498 }, { "epoch": 1.8460591133004927, "grad_norm": 13.324215983939714, "learning_rate": 3.877417134914724e-06, "loss": 1.7822269201278687, "step": 1499 }, { "epoch": 1.8472906403940885, "grad_norm": 18.86267601616338, "learning_rate": 3.870435195712547e-06, "loss": 2.0112462043762207, "step": 1500 }, { "epoch": 1.8485221674876846, "grad_norm": 9.69652966834403, "learning_rate": 3.863455576733349e-06, "loss": 1.3558632135391235, "step": 1501 }, { "epoch": 1.8497536945812807, "grad_norm": 11.295411751598015, "learning_rate": 3.856478292313864e-06, "loss": 1.34049391746521, "step": 1502 }, { "epoch": 1.8509852216748768, "grad_norm": 14.146066291430358, "learning_rate": 3.849503356786034e-06, "loss": 1.5048649311065674, "step": 1503 }, { "epoch": 1.8522167487684729, "grad_norm": 15.401780869737596, "learning_rate": 3.842530784476971e-06, "loss": 1.595820426940918, "step": 1504 }, { "epoch": 1.853448275862069, "grad_norm": 14.910425010360937, "learning_rate": 3.83556058970894e-06, "loss": 1.4003782272338867, "step": 1505 }, { "epoch": 1.854679802955665, "grad_norm": 7.9611824961674476, "learning_rate": 3.828592786799318e-06, "loss": 1.6082279682159424, "step": 1506 }, { "epoch": 1.8559113300492611, "grad_norm": 10.255592390028927, "learning_rate": 3.821627390060568e-06, "loss": 1.7311087846755981, "step": 1507 }, { "epoch": 1.8571428571428572, "grad_norm": 12.058780526558753, "learning_rate": 3.8146644138002154e-06, "loss": 1.2369680404663086, "step": 1508 }, { "epoch": 1.8583743842364533, "grad_norm": 19.050247314658538, "learning_rate": 3.807703872320809e-06, "loss": 0.8267203569412231, "step": 1509 }, { "epoch": 1.8596059113300494, "grad_norm": 10.351521057178017, "learning_rate": 3.8007457799198977e-06, "loss": 1.310041904449463, "step": 1510 }, { "epoch": 1.8608374384236455, "grad_norm": 10.657442856658305, "learning_rate": 3.79379015089e-06, "loss": 1.483811378479004, "step": 1511 }, { "epoch": 1.8620689655172413, "grad_norm": 11.888669790205059, "learning_rate": 3.7868369995185734e-06, "loss": 1.7339284420013428, "step": 1512 }, { "epoch": 1.8633004926108374, "grad_norm": 10.593168183344854, "learning_rate": 3.7798863400879894e-06, "loss": 0.8915985822677612, "step": 1513 }, { "epoch": 1.8645320197044335, "grad_norm": 10.734489115549072, "learning_rate": 3.7729381868754985e-06, "loss": 2.3413619995117188, "step": 1514 }, { "epoch": 1.8657635467980296, "grad_norm": 9.967376867351366, "learning_rate": 3.7659925541532006e-06, "loss": 1.422214388847351, "step": 1515 }, { "epoch": 1.8669950738916257, "grad_norm": 9.453365529159266, "learning_rate": 3.759049456188022e-06, "loss": 1.435701847076416, "step": 1516 }, { "epoch": 1.8682266009852215, "grad_norm": 13.939960554468646, "learning_rate": 3.752108907241682e-06, "loss": 1.0702649354934692, "step": 1517 }, { "epoch": 1.8694581280788176, "grad_norm": 14.375834204057075, "learning_rate": 3.7451709215706643e-06, "loss": 1.3625175952911377, "step": 1518 }, { "epoch": 1.8706896551724137, "grad_norm": 14.38912976471083, "learning_rate": 3.738235513426184e-06, "loss": 0.6707335710525513, "step": 1519 }, { "epoch": 1.8719211822660098, "grad_norm": 6.68307140655082, "learning_rate": 3.7313026970541687e-06, "loss": 0.9573410749435425, "step": 1520 }, { "epoch": 1.8731527093596059, "grad_norm": 8.282620378739653, "learning_rate": 3.7243724866952114e-06, "loss": 1.625769853591919, "step": 1521 }, { "epoch": 1.874384236453202, "grad_norm": 12.4684771792282, "learning_rate": 3.717444896584562e-06, "loss": 1.2327096462249756, "step": 1522 }, { "epoch": 1.875615763546798, "grad_norm": 13.733071586817578, "learning_rate": 3.710519940952085e-06, "loss": 1.9436770677566528, "step": 1523 }, { "epoch": 1.8768472906403941, "grad_norm": 11.428790282383929, "learning_rate": 3.703597634022232e-06, "loss": 1.260964274406433, "step": 1524 }, { "epoch": 1.8780788177339902, "grad_norm": 10.74418094547702, "learning_rate": 3.6966779900140193e-06, "loss": 0.9448941946029663, "step": 1525 }, { "epoch": 1.8793103448275863, "grad_norm": 14.784266967626037, "learning_rate": 3.689761023140981e-06, "loss": 1.0470240116119385, "step": 1526 }, { "epoch": 1.8805418719211824, "grad_norm": 12.626289871406675, "learning_rate": 3.6828467476111664e-06, "loss": 1.290519118309021, "step": 1527 }, { "epoch": 1.8817733990147785, "grad_norm": 8.368189133022403, "learning_rate": 3.675935177627088e-06, "loss": 1.6617997884750366, "step": 1528 }, { "epoch": 1.8830049261083743, "grad_norm": 22.331563820583295, "learning_rate": 3.6690263273857035e-06, "loss": 2.624133825302124, "step": 1529 }, { "epoch": 1.8842364532019704, "grad_norm": 11.125845605261798, "learning_rate": 3.662120211078385e-06, "loss": 1.189339518547058, "step": 1530 }, { "epoch": 1.8854679802955665, "grad_norm": 11.063623504952298, "learning_rate": 3.6552168428908886e-06, "loss": 1.2045223712921143, "step": 1531 }, { "epoch": 1.8866995073891626, "grad_norm": 21.05973901513674, "learning_rate": 3.648316237003321e-06, "loss": 1.4260770082473755, "step": 1532 }, { "epoch": 1.8879310344827587, "grad_norm": 9.70528654459795, "learning_rate": 3.6414184075901206e-06, "loss": 1.1973135471343994, "step": 1533 }, { "epoch": 1.8891625615763545, "grad_norm": 18.383885319550775, "learning_rate": 3.6345233688200195e-06, "loss": 1.4474105834960938, "step": 1534 }, { "epoch": 1.8903940886699506, "grad_norm": 9.565993696711384, "learning_rate": 3.62763113485602e-06, "loss": 1.5732392072677612, "step": 1535 }, { "epoch": 1.8916256157635467, "grad_norm": 18.830417927799424, "learning_rate": 3.6207417198553624e-06, "loss": 1.992612361907959, "step": 1536 }, { "epoch": 1.8928571428571428, "grad_norm": 8.528733872408509, "learning_rate": 3.6138551379694936e-06, "loss": 1.8015589714050293, "step": 1537 }, { "epoch": 1.8940886699507389, "grad_norm": 20.045548838222032, "learning_rate": 3.606971403344044e-06, "loss": 1.1887943744659424, "step": 1538 }, { "epoch": 1.895320197044335, "grad_norm": 8.574686397942823, "learning_rate": 3.6000905301187953e-06, "loss": 1.035568118095398, "step": 1539 }, { "epoch": 1.896551724137931, "grad_norm": 8.862677959647126, "learning_rate": 3.5932125324276524e-06, "loss": 1.8441094160079956, "step": 1540 }, { "epoch": 1.8977832512315271, "grad_norm": 21.317551937175974, "learning_rate": 3.586337424398609e-06, "loss": 2.7305843830108643, "step": 1541 }, { "epoch": 1.8990147783251232, "grad_norm": 12.092619936908829, "learning_rate": 3.579465220153733e-06, "loss": 2.1233139038085938, "step": 1542 }, { "epoch": 1.9002463054187193, "grad_norm": 11.705206958955536, "learning_rate": 3.5725959338091133e-06, "loss": 1.232177495956421, "step": 1543 }, { "epoch": 1.9014778325123154, "grad_norm": 7.174113743881224, "learning_rate": 3.565729579474858e-06, "loss": 1.89857017993927, "step": 1544 }, { "epoch": 1.9027093596059115, "grad_norm": 15.788866110425763, "learning_rate": 3.5588661712550464e-06, "loss": 1.1281499862670898, "step": 1545 }, { "epoch": 1.9039408866995073, "grad_norm": 10.470956040036935, "learning_rate": 3.5520057232477073e-06, "loss": 1.2526335716247559, "step": 1546 }, { "epoch": 1.9051724137931034, "grad_norm": 9.301464059536526, "learning_rate": 3.545148249544793e-06, "loss": 1.8187229633331299, "step": 1547 }, { "epoch": 1.9064039408866995, "grad_norm": 9.75451095353705, "learning_rate": 3.5382937642321356e-06, "loss": 2.5140726566314697, "step": 1548 }, { "epoch": 1.9076354679802956, "grad_norm": 12.829934813861579, "learning_rate": 3.5314422813894413e-06, "loss": 1.4403750896453857, "step": 1549 }, { "epoch": 1.9088669950738915, "grad_norm": 16.531679337353626, "learning_rate": 3.524593815090241e-06, "loss": 2.1372480392456055, "step": 1550 }, { "epoch": 1.9100985221674875, "grad_norm": 15.674375359336546, "learning_rate": 3.517748379401872e-06, "loss": 1.3283928632736206, "step": 1551 }, { "epoch": 1.9113300492610836, "grad_norm": 18.1169052598084, "learning_rate": 3.510905988385449e-06, "loss": 0.915777325630188, "step": 1552 }, { "epoch": 1.9125615763546797, "grad_norm": 9.21207861248202, "learning_rate": 3.5040666560958246e-06, "loss": 1.4235864877700806, "step": 1553 }, { "epoch": 1.9137931034482758, "grad_norm": 10.331880853016509, "learning_rate": 3.497230396581579e-06, "loss": 1.0727063417434692, "step": 1554 }, { "epoch": 1.9150246305418719, "grad_norm": 6.2183233261424675, "learning_rate": 3.4903972238849727e-06, "loss": 1.2492493391036987, "step": 1555 }, { "epoch": 1.916256157635468, "grad_norm": 8.689347093090742, "learning_rate": 3.483567152041928e-06, "loss": 1.855743408203125, "step": 1556 }, { "epoch": 1.917487684729064, "grad_norm": 13.400775432098582, "learning_rate": 3.4767401950820003e-06, "loss": 1.2882115840911865, "step": 1557 }, { "epoch": 1.9187192118226601, "grad_norm": 17.24953530796186, "learning_rate": 3.469916367028345e-06, "loss": 1.0586508512496948, "step": 1558 }, { "epoch": 1.9199507389162562, "grad_norm": 7.936641918837841, "learning_rate": 3.4630956818976875e-06, "loss": 1.6678158044815063, "step": 1559 }, { "epoch": 1.9211822660098523, "grad_norm": 7.533268622313887, "learning_rate": 3.4562781537003e-06, "loss": 1.242276906967163, "step": 1560 }, { "epoch": 1.9224137931034484, "grad_norm": 11.64160436044446, "learning_rate": 3.4494637964399723e-06, "loss": 1.1909584999084473, "step": 1561 }, { "epoch": 1.9236453201970445, "grad_norm": 10.255728334199201, "learning_rate": 3.4426526241139778e-06, "loss": 1.7636524438858032, "step": 1562 }, { "epoch": 1.9248768472906403, "grad_norm": 9.49054957516609, "learning_rate": 3.4358446507130503e-06, "loss": 1.709825873374939, "step": 1563 }, { "epoch": 1.9261083743842364, "grad_norm": 10.818350574028944, "learning_rate": 3.4290398902213473e-06, "loss": 1.0826925039291382, "step": 1564 }, { "epoch": 1.9273399014778325, "grad_norm": 8.939498431984473, "learning_rate": 3.4222383566164314e-06, "loss": 1.2868252992630005, "step": 1565 }, { "epoch": 1.9285714285714286, "grad_norm": 8.295112275795647, "learning_rate": 3.4154400638692376e-06, "loss": 1.9238274097442627, "step": 1566 }, { "epoch": 1.9298029556650245, "grad_norm": 15.317456416232107, "learning_rate": 3.408645025944042e-06, "loss": 1.615818977355957, "step": 1567 }, { "epoch": 1.9310344827586206, "grad_norm": 10.763654992556582, "learning_rate": 3.4018532567984326e-06, "loss": 1.124712586402893, "step": 1568 }, { "epoch": 1.9322660098522166, "grad_norm": 12.365184508586257, "learning_rate": 3.3950647703832907e-06, "loss": 1.0411077737808228, "step": 1569 }, { "epoch": 1.9334975369458127, "grad_norm": 12.632249203055522, "learning_rate": 3.3882795806427437e-06, "loss": 1.4247188568115234, "step": 1570 }, { "epoch": 1.9347290640394088, "grad_norm": 9.103913844192295, "learning_rate": 3.3814977015141576e-06, "loss": 1.9558757543563843, "step": 1571 }, { "epoch": 1.935960591133005, "grad_norm": 13.783502778663575, "learning_rate": 3.3747191469280917e-06, "loss": 1.4765770435333252, "step": 1572 }, { "epoch": 1.937192118226601, "grad_norm": 12.11586545643866, "learning_rate": 3.3679439308082777e-06, "loss": 1.2025914192199707, "step": 1573 }, { "epoch": 1.938423645320197, "grad_norm": 8.389746847537833, "learning_rate": 3.361172067071595e-06, "loss": 1.938293695449829, "step": 1574 }, { "epoch": 1.9396551724137931, "grad_norm": 24.18653835255333, "learning_rate": 3.3544035696280264e-06, "loss": 1.9626538753509521, "step": 1575 }, { "epoch": 1.9408866995073892, "grad_norm": 16.707227251461827, "learning_rate": 3.34763845238065e-06, "loss": 2.4771430492401123, "step": 1576 }, { "epoch": 1.9421182266009853, "grad_norm": 9.24643762447737, "learning_rate": 3.340876729225595e-06, "loss": 1.5694981813430786, "step": 1577 }, { "epoch": 1.9433497536945814, "grad_norm": 12.976086056891674, "learning_rate": 3.334118414052021e-06, "loss": 1.3358147144317627, "step": 1578 }, { "epoch": 1.9445812807881775, "grad_norm": 10.05009781073385, "learning_rate": 3.327363520742087e-06, "loss": 1.6929140090942383, "step": 1579 }, { "epoch": 1.9458128078817734, "grad_norm": 14.460477433027636, "learning_rate": 3.320612063170926e-06, "loss": 1.1454588174819946, "step": 1580 }, { "epoch": 1.9470443349753694, "grad_norm": 15.890241219417488, "learning_rate": 3.313864055206607e-06, "loss": 1.3037209510803223, "step": 1581 }, { "epoch": 1.9482758620689655, "grad_norm": 18.657112628058126, "learning_rate": 3.3071195107101163e-06, "loss": 1.2016770839691162, "step": 1582 }, { "epoch": 1.9495073891625616, "grad_norm": 8.600208828774889, "learning_rate": 3.3003784435353304e-06, "loss": 1.5525718927383423, "step": 1583 }, { "epoch": 1.9507389162561575, "grad_norm": 12.025296512404239, "learning_rate": 3.293640867528978e-06, "loss": 1.293796420097351, "step": 1584 }, { "epoch": 1.9519704433497536, "grad_norm": 14.973626912716192, "learning_rate": 3.2869067965306178e-06, "loss": 1.544161081314087, "step": 1585 }, { "epoch": 1.9532019704433496, "grad_norm": 12.518775732631475, "learning_rate": 3.2801762443726087e-06, "loss": 1.584174633026123, "step": 1586 }, { "epoch": 1.9544334975369457, "grad_norm": 9.595940744200961, "learning_rate": 3.273449224880081e-06, "loss": 1.4985432624816895, "step": 1587 }, { "epoch": 1.9556650246305418, "grad_norm": 14.194278219604545, "learning_rate": 3.2667257518709124e-06, "loss": 1.4310071468353271, "step": 1588 }, { "epoch": 1.956896551724138, "grad_norm": 6.232251277924355, "learning_rate": 3.260005839155691e-06, "loss": 1.2174272537231445, "step": 1589 }, { "epoch": 1.958128078817734, "grad_norm": 8.206207570805137, "learning_rate": 3.2532895005376943e-06, "loss": 1.4618067741394043, "step": 1590 }, { "epoch": 1.95935960591133, "grad_norm": 9.028580710101858, "learning_rate": 3.2465767498128596e-06, "loss": 1.2786412239074707, "step": 1591 }, { "epoch": 1.9605911330049262, "grad_norm": 14.53956960212149, "learning_rate": 3.2398676007697495e-06, "loss": 1.152226209640503, "step": 1592 }, { "epoch": 1.9618226600985222, "grad_norm": 9.573027989064228, "learning_rate": 3.233162067189533e-06, "loss": 1.8345131874084473, "step": 1593 }, { "epoch": 1.9630541871921183, "grad_norm": 12.386896406400556, "learning_rate": 3.2264601628459513e-06, "loss": 1.310433030128479, "step": 1594 }, { "epoch": 1.9642857142857144, "grad_norm": 18.010952199354442, "learning_rate": 3.2197619015052893e-06, "loss": 2.3967676162719727, "step": 1595 }, { "epoch": 1.9655172413793105, "grad_norm": 8.956387198130372, "learning_rate": 3.2130672969263543e-06, "loss": 1.7937273979187012, "step": 1596 }, { "epoch": 1.9667487684729064, "grad_norm": 8.393117465017726, "learning_rate": 3.206376362860432e-06, "loss": 2.0265514850616455, "step": 1597 }, { "epoch": 1.9679802955665024, "grad_norm": 21.13089299468655, "learning_rate": 3.1996891130512796e-06, "loss": 1.9514051675796509, "step": 1598 }, { "epoch": 1.9692118226600985, "grad_norm": 13.738115707885685, "learning_rate": 3.1930055612350795e-06, "loss": 1.4068338871002197, "step": 1599 }, { "epoch": 1.9704433497536946, "grad_norm": 11.875525005970715, "learning_rate": 3.18632572114042e-06, "loss": 1.9438577890396118, "step": 1600 }, { "epoch": 1.9716748768472905, "grad_norm": 12.6800038807384, "learning_rate": 3.1796496064882677e-06, "loss": 1.432902455329895, "step": 1601 }, { "epoch": 1.9729064039408866, "grad_norm": 10.748520734517344, "learning_rate": 3.172977230991935e-06, "loss": 1.6505646705627441, "step": 1602 }, { "epoch": 1.9741379310344827, "grad_norm": 9.807738223531803, "learning_rate": 3.1663086083570493e-06, "loss": 2.332062005996704, "step": 1603 }, { "epoch": 1.9753694581280787, "grad_norm": 7.777919459923873, "learning_rate": 3.159643752281536e-06, "loss": 1.737352967262268, "step": 1604 }, { "epoch": 1.9766009852216748, "grad_norm": 12.828820681008972, "learning_rate": 3.152982676455581e-06, "loss": 1.5183820724487305, "step": 1605 }, { "epoch": 1.977832512315271, "grad_norm": 12.058545370748947, "learning_rate": 3.1463253945616056e-06, "loss": 1.5560420751571655, "step": 1606 }, { "epoch": 1.979064039408867, "grad_norm": 12.080370196486308, "learning_rate": 3.1396719202742375e-06, "loss": 2.2159786224365234, "step": 1607 }, { "epoch": 1.980295566502463, "grad_norm": 11.349700550180101, "learning_rate": 3.133022267260283e-06, "loss": 3.4431471824645996, "step": 1608 }, { "epoch": 1.9815270935960592, "grad_norm": 15.960971258656029, "learning_rate": 3.1263764491786984e-06, "loss": 1.0674099922180176, "step": 1609 }, { "epoch": 1.9827586206896552, "grad_norm": 10.915353003367029, "learning_rate": 3.1197344796805675e-06, "loss": 1.2427492141723633, "step": 1610 }, { "epoch": 1.9839901477832513, "grad_norm": 13.554860694250717, "learning_rate": 3.1130963724090626e-06, "loss": 1.5895799398422241, "step": 1611 }, { "epoch": 1.9852216748768474, "grad_norm": 8.558375384118374, "learning_rate": 3.1064621409994245e-06, "loss": 1.3781355619430542, "step": 1612 }, { "epoch": 1.9864532019704435, "grad_norm": 17.36928034840775, "learning_rate": 3.0998317990789378e-06, "loss": 1.3307732343673706, "step": 1613 }, { "epoch": 1.9876847290640394, "grad_norm": 13.9784605520041, "learning_rate": 3.0932053602668876e-06, "loss": 1.340241551399231, "step": 1614 }, { "epoch": 1.9889162561576355, "grad_norm": 9.756766918680166, "learning_rate": 3.0865828381745515e-06, "loss": 1.5866634845733643, "step": 1615 }, { "epoch": 1.9901477832512315, "grad_norm": 14.514845100981475, "learning_rate": 3.0799642464051573e-06, "loss": 1.363608717918396, "step": 1616 }, { "epoch": 1.9913793103448276, "grad_norm": 13.803723137880525, "learning_rate": 3.0733495985538575e-06, "loss": 0.8918144106864929, "step": 1617 }, { "epoch": 1.9926108374384235, "grad_norm": 18.044340986569775, "learning_rate": 3.0667389082077114e-06, "loss": 1.4538538455963135, "step": 1618 }, { "epoch": 1.9938423645320196, "grad_norm": 11.435301654271841, "learning_rate": 3.0601321889456378e-06, "loss": 1.6913137435913086, "step": 1619 }, { "epoch": 1.9950738916256157, "grad_norm": 9.858778951797417, "learning_rate": 3.0535294543384074e-06, "loss": 1.4266109466552734, "step": 1620 }, { "epoch": 1.9963054187192117, "grad_norm": 22.051543439765215, "learning_rate": 3.046930717948604e-06, "loss": 1.2479441165924072, "step": 1621 }, { "epoch": 1.9975369458128078, "grad_norm": 9.286359312990374, "learning_rate": 3.0403359933305965e-06, "loss": 2.138500213623047, "step": 1622 }, { "epoch": 1.998768472906404, "grad_norm": 7.759425069440999, "learning_rate": 3.033745294030517e-06, "loss": 1.7762420177459717, "step": 1623 }, { "epoch": 2.0, "grad_norm": 16.72677410836059, "learning_rate": 3.0271586335862258e-06, "loss": 0.858219563961029, "step": 1624 }, { "epoch": 2.001231527093596, "grad_norm": 14.643925249137768, "learning_rate": 3.0205760255272874e-06, "loss": 0.5493918657302856, "step": 1625 }, { "epoch": 2.002463054187192, "grad_norm": 6.249448248328766, "learning_rate": 3.013997483374944e-06, "loss": 0.25155016779899597, "step": 1626 }, { "epoch": 2.0036945812807883, "grad_norm": 12.443278487913815, "learning_rate": 3.007423020642084e-06, "loss": 0.7727752923965454, "step": 1627 }, { "epoch": 2.0049261083743843, "grad_norm": 8.331944645794822, "learning_rate": 3.0008526508332216e-06, "loss": 0.43595510721206665, "step": 1628 }, { "epoch": 2.0061576354679804, "grad_norm": 12.199248861649188, "learning_rate": 2.9942863874444565e-06, "loss": 0.3856297433376312, "step": 1629 }, { "epoch": 2.0073891625615765, "grad_norm": 10.194964984786639, "learning_rate": 2.987724243963458e-06, "loss": 0.8458558917045593, "step": 1630 }, { "epoch": 2.0086206896551726, "grad_norm": 10.400619109316716, "learning_rate": 2.981166233869429e-06, "loss": 0.46873772144317627, "step": 1631 }, { "epoch": 2.0098522167487687, "grad_norm": 7.542731982064387, "learning_rate": 2.9746123706330886e-06, "loss": 0.42779290676116943, "step": 1632 }, { "epoch": 2.0110837438423643, "grad_norm": 9.375159014521008, "learning_rate": 2.9680626677166324e-06, "loss": 0.627717912197113, "step": 1633 }, { "epoch": 2.0123152709359604, "grad_norm": 7.3118642493157155, "learning_rate": 2.9615171385737107e-06, "loss": 1.0879265069961548, "step": 1634 }, { "epoch": 2.0135467980295565, "grad_norm": 10.467281128404773, "learning_rate": 2.9549757966494053e-06, "loss": 0.6282559037208557, "step": 1635 }, { "epoch": 2.0147783251231526, "grad_norm": 11.126192184454366, "learning_rate": 2.9484386553801875e-06, "loss": 0.5774171352386475, "step": 1636 }, { "epoch": 2.0160098522167487, "grad_norm": 10.360450434232337, "learning_rate": 2.9419057281939106e-06, "loss": 0.38788995146751404, "step": 1637 }, { "epoch": 2.0172413793103448, "grad_norm": 13.340772113855921, "learning_rate": 2.935377028509766e-06, "loss": 1.1726861000061035, "step": 1638 }, { "epoch": 2.018472906403941, "grad_norm": 9.74656398362734, "learning_rate": 2.9288525697382623e-06, "loss": 0.7854858636856079, "step": 1639 }, { "epoch": 2.019704433497537, "grad_norm": 11.086797967435993, "learning_rate": 2.922332365281201e-06, "loss": 0.25507253408432007, "step": 1640 }, { "epoch": 2.020935960591133, "grad_norm": 13.738902835067712, "learning_rate": 2.9158164285316356e-06, "loss": 0.5835862755775452, "step": 1641 }, { "epoch": 2.022167487684729, "grad_norm": 12.908512466729006, "learning_rate": 2.9093047728738604e-06, "loss": 0.49123138189315796, "step": 1642 }, { "epoch": 2.023399014778325, "grad_norm": 6.708189349635942, "learning_rate": 2.9027974116833756e-06, "loss": 0.20273317396640778, "step": 1643 }, { "epoch": 2.0246305418719213, "grad_norm": 12.517783768989945, "learning_rate": 2.896294358326862e-06, "loss": 0.46980565786361694, "step": 1644 }, { "epoch": 2.0258620689655173, "grad_norm": 12.98671748044912, "learning_rate": 2.889795626162143e-06, "loss": 0.23243547976016998, "step": 1645 }, { "epoch": 2.0270935960591134, "grad_norm": 21.52509717224934, "learning_rate": 2.883301228538178e-06, "loss": 1.3259830474853516, "step": 1646 }, { "epoch": 2.0283251231527095, "grad_norm": 10.539113199927511, "learning_rate": 2.8768111787950105e-06, "loss": 0.3021068274974823, "step": 1647 }, { "epoch": 2.0295566502463056, "grad_norm": 9.17401806944997, "learning_rate": 2.8703254902637646e-06, "loss": 0.3854427933692932, "step": 1648 }, { "epoch": 2.0307881773399017, "grad_norm": 14.201306893364228, "learning_rate": 2.8638441762665957e-06, "loss": 0.3356427848339081, "step": 1649 }, { "epoch": 2.0320197044334973, "grad_norm": 17.83956908779597, "learning_rate": 2.857367250116682e-06, "loss": 0.4785861372947693, "step": 1650 }, { "epoch": 2.0332512315270934, "grad_norm": 7.19305688493566, "learning_rate": 2.8508947251181885e-06, "loss": 0.1944020539522171, "step": 1651 }, { "epoch": 2.0344827586206895, "grad_norm": 10.046970652926046, "learning_rate": 2.8444266145662284e-06, "loss": 0.29677248001098633, "step": 1652 }, { "epoch": 2.0357142857142856, "grad_norm": 24.647186410998657, "learning_rate": 2.8379629317468604e-06, "loss": 1.517862319946289, "step": 1653 }, { "epoch": 2.0369458128078817, "grad_norm": 13.23680169167266, "learning_rate": 2.8315036899370442e-06, "loss": 0.5191118717193604, "step": 1654 }, { "epoch": 2.0381773399014778, "grad_norm": 13.059908687808356, "learning_rate": 2.825048902404612e-06, "loss": 0.42354950308799744, "step": 1655 }, { "epoch": 2.039408866995074, "grad_norm": 12.282344754345834, "learning_rate": 2.818598582408255e-06, "loss": 0.6974557638168335, "step": 1656 }, { "epoch": 2.04064039408867, "grad_norm": 11.678426390945974, "learning_rate": 2.8121527431974838e-06, "loss": 0.8337801694869995, "step": 1657 }, { "epoch": 2.041871921182266, "grad_norm": 11.653625925472546, "learning_rate": 2.805711398012604e-06, "loss": 0.48300114274024963, "step": 1658 }, { "epoch": 2.043103448275862, "grad_norm": 8.699921165351283, "learning_rate": 2.799274560084688e-06, "loss": 0.2231900542974472, "step": 1659 }, { "epoch": 2.044334975369458, "grad_norm": 11.080926890704283, "learning_rate": 2.7928422426355554e-06, "loss": 0.7431713342666626, "step": 1660 }, { "epoch": 2.0455665024630543, "grad_norm": 10.18242138749306, "learning_rate": 2.7864144588777403e-06, "loss": 0.5905585289001465, "step": 1661 }, { "epoch": 2.0467980295566504, "grad_norm": 12.79007023215843, "learning_rate": 2.779991222014459e-06, "loss": 0.5379045009613037, "step": 1662 }, { "epoch": 2.0480295566502464, "grad_norm": 10.204627357114346, "learning_rate": 2.77357254523959e-06, "loss": 0.4073173403739929, "step": 1663 }, { "epoch": 2.0492610837438425, "grad_norm": 16.54029756463169, "learning_rate": 2.767158441737646e-06, "loss": 0.37792834639549255, "step": 1664 }, { "epoch": 2.0504926108374386, "grad_norm": 12.199606214048373, "learning_rate": 2.7607489246837505e-06, "loss": 0.5250200629234314, "step": 1665 }, { "epoch": 2.0517241379310347, "grad_norm": 15.23569807667072, "learning_rate": 2.754344007243594e-06, "loss": 0.7716425061225891, "step": 1666 }, { "epoch": 2.0529556650246303, "grad_norm": 7.925817755895629, "learning_rate": 2.74794370257343e-06, "loss": 0.6505113244056702, "step": 1667 }, { "epoch": 2.0541871921182264, "grad_norm": 13.232372975936459, "learning_rate": 2.741548023820037e-06, "loss": 1.237591028213501, "step": 1668 }, { "epoch": 2.0554187192118225, "grad_norm": 7.821194651549222, "learning_rate": 2.7351569841206792e-06, "loss": 0.33151859045028687, "step": 1669 }, { "epoch": 2.0566502463054186, "grad_norm": 9.91473906287112, "learning_rate": 2.728770596603105e-06, "loss": 0.42522889375686646, "step": 1670 }, { "epoch": 2.0578817733990147, "grad_norm": 10.678926533172987, "learning_rate": 2.722388874385503e-06, "loss": 0.3359280824661255, "step": 1671 }, { "epoch": 2.0591133004926108, "grad_norm": 9.193563725792906, "learning_rate": 2.716011830576475e-06, "loss": 0.23182198405265808, "step": 1672 }, { "epoch": 2.060344827586207, "grad_norm": 13.12855060675622, "learning_rate": 2.7096394782750186e-06, "loss": 0.30262982845306396, "step": 1673 }, { "epoch": 2.061576354679803, "grad_norm": 7.791350721856929, "learning_rate": 2.7032718305704887e-06, "loss": 0.23311859369277954, "step": 1674 }, { "epoch": 2.062807881773399, "grad_norm": 12.221292312776084, "learning_rate": 2.696908900542584e-06, "loss": 0.6328019499778748, "step": 1675 }, { "epoch": 2.064039408866995, "grad_norm": 10.8289045782447, "learning_rate": 2.690550701261304e-06, "loss": 0.30473750829696655, "step": 1676 }, { "epoch": 2.065270935960591, "grad_norm": 8.921318423622994, "learning_rate": 2.684197245786938e-06, "loss": 0.2824372947216034, "step": 1677 }, { "epoch": 2.0665024630541873, "grad_norm": 15.101179094698006, "learning_rate": 2.677848547170029e-06, "loss": 0.3543265163898468, "step": 1678 }, { "epoch": 2.0677339901477834, "grad_norm": 8.79612621311314, "learning_rate": 2.671504618451348e-06, "loss": 0.6176484823226929, "step": 1679 }, { "epoch": 2.0689655172413794, "grad_norm": 10.985306627235934, "learning_rate": 2.665165472661866e-06, "loss": 0.5290611386299133, "step": 1680 }, { "epoch": 2.0701970443349755, "grad_norm": 8.398062035832517, "learning_rate": 2.658831122822735e-06, "loss": 0.5321454405784607, "step": 1681 }, { "epoch": 2.0714285714285716, "grad_norm": 11.540193919775621, "learning_rate": 2.6525015819452504e-06, "loss": 0.27902156114578247, "step": 1682 }, { "epoch": 2.0726600985221673, "grad_norm": 12.60801369495054, "learning_rate": 2.6461768630308326e-06, "loss": 0.46582847833633423, "step": 1683 }, { "epoch": 2.0738916256157633, "grad_norm": 15.322116984466021, "learning_rate": 2.6398569790710007e-06, "loss": 0.651951014995575, "step": 1684 }, { "epoch": 2.0751231527093594, "grad_norm": 9.74038331873093, "learning_rate": 2.633541943047334e-06, "loss": 0.36612239480018616, "step": 1685 }, { "epoch": 2.0763546798029555, "grad_norm": 7.730903286765135, "learning_rate": 2.6272317679314573e-06, "loss": 0.22278031706809998, "step": 1686 }, { "epoch": 2.0775862068965516, "grad_norm": 7.781634586207103, "learning_rate": 2.620926466685013e-06, "loss": 0.33012956380844116, "step": 1687 }, { "epoch": 2.0788177339901477, "grad_norm": 9.397683957095191, "learning_rate": 2.6146260522596334e-06, "loss": 0.7396690845489502, "step": 1688 }, { "epoch": 2.0800492610837438, "grad_norm": 11.988801603692485, "learning_rate": 2.608330537596907e-06, "loss": 0.8257578611373901, "step": 1689 }, { "epoch": 2.08128078817734, "grad_norm": 8.855369489146483, "learning_rate": 2.6020399356283586e-06, "loss": 0.4538348317146301, "step": 1690 }, { "epoch": 2.082512315270936, "grad_norm": 9.991399228257757, "learning_rate": 2.595754259275428e-06, "loss": 0.992777943611145, "step": 1691 }, { "epoch": 2.083743842364532, "grad_norm": 11.406818947912145, "learning_rate": 2.589473521449434e-06, "loss": 0.346379816532135, "step": 1692 }, { "epoch": 2.084975369458128, "grad_norm": 18.61665504561422, "learning_rate": 2.583197735051546e-06, "loss": 0.4523533284664154, "step": 1693 }, { "epoch": 2.086206896551724, "grad_norm": 9.296672908995824, "learning_rate": 2.576926912972771e-06, "loss": 0.11842907965183258, "step": 1694 }, { "epoch": 2.0874384236453203, "grad_norm": 8.459525770988064, "learning_rate": 2.5706610680939186e-06, "loss": 0.381897896528244, "step": 1695 }, { "epoch": 2.0886699507389164, "grad_norm": 11.109371262298351, "learning_rate": 2.564400213285564e-06, "loss": 0.3824227452278137, "step": 1696 }, { "epoch": 2.0899014778325125, "grad_norm": 7.622915250326246, "learning_rate": 2.5581443614080433e-06, "loss": 0.4153192639350891, "step": 1697 }, { "epoch": 2.0911330049261085, "grad_norm": 12.840140963343943, "learning_rate": 2.5518935253114153e-06, "loss": 0.3284783959388733, "step": 1698 }, { "epoch": 2.0923645320197046, "grad_norm": 9.586633818986163, "learning_rate": 2.545647717835428e-06, "loss": 0.7730638980865479, "step": 1699 }, { "epoch": 2.0935960591133007, "grad_norm": 9.329889124511917, "learning_rate": 2.539406951809512e-06, "loss": 0.31647253036499023, "step": 1700 }, { "epoch": 2.0948275862068964, "grad_norm": 12.004447197114908, "learning_rate": 2.53317124005273e-06, "loss": 0.5977708101272583, "step": 1701 }, { "epoch": 2.0960591133004924, "grad_norm": 8.69992433934411, "learning_rate": 2.5269405953737735e-06, "loss": 0.2646758556365967, "step": 1702 }, { "epoch": 2.0972906403940885, "grad_norm": 8.02489022856674, "learning_rate": 2.5207150305709167e-06, "loss": 0.5242122411727905, "step": 1703 }, { "epoch": 2.0985221674876846, "grad_norm": 13.343080912035035, "learning_rate": 2.5144945584320056e-06, "loss": 0.43271976709365845, "step": 1704 }, { "epoch": 2.0997536945812807, "grad_norm": 16.386560709178422, "learning_rate": 2.5082791917344256e-06, "loss": 0.902009904384613, "step": 1705 }, { "epoch": 2.100985221674877, "grad_norm": 8.363747351262921, "learning_rate": 2.5020689432450706e-06, "loss": 0.5218071937561035, "step": 1706 }, { "epoch": 2.102216748768473, "grad_norm": 13.441523308623053, "learning_rate": 2.495863825720322e-06, "loss": 0.7475143671035767, "step": 1707 }, { "epoch": 2.103448275862069, "grad_norm": 9.20779623087441, "learning_rate": 2.4896638519060257e-06, "loss": 0.31655290722846985, "step": 1708 }, { "epoch": 2.104679802955665, "grad_norm": 12.453919142267711, "learning_rate": 2.4834690345374608e-06, "loss": 0.30808842182159424, "step": 1709 }, { "epoch": 2.105911330049261, "grad_norm": 12.241452294332287, "learning_rate": 2.477279386339309e-06, "loss": 0.7037611603736877, "step": 1710 }, { "epoch": 2.107142857142857, "grad_norm": 14.091630182879387, "learning_rate": 2.471094920025644e-06, "loss": 0.4699273407459259, "step": 1711 }, { "epoch": 2.1083743842364533, "grad_norm": 13.920276564221119, "learning_rate": 2.4649156482998873e-06, "loss": 0.5032830238342285, "step": 1712 }, { "epoch": 2.1096059113300494, "grad_norm": 12.895772980307312, "learning_rate": 2.45874158385479e-06, "loss": 1.2563080787658691, "step": 1713 }, { "epoch": 2.1108374384236455, "grad_norm": 7.446774906593091, "learning_rate": 2.4525727393724136e-06, "loss": 0.29728978872299194, "step": 1714 }, { "epoch": 2.1120689655172415, "grad_norm": 9.446867560016528, "learning_rate": 2.446409127524094e-06, "loss": 0.2391032576560974, "step": 1715 }, { "epoch": 2.1133004926108376, "grad_norm": 13.287475847065688, "learning_rate": 2.4402507609704163e-06, "loss": 0.4612117409706116, "step": 1716 }, { "epoch": 2.1145320197044333, "grad_norm": 9.000836025460185, "learning_rate": 2.4340976523611957e-06, "loss": 0.36539849638938904, "step": 1717 }, { "epoch": 2.1157635467980294, "grad_norm": 6.954876550316873, "learning_rate": 2.427949814335443e-06, "loss": 0.2918080687522888, "step": 1718 }, { "epoch": 2.1169950738916254, "grad_norm": 12.290862216055704, "learning_rate": 2.4218072595213467e-06, "loss": 0.4508627653121948, "step": 1719 }, { "epoch": 2.1182266009852215, "grad_norm": 10.395578945684981, "learning_rate": 2.4156700005362384e-06, "loss": 0.43477705121040344, "step": 1720 }, { "epoch": 2.1194581280788176, "grad_norm": 13.97203519429258, "learning_rate": 2.409538049986576e-06, "loss": 0.36739200353622437, "step": 1721 }, { "epoch": 2.1206896551724137, "grad_norm": 10.000232328294244, "learning_rate": 2.403411420467916e-06, "loss": 0.722801923751831, "step": 1722 }, { "epoch": 2.12192118226601, "grad_norm": 8.047857628714285, "learning_rate": 2.3972901245648724e-06, "loss": 0.3729158043861389, "step": 1723 }, { "epoch": 2.123152709359606, "grad_norm": 9.083191980371518, "learning_rate": 2.3911741748511163e-06, "loss": 0.741644024848938, "step": 1724 }, { "epoch": 2.124384236453202, "grad_norm": 11.04614906019948, "learning_rate": 2.385063583889335e-06, "loss": 0.21925917267799377, "step": 1725 }, { "epoch": 2.125615763546798, "grad_norm": 8.204563983460345, "learning_rate": 2.378958364231202e-06, "loss": 0.3161308765411377, "step": 1726 }, { "epoch": 2.126847290640394, "grad_norm": 9.198617981495676, "learning_rate": 2.3728585284173646e-06, "loss": 0.2520957887172699, "step": 1727 }, { "epoch": 2.12807881773399, "grad_norm": 17.99753939345998, "learning_rate": 2.3667640889774096e-06, "loss": 0.5538915991783142, "step": 1728 }, { "epoch": 2.1293103448275863, "grad_norm": 15.205601395041407, "learning_rate": 2.3606750584298375e-06, "loss": 0.5438660979270935, "step": 1729 }, { "epoch": 2.1305418719211824, "grad_norm": 11.445216371439214, "learning_rate": 2.3545914492820366e-06, "loss": 0.39724698662757874, "step": 1730 }, { "epoch": 2.1317733990147785, "grad_norm": 13.240651517787109, "learning_rate": 2.348513274030264e-06, "loss": 0.3480866551399231, "step": 1731 }, { "epoch": 2.1330049261083746, "grad_norm": 8.909285636059167, "learning_rate": 2.3424405451596143e-06, "loss": 0.9076392650604248, "step": 1732 }, { "epoch": 2.1342364532019706, "grad_norm": 10.08773566622176, "learning_rate": 2.3363732751439926e-06, "loss": 0.19863876700401306, "step": 1733 }, { "epoch": 2.1354679802955667, "grad_norm": 18.974399402946254, "learning_rate": 2.3303114764460887e-06, "loss": 0.5347404479980469, "step": 1734 }, { "epoch": 2.1366995073891624, "grad_norm": 13.439122993751143, "learning_rate": 2.32425516151736e-06, "loss": 0.4876821041107178, "step": 1735 }, { "epoch": 2.1379310344827585, "grad_norm": 11.45775521594229, "learning_rate": 2.3182043427979973e-06, "loss": 0.24914954602718353, "step": 1736 }, { "epoch": 2.1391625615763545, "grad_norm": 8.201340069963411, "learning_rate": 2.3121590327168987e-06, "loss": 0.5773565769195557, "step": 1737 }, { "epoch": 2.1403940886699506, "grad_norm": 11.57987957433396, "learning_rate": 2.30611924369165e-06, "loss": 0.7779598832130432, "step": 1738 }, { "epoch": 2.1416256157635467, "grad_norm": 10.793230544693655, "learning_rate": 2.3000849881285016e-06, "loss": 0.27866464853286743, "step": 1739 }, { "epoch": 2.142857142857143, "grad_norm": 10.857850500188468, "learning_rate": 2.2940562784223224e-06, "loss": 0.5243108868598938, "step": 1740 }, { "epoch": 2.144088669950739, "grad_norm": 11.19069440448601, "learning_rate": 2.2880331269566043e-06, "loss": 0.6560786366462708, "step": 1741 }, { "epoch": 2.145320197044335, "grad_norm": 13.01584696243558, "learning_rate": 2.282015546103418e-06, "loss": 0.6339880228042603, "step": 1742 }, { "epoch": 2.146551724137931, "grad_norm": 9.571310950804556, "learning_rate": 2.2760035482233868e-06, "loss": 0.2517808973789215, "step": 1743 }, { "epoch": 2.147783251231527, "grad_norm": 20.291798315352697, "learning_rate": 2.269997145665674e-06, "loss": 0.40347909927368164, "step": 1744 }, { "epoch": 2.149014778325123, "grad_norm": 9.550073631094609, "learning_rate": 2.263996350767942e-06, "loss": 0.4681488573551178, "step": 1745 }, { "epoch": 2.1502463054187193, "grad_norm": 9.340283980757114, "learning_rate": 2.2580011758563418e-06, "loss": 0.6371068954467773, "step": 1746 }, { "epoch": 2.1514778325123154, "grad_norm": 21.612590436052542, "learning_rate": 2.2520116332454726e-06, "loss": 0.4741581678390503, "step": 1747 }, { "epoch": 2.1527093596059115, "grad_norm": 8.523455664504207, "learning_rate": 2.2460277352383713e-06, "loss": 0.3354438543319702, "step": 1748 }, { "epoch": 2.1539408866995076, "grad_norm": 14.050991791769299, "learning_rate": 2.240049494126479e-06, "loss": 0.593233585357666, "step": 1749 }, { "epoch": 2.1551724137931036, "grad_norm": 11.626128632656414, "learning_rate": 2.234076922189613e-06, "loss": 0.32123100757598877, "step": 1750 }, { "epoch": 2.1564039408866993, "grad_norm": 17.381626157091297, "learning_rate": 2.2281100316959476e-06, "loss": 1.0594584941864014, "step": 1751 }, { "epoch": 2.1576354679802954, "grad_norm": 9.794184199968742, "learning_rate": 2.2221488349019903e-06, "loss": 0.8586208820343018, "step": 1752 }, { "epoch": 2.1588669950738915, "grad_norm": 10.979739823361593, "learning_rate": 2.2161933440525474e-06, "loss": 0.38074642419815063, "step": 1753 }, { "epoch": 2.1600985221674875, "grad_norm": 10.732650739543086, "learning_rate": 2.21024357138071e-06, "loss": 0.28768736124038696, "step": 1754 }, { "epoch": 2.1613300492610836, "grad_norm": 10.263056998284627, "learning_rate": 2.2042995291078227e-06, "loss": 1.1843211650848389, "step": 1755 }, { "epoch": 2.1625615763546797, "grad_norm": 13.635797719225163, "learning_rate": 2.1983612294434563e-06, "loss": 0.7616925835609436, "step": 1756 }, { "epoch": 2.163793103448276, "grad_norm": 9.78260695772624, "learning_rate": 2.192428684585386e-06, "loss": 0.4518227279186249, "step": 1757 }, { "epoch": 2.165024630541872, "grad_norm": 14.669561384919394, "learning_rate": 2.1865019067195685e-06, "loss": 0.9173997640609741, "step": 1758 }, { "epoch": 2.166256157635468, "grad_norm": 9.861706475635476, "learning_rate": 2.180580908020117e-06, "loss": 0.4044645428657532, "step": 1759 }, { "epoch": 2.167487684729064, "grad_norm": 11.783858103052328, "learning_rate": 2.174665700649267e-06, "loss": 0.7771418690681458, "step": 1760 }, { "epoch": 2.16871921182266, "grad_norm": 12.555695641041428, "learning_rate": 2.1687562967573645e-06, "loss": 0.39461982250213623, "step": 1761 }, { "epoch": 2.1699507389162562, "grad_norm": 8.510682084443147, "learning_rate": 2.1628527084828283e-06, "loss": 0.2924491763114929, "step": 1762 }, { "epoch": 2.1711822660098523, "grad_norm": 7.789254339344862, "learning_rate": 2.156954947952139e-06, "loss": 0.2507514953613281, "step": 1763 }, { "epoch": 2.1724137931034484, "grad_norm": 9.474786369957261, "learning_rate": 2.151063027279798e-06, "loss": 0.44257861375808716, "step": 1764 }, { "epoch": 2.1736453201970445, "grad_norm": 9.165088005805186, "learning_rate": 2.1451769585683196e-06, "loss": 0.2863251268863678, "step": 1765 }, { "epoch": 2.1748768472906406, "grad_norm": 14.506373027900759, "learning_rate": 2.139296753908195e-06, "loss": 0.6882431507110596, "step": 1766 }, { "epoch": 2.1761083743842367, "grad_norm": 10.237681928740948, "learning_rate": 2.1334224253778628e-06, "loss": 0.8318816423416138, "step": 1767 }, { "epoch": 2.1773399014778327, "grad_norm": 8.92298078848023, "learning_rate": 2.1275539850437006e-06, "loss": 0.3899531364440918, "step": 1768 }, { "epoch": 2.1785714285714284, "grad_norm": 10.24700092560103, "learning_rate": 2.1216914449599905e-06, "loss": 0.6424532532691956, "step": 1769 }, { "epoch": 2.1798029556650245, "grad_norm": 10.006066437806421, "learning_rate": 2.1158348171688888e-06, "loss": 0.6676028370857239, "step": 1770 }, { "epoch": 2.1810344827586206, "grad_norm": 11.577953051638056, "learning_rate": 2.109984113700413e-06, "loss": 0.4219639301300049, "step": 1771 }, { "epoch": 2.1822660098522166, "grad_norm": 6.842671899586793, "learning_rate": 2.1041393465724114e-06, "loss": 0.32283568382263184, "step": 1772 }, { "epoch": 2.1834975369458127, "grad_norm": 9.373944237506624, "learning_rate": 2.0983005277905348e-06, "loss": 0.26172614097595215, "step": 1773 }, { "epoch": 2.184729064039409, "grad_norm": 8.04859888971959, "learning_rate": 2.092467669348217e-06, "loss": 0.585732638835907, "step": 1774 }, { "epoch": 2.185960591133005, "grad_norm": 17.13691371915511, "learning_rate": 2.0866407832266506e-06, "loss": 0.42734187841415405, "step": 1775 }, { "epoch": 2.187192118226601, "grad_norm": 9.353812644763135, "learning_rate": 2.0808198813947606e-06, "loss": 0.24151989817619324, "step": 1776 }, { "epoch": 2.188423645320197, "grad_norm": 6.491521280477716, "learning_rate": 2.0750049758091778e-06, "loss": 0.12940426170825958, "step": 1777 }, { "epoch": 2.189655172413793, "grad_norm": 12.137046868295176, "learning_rate": 2.0691960784142143e-06, "loss": 0.7501548528671265, "step": 1778 }, { "epoch": 2.1908866995073892, "grad_norm": 8.28614035816523, "learning_rate": 2.063393201141846e-06, "loss": 0.43730083107948303, "step": 1779 }, { "epoch": 2.1921182266009853, "grad_norm": 7.426728577487124, "learning_rate": 2.0575963559116823e-06, "loss": 0.3335978388786316, "step": 1780 }, { "epoch": 2.1933497536945814, "grad_norm": 7.727814229698406, "learning_rate": 2.0518055546309362e-06, "loss": 0.3262137174606323, "step": 1781 }, { "epoch": 2.1945812807881775, "grad_norm": 12.218163734992793, "learning_rate": 2.0460208091944122e-06, "loss": 0.3336663544178009, "step": 1782 }, { "epoch": 2.1958128078817736, "grad_norm": 12.61978263562606, "learning_rate": 2.0402421314844774e-06, "loss": 0.6050255298614502, "step": 1783 }, { "epoch": 2.1970443349753697, "grad_norm": 10.058297792191603, "learning_rate": 2.0344695333710234e-06, "loss": 0.33584898710250854, "step": 1784 }, { "epoch": 2.1982758620689653, "grad_norm": 7.629807101727278, "learning_rate": 2.0287030267114665e-06, "loss": 0.4711458683013916, "step": 1785 }, { "epoch": 2.1995073891625614, "grad_norm": 7.348268103503395, "learning_rate": 2.0229426233507067e-06, "loss": 0.6127311587333679, "step": 1786 }, { "epoch": 2.2007389162561575, "grad_norm": 8.230284472347915, "learning_rate": 2.0171883351211038e-06, "loss": 0.7195362448692322, "step": 1787 }, { "epoch": 2.2019704433497536, "grad_norm": 20.032548588100823, "learning_rate": 2.0114401738424618e-06, "loss": 1.412251591682434, "step": 1788 }, { "epoch": 2.2032019704433496, "grad_norm": 11.361862300830705, "learning_rate": 2.0056981513219944e-06, "loss": 0.48954465985298157, "step": 1789 }, { "epoch": 2.2044334975369457, "grad_norm": 10.14335903404985, "learning_rate": 1.999962279354311e-06, "loss": 0.32414451241493225, "step": 1790 }, { "epoch": 2.205665024630542, "grad_norm": 11.365030809564745, "learning_rate": 1.9942325697213817e-06, "loss": 0.4072822034358978, "step": 1791 }, { "epoch": 2.206896551724138, "grad_norm": 9.518825727757552, "learning_rate": 1.988509034192522e-06, "loss": 0.25958192348480225, "step": 1792 }, { "epoch": 2.208128078817734, "grad_norm": 7.689606665993246, "learning_rate": 1.9827916845243687e-06, "loss": 0.2943662405014038, "step": 1793 }, { "epoch": 2.20935960591133, "grad_norm": 11.749853788306439, "learning_rate": 1.9770805324608446e-06, "loss": 0.6713488698005676, "step": 1794 }, { "epoch": 2.210591133004926, "grad_norm": 8.987827629233262, "learning_rate": 1.971375589733145e-06, "loss": 0.5103387236595154, "step": 1795 }, { "epoch": 2.2118226600985222, "grad_norm": 14.84712925009146, "learning_rate": 1.965676868059714e-06, "loss": 0.4981153905391693, "step": 1796 }, { "epoch": 2.2130541871921183, "grad_norm": 9.829434549611708, "learning_rate": 1.9599843791462123e-06, "loss": 0.2828434407711029, "step": 1797 }, { "epoch": 2.2142857142857144, "grad_norm": 11.531079285990483, "learning_rate": 1.9542981346855015e-06, "loss": 0.36899659037590027, "step": 1798 }, { "epoch": 2.2155172413793105, "grad_norm": 10.264635301771921, "learning_rate": 1.9486181463576176e-06, "loss": 0.46039581298828125, "step": 1799 }, { "epoch": 2.2167487684729066, "grad_norm": 7.994315710714336, "learning_rate": 1.942944425829741e-06, "loss": 0.611553966999054, "step": 1800 }, { "epoch": 2.2179802955665027, "grad_norm": 10.64295367375575, "learning_rate": 1.937276984756179e-06, "loss": 0.23928876221179962, "step": 1801 }, { "epoch": 2.2192118226600988, "grad_norm": 11.919180580141987, "learning_rate": 1.9316158347783436e-06, "loss": 0.3270934820175171, "step": 1802 }, { "epoch": 2.2204433497536944, "grad_norm": 9.438403907761801, "learning_rate": 1.925960987524724e-06, "loss": 0.30926424264907837, "step": 1803 }, { "epoch": 2.2216748768472905, "grad_norm": 11.903671185207038, "learning_rate": 1.9203124546108583e-06, "loss": 0.6049486994743347, "step": 1804 }, { "epoch": 2.2229064039408866, "grad_norm": 14.861992075187999, "learning_rate": 1.91467024763932e-06, "loss": 0.7592355012893677, "step": 1805 }, { "epoch": 2.2241379310344827, "grad_norm": 11.790018718519686, "learning_rate": 1.9090343781996828e-06, "loss": 0.26057887077331543, "step": 1806 }, { "epoch": 2.2253694581280787, "grad_norm": 17.03673279052151, "learning_rate": 1.9034048578685099e-06, "loss": 0.4014609754085541, "step": 1807 }, { "epoch": 2.226600985221675, "grad_norm": 10.412774433531801, "learning_rate": 1.897781698209315e-06, "loss": 0.26397138833999634, "step": 1808 }, { "epoch": 2.227832512315271, "grad_norm": 11.809020308728643, "learning_rate": 1.8921649107725525e-06, "loss": 0.8727256059646606, "step": 1809 }, { "epoch": 2.229064039408867, "grad_norm": 8.838116472787092, "learning_rate": 1.8865545070955882e-06, "loss": 0.45729875564575195, "step": 1810 }, { "epoch": 2.230295566502463, "grad_norm": 13.341384604613445, "learning_rate": 1.880950498702666e-06, "loss": 0.3261849880218506, "step": 1811 }, { "epoch": 2.231527093596059, "grad_norm": 16.210141929264246, "learning_rate": 1.875352897104903e-06, "loss": 0.682532787322998, "step": 1812 }, { "epoch": 2.2327586206896552, "grad_norm": 16.44333196476405, "learning_rate": 1.8697617138002545e-06, "loss": 0.4255359470844269, "step": 1813 }, { "epoch": 2.2339901477832513, "grad_norm": 8.460123548003127, "learning_rate": 1.8641769602734872e-06, "loss": 0.3307432234287262, "step": 1814 }, { "epoch": 2.2352216748768474, "grad_norm": 9.96917434972206, "learning_rate": 1.8585986479961653e-06, "loss": 0.26837313175201416, "step": 1815 }, { "epoch": 2.2364532019704435, "grad_norm": 12.410587151566334, "learning_rate": 1.8530267884266228e-06, "loss": 0.5036531686782837, "step": 1816 }, { "epoch": 2.2376847290640396, "grad_norm": 13.229449859916322, "learning_rate": 1.8474613930099356e-06, "loss": 0.4444383680820465, "step": 1817 }, { "epoch": 2.2389162561576357, "grad_norm": 10.366174513602477, "learning_rate": 1.8419024731779e-06, "loss": 0.24592629075050354, "step": 1818 }, { "epoch": 2.2401477832512313, "grad_norm": 21.212742320307363, "learning_rate": 1.8363500403490175e-06, "loss": 0.9310093522071838, "step": 1819 }, { "epoch": 2.2413793103448274, "grad_norm": 10.041916938686702, "learning_rate": 1.8308041059284621e-06, "loss": 0.3252318799495697, "step": 1820 }, { "epoch": 2.2426108374384235, "grad_norm": 10.169102582875109, "learning_rate": 1.8252646813080566e-06, "loss": 0.44218361377716064, "step": 1821 }, { "epoch": 2.2438423645320196, "grad_norm": 13.658159402672133, "learning_rate": 1.8197317778662533e-06, "loss": 0.631632924079895, "step": 1822 }, { "epoch": 2.2450738916256157, "grad_norm": 11.284192076783485, "learning_rate": 1.814205406968112e-06, "loss": 0.2570488154888153, "step": 1823 }, { "epoch": 2.2463054187192117, "grad_norm": 10.661610786830831, "learning_rate": 1.8086855799652737e-06, "loss": 0.6113500595092773, "step": 1824 }, { "epoch": 2.247536945812808, "grad_norm": 9.883591422459872, "learning_rate": 1.8031723081959334e-06, "loss": 0.5997953414916992, "step": 1825 }, { "epoch": 2.248768472906404, "grad_norm": 12.888281661513009, "learning_rate": 1.7976656029848271e-06, "loss": 0.501262903213501, "step": 1826 }, { "epoch": 2.25, "grad_norm": 9.87397702836225, "learning_rate": 1.792165475643199e-06, "loss": 0.9116629362106323, "step": 1827 }, { "epoch": 2.251231527093596, "grad_norm": 8.421237466791723, "learning_rate": 1.786671937468779e-06, "loss": 0.3302918076515198, "step": 1828 }, { "epoch": 2.252463054187192, "grad_norm": 9.25026361639238, "learning_rate": 1.7811849997457681e-06, "loss": 0.26528751850128174, "step": 1829 }, { "epoch": 2.2536945812807883, "grad_norm": 11.490820404812338, "learning_rate": 1.775704673744809e-06, "loss": 0.25929901003837585, "step": 1830 }, { "epoch": 2.2549261083743843, "grad_norm": 13.127115940994786, "learning_rate": 1.7702309707229576e-06, "loss": 0.4980836808681488, "step": 1831 }, { "epoch": 2.2561576354679804, "grad_norm": 16.054819413361866, "learning_rate": 1.764763901923673e-06, "loss": 0.5196325182914734, "step": 1832 }, { "epoch": 2.2573891625615765, "grad_norm": 8.101995143129717, "learning_rate": 1.7593034785767788e-06, "loss": 0.20513209700584412, "step": 1833 }, { "epoch": 2.2586206896551726, "grad_norm": 11.005823004560217, "learning_rate": 1.753849711898457e-06, "loss": 0.3052961826324463, "step": 1834 }, { "epoch": 2.2598522167487687, "grad_norm": 14.916636143940408, "learning_rate": 1.7484026130912097e-06, "loss": 0.32289302349090576, "step": 1835 }, { "epoch": 2.2610837438423648, "grad_norm": 10.783629716557854, "learning_rate": 1.742962193343845e-06, "loss": 0.5892568826675415, "step": 1836 }, { "epoch": 2.2623152709359604, "grad_norm": 8.680159409558001, "learning_rate": 1.737528463831456e-06, "loss": 0.24824300408363342, "step": 1837 }, { "epoch": 2.2635467980295565, "grad_norm": 28.059213249121456, "learning_rate": 1.7321014357153815e-06, "loss": 0.23833397030830383, "step": 1838 }, { "epoch": 2.2647783251231526, "grad_norm": 10.866697094389515, "learning_rate": 1.726681120143207e-06, "loss": 0.4855925738811493, "step": 1839 }, { "epoch": 2.2660098522167487, "grad_norm": 11.048047137574908, "learning_rate": 1.7212675282487269e-06, "loss": 0.44992727041244507, "step": 1840 }, { "epoch": 2.2672413793103448, "grad_norm": 19.236329816785574, "learning_rate": 1.7158606711519193e-06, "loss": 0.41251128911972046, "step": 1841 }, { "epoch": 2.268472906403941, "grad_norm": 8.021805078822515, "learning_rate": 1.7104605599589353e-06, "loss": 0.4418972134590149, "step": 1842 }, { "epoch": 2.269704433497537, "grad_norm": 14.577958176696848, "learning_rate": 1.7050672057620666e-06, "loss": 0.4425298571586609, "step": 1843 }, { "epoch": 2.270935960591133, "grad_norm": 13.33684949043127, "learning_rate": 1.6996806196397243e-06, "loss": 0.3141231834888458, "step": 1844 }, { "epoch": 2.272167487684729, "grad_norm": 14.191190475097011, "learning_rate": 1.6943008126564164e-06, "loss": 0.2843426764011383, "step": 1845 }, { "epoch": 2.273399014778325, "grad_norm": 8.774563230877245, "learning_rate": 1.6889277958627293e-06, "loss": 0.36104702949523926, "step": 1846 }, { "epoch": 2.2746305418719213, "grad_norm": 8.915062589804638, "learning_rate": 1.6835615802953026e-06, "loss": 0.3061131536960602, "step": 1847 }, { "epoch": 2.2758620689655173, "grad_norm": 14.006563372468205, "learning_rate": 1.6782021769768015e-06, "loss": 0.26009926199913025, "step": 1848 }, { "epoch": 2.2770935960591134, "grad_norm": 8.127500944165664, "learning_rate": 1.6728495969158976e-06, "loss": 0.33785128593444824, "step": 1849 }, { "epoch": 2.2783251231527095, "grad_norm": 13.84769147602863, "learning_rate": 1.6675038511072518e-06, "loss": 0.675277829170227, "step": 1850 }, { "epoch": 2.2795566502463056, "grad_norm": 10.2024379894797, "learning_rate": 1.6621649505314853e-06, "loss": 0.30536460876464844, "step": 1851 }, { "epoch": 2.2807881773399012, "grad_norm": 13.905669065241, "learning_rate": 1.6568329061551552e-06, "loss": 0.483297735452652, "step": 1852 }, { "epoch": 2.2820197044334973, "grad_norm": 13.831832440802502, "learning_rate": 1.6515077289307391e-06, "loss": 1.2728561162948608, "step": 1853 }, { "epoch": 2.2832512315270934, "grad_norm": 12.809334971632179, "learning_rate": 1.6461894297966113e-06, "loss": 1.2634159326553345, "step": 1854 }, { "epoch": 2.2844827586206895, "grad_norm": 7.191323391539922, "learning_rate": 1.640878019677008e-06, "loss": 0.2823532819747925, "step": 1855 }, { "epoch": 2.2857142857142856, "grad_norm": 10.11071089918571, "learning_rate": 1.6355735094820236e-06, "loss": 0.34143221378326416, "step": 1856 }, { "epoch": 2.2869458128078817, "grad_norm": 21.093284752390208, "learning_rate": 1.6302759101075788e-06, "loss": 1.6820435523986816, "step": 1857 }, { "epoch": 2.2881773399014778, "grad_norm": 10.354309593440153, "learning_rate": 1.6249852324353943e-06, "loss": 0.5194296836853027, "step": 1858 }, { "epoch": 2.289408866995074, "grad_norm": 17.44623842314838, "learning_rate": 1.619701487332978e-06, "loss": 0.5637781023979187, "step": 1859 }, { "epoch": 2.29064039408867, "grad_norm": 25.69777716112705, "learning_rate": 1.6144246856535933e-06, "loss": 0.34875303506851196, "step": 1860 }, { "epoch": 2.291871921182266, "grad_norm": 12.072258734899453, "learning_rate": 1.609154838236246e-06, "loss": 1.098509430885315, "step": 1861 }, { "epoch": 2.293103448275862, "grad_norm": 9.38995256932923, "learning_rate": 1.603891955905652e-06, "loss": 0.28303658962249756, "step": 1862 }, { "epoch": 2.294334975369458, "grad_norm": 8.876257541157115, "learning_rate": 1.5986360494722237e-06, "loss": 0.2923981547355652, "step": 1863 }, { "epoch": 2.2955665024630543, "grad_norm": 12.816591257478263, "learning_rate": 1.5933871297320458e-06, "loss": 0.7381842136383057, "step": 1864 }, { "epoch": 2.2967980295566504, "grad_norm": 11.151348038557627, "learning_rate": 1.5881452074668474e-06, "loss": 0.3092786371707916, "step": 1865 }, { "epoch": 2.2980295566502464, "grad_norm": 7.288277848225151, "learning_rate": 1.5829102934439855e-06, "loss": 0.23155847191810608, "step": 1866 }, { "epoch": 2.2992610837438425, "grad_norm": 6.9100983038059685, "learning_rate": 1.577682398416424e-06, "loss": 0.28587496280670166, "step": 1867 }, { "epoch": 2.3004926108374386, "grad_norm": 10.179482607383743, "learning_rate": 1.572461533122709e-06, "loss": 0.28047090768814087, "step": 1868 }, { "epoch": 2.3017241379310347, "grad_norm": 9.853152635402589, "learning_rate": 1.567247708286942e-06, "loss": 0.23015758395195007, "step": 1869 }, { "epoch": 2.302955665024631, "grad_norm": 11.277401391934358, "learning_rate": 1.5620409346187697e-06, "loss": 0.4323405623435974, "step": 1870 }, { "epoch": 2.3041871921182264, "grad_norm": 11.297467766496554, "learning_rate": 1.5568412228133506e-06, "loss": 0.23572880029678345, "step": 1871 }, { "epoch": 2.3054187192118225, "grad_norm": 13.421885123492197, "learning_rate": 1.5516485835513368e-06, "loss": 0.3727877140045166, "step": 1872 }, { "epoch": 2.3066502463054186, "grad_norm": 12.62430001790282, "learning_rate": 1.5464630274988558e-06, "loss": 0.45042985677719116, "step": 1873 }, { "epoch": 2.3078817733990147, "grad_norm": 14.933222032568711, "learning_rate": 1.5412845653074871e-06, "loss": 0.2898573875427246, "step": 1874 }, { "epoch": 2.3091133004926108, "grad_norm": 13.678732792764093, "learning_rate": 1.5361132076142316e-06, "loss": 0.5285981893539429, "step": 1875 }, { "epoch": 2.310344827586207, "grad_norm": 11.195106285237618, "learning_rate": 1.5309489650415056e-06, "loss": 0.32582932710647583, "step": 1876 }, { "epoch": 2.311576354679803, "grad_norm": 10.519489956392377, "learning_rate": 1.5257918481971028e-06, "loss": 0.2169458121061325, "step": 1877 }, { "epoch": 2.312807881773399, "grad_norm": 13.764556882530254, "learning_rate": 1.5206418676741868e-06, "loss": 0.618523359298706, "step": 1878 }, { "epoch": 2.314039408866995, "grad_norm": 11.040931356433024, "learning_rate": 1.515499034051256e-06, "loss": 0.7014099359512329, "step": 1879 }, { "epoch": 2.315270935960591, "grad_norm": 13.213679491063276, "learning_rate": 1.510363357892133e-06, "loss": 0.44798558950424194, "step": 1880 }, { "epoch": 2.3165024630541873, "grad_norm": 77.68330951092015, "learning_rate": 1.50523484974594e-06, "loss": 0.4824434220790863, "step": 1881 }, { "epoch": 2.3177339901477834, "grad_norm": 5.871453538227446, "learning_rate": 1.5001135201470673e-06, "loss": 0.16904819011688232, "step": 1882 }, { "epoch": 2.3189655172413794, "grad_norm": 10.296708154719132, "learning_rate": 1.4949993796151675e-06, "loss": 0.8792778253555298, "step": 1883 }, { "epoch": 2.3201970443349755, "grad_norm": 12.549086016226653, "learning_rate": 1.4898924386551256e-06, "loss": 0.6592487096786499, "step": 1884 }, { "epoch": 2.3214285714285716, "grad_norm": 20.275701743724124, "learning_rate": 1.4847927077570324e-06, "loss": 1.6036354303359985, "step": 1885 }, { "epoch": 2.3226600985221673, "grad_norm": 9.24831145241808, "learning_rate": 1.4797001973961755e-06, "loss": 0.34490981698036194, "step": 1886 }, { "epoch": 2.3238916256157633, "grad_norm": 8.476000589981345, "learning_rate": 1.4746149180330082e-06, "loss": 0.3186146914958954, "step": 1887 }, { "epoch": 2.3251231527093594, "grad_norm": 18.44274912327115, "learning_rate": 1.4695368801131293e-06, "loss": 0.5050108432769775, "step": 1888 }, { "epoch": 2.3263546798029555, "grad_norm": 12.028503330268482, "learning_rate": 1.4644660940672628e-06, "loss": 0.3541644215583801, "step": 1889 }, { "epoch": 2.3275862068965516, "grad_norm": 6.910684312350736, "learning_rate": 1.4594025703112397e-06, "loss": 0.3495083749294281, "step": 1890 }, { "epoch": 2.3288177339901477, "grad_norm": 11.582636749838006, "learning_rate": 1.4543463192459728e-06, "loss": 0.9918674826622009, "step": 1891 }, { "epoch": 2.3300492610837438, "grad_norm": 12.929277927199294, "learning_rate": 1.4492973512574348e-06, "loss": 0.9601753950119019, "step": 1892 }, { "epoch": 2.33128078817734, "grad_norm": 8.289898772410082, "learning_rate": 1.4442556767166371e-06, "loss": 0.48341238498687744, "step": 1893 }, { "epoch": 2.332512315270936, "grad_norm": 11.044218498303557, "learning_rate": 1.4392213059796133e-06, "loss": 0.38372108340263367, "step": 1894 }, { "epoch": 2.333743842364532, "grad_norm": 17.672025418443823, "learning_rate": 1.4341942493873934e-06, "loss": 0.45662760734558105, "step": 1895 }, { "epoch": 2.334975369458128, "grad_norm": 8.57989944923008, "learning_rate": 1.4291745172659804e-06, "loss": 0.6601132154464722, "step": 1896 }, { "epoch": 2.336206896551724, "grad_norm": 10.831792328536467, "learning_rate": 1.4241621199263362e-06, "loss": 0.7569577097892761, "step": 1897 }, { "epoch": 2.3374384236453203, "grad_norm": 14.76295283801852, "learning_rate": 1.4191570676643573e-06, "loss": 0.7162508964538574, "step": 1898 }, { "epoch": 2.3386699507389164, "grad_norm": 16.808898262444146, "learning_rate": 1.4141593707608441e-06, "loss": 0.6121374368667603, "step": 1899 }, { "epoch": 2.3399014778325125, "grad_norm": 14.404980275639364, "learning_rate": 1.4091690394814989e-06, "loss": 0.550343930721283, "step": 1900 }, { "epoch": 2.3411330049261085, "grad_norm": 13.189507504332187, "learning_rate": 1.40418608407689e-06, "loss": 0.644547700881958, "step": 1901 }, { "epoch": 2.3423645320197046, "grad_norm": 10.144794457121083, "learning_rate": 1.3992105147824326e-06, "loss": 0.463761568069458, "step": 1902 }, { "epoch": 2.3435960591133007, "grad_norm": 9.21109140090456, "learning_rate": 1.3942423418183764e-06, "loss": 0.5593357682228088, "step": 1903 }, { "epoch": 2.344827586206897, "grad_norm": 12.967643967580644, "learning_rate": 1.3892815753897708e-06, "loss": 0.5090635418891907, "step": 1904 }, { "epoch": 2.3460591133004924, "grad_norm": 13.46983908302652, "learning_rate": 1.3843282256864599e-06, "loss": 0.4595394432544708, "step": 1905 }, { "epoch": 2.3472906403940885, "grad_norm": 11.392389994781835, "learning_rate": 1.379382302883044e-06, "loss": 0.8381729125976562, "step": 1906 }, { "epoch": 2.3485221674876846, "grad_norm": 8.85214424769499, "learning_rate": 1.3744438171388752e-06, "loss": 0.37937110662460327, "step": 1907 }, { "epoch": 2.3497536945812807, "grad_norm": 17.78975528440709, "learning_rate": 1.3695127785980279e-06, "loss": 0.4255325496196747, "step": 1908 }, { "epoch": 2.350985221674877, "grad_norm": 11.69369455239838, "learning_rate": 1.3645891973892772e-06, "loss": 1.1354942321777344, "step": 1909 }, { "epoch": 2.352216748768473, "grad_norm": 7.241901848192273, "learning_rate": 1.359673083626079e-06, "loss": 0.30018460750579834, "step": 1910 }, { "epoch": 2.353448275862069, "grad_norm": 10.130306855965305, "learning_rate": 1.3547644474065557e-06, "loss": 0.22174029052257538, "step": 1911 }, { "epoch": 2.354679802955665, "grad_norm": 10.818242567623516, "learning_rate": 1.349863298813464e-06, "loss": 0.27310076355934143, "step": 1912 }, { "epoch": 2.355911330049261, "grad_norm": 13.041781733429923, "learning_rate": 1.3449696479141855e-06, "loss": 0.39454638957977295, "step": 1913 }, { "epoch": 2.357142857142857, "grad_norm": 10.18283763523278, "learning_rate": 1.3400835047606997e-06, "loss": 0.39921119809150696, "step": 1914 }, { "epoch": 2.3583743842364533, "grad_norm": 10.365856020003331, "learning_rate": 1.3352048793895623e-06, "loss": 0.45110660791397095, "step": 1915 }, { "epoch": 2.3596059113300494, "grad_norm": 8.256618178243365, "learning_rate": 1.330333781821887e-06, "loss": 0.5453286170959473, "step": 1916 }, { "epoch": 2.3608374384236455, "grad_norm": 7.676268533106476, "learning_rate": 1.325470222063327e-06, "loss": 0.21928450465202332, "step": 1917 }, { "epoch": 2.3620689655172415, "grad_norm": 11.703145589738702, "learning_rate": 1.3206142101040525e-06, "loss": 0.8491370677947998, "step": 1918 }, { "epoch": 2.363300492610837, "grad_norm": 11.375579827407606, "learning_rate": 1.3157657559187264e-06, "loss": 0.5052551031112671, "step": 1919 }, { "epoch": 2.3645320197044333, "grad_norm": 14.124196950433179, "learning_rate": 1.3109248694664917e-06, "loss": 1.0034559965133667, "step": 1920 }, { "epoch": 2.3657635467980294, "grad_norm": 16.92878880493155, "learning_rate": 1.3060915606909413e-06, "loss": 0.3685661554336548, "step": 1921 }, { "epoch": 2.3669950738916254, "grad_norm": 9.744666272771802, "learning_rate": 1.301265839520109e-06, "loss": 0.33304983377456665, "step": 1922 }, { "epoch": 2.3682266009852215, "grad_norm": 9.861413232471296, "learning_rate": 1.2964477158664367e-06, "loss": 1.3396000862121582, "step": 1923 }, { "epoch": 2.3694581280788176, "grad_norm": 13.403135613317723, "learning_rate": 1.2916371996267656e-06, "loss": 0.3852962851524353, "step": 1924 }, { "epoch": 2.3706896551724137, "grad_norm": 12.989833739172669, "learning_rate": 1.2868343006823113e-06, "loss": 0.5070800185203552, "step": 1925 }, { "epoch": 2.37192118226601, "grad_norm": 10.592089371352348, "learning_rate": 1.2820390288986345e-06, "loss": 0.1917571723461151, "step": 1926 }, { "epoch": 2.373152709359606, "grad_norm": 6.248268258840329, "learning_rate": 1.2772513941256371e-06, "loss": 0.19884659349918365, "step": 1927 }, { "epoch": 2.374384236453202, "grad_norm": 13.319990126266617, "learning_rate": 1.2724714061975335e-06, "loss": 0.27710244059562683, "step": 1928 }, { "epoch": 2.375615763546798, "grad_norm": 12.638294589181001, "learning_rate": 1.2676990749328255e-06, "loss": 0.7216998338699341, "step": 1929 }, { "epoch": 2.376847290640394, "grad_norm": 7.68797287512978, "learning_rate": 1.262934410134292e-06, "loss": 0.35512983798980713, "step": 1930 }, { "epoch": 2.37807881773399, "grad_norm": 7.682504760826181, "learning_rate": 1.2581774215889653e-06, "loss": 0.21548208594322205, "step": 1931 }, { "epoch": 2.3793103448275863, "grad_norm": 10.576319148708158, "learning_rate": 1.2534281190681059e-06, "loss": 0.7191505432128906, "step": 1932 }, { "epoch": 2.3805418719211824, "grad_norm": 28.03273248427961, "learning_rate": 1.2486865123271868e-06, "loss": 0.5658040046691895, "step": 1933 }, { "epoch": 2.3817733990147785, "grad_norm": 7.429440108605395, "learning_rate": 1.243952611105877e-06, "loss": 0.42820805311203003, "step": 1934 }, { "epoch": 2.3830049261083746, "grad_norm": 8.913271204535084, "learning_rate": 1.2392264251280167e-06, "loss": 0.3223640024662018, "step": 1935 }, { "epoch": 2.3842364532019706, "grad_norm": 16.39061337542185, "learning_rate": 1.2345079641015955e-06, "loss": 0.5262437462806702, "step": 1936 }, { "epoch": 2.3854679802955667, "grad_norm": 12.040132799234067, "learning_rate": 1.2297972377187361e-06, "loss": 0.32022416591644287, "step": 1937 }, { "epoch": 2.386699507389163, "grad_norm": 10.197992684406291, "learning_rate": 1.2250942556556754e-06, "loss": 0.76932692527771, "step": 1938 }, { "epoch": 2.3879310344827585, "grad_norm": 9.459909563147203, "learning_rate": 1.2203990275727435e-06, "loss": 0.23026564717292786, "step": 1939 }, { "epoch": 2.3891625615763545, "grad_norm": 11.035875303455253, "learning_rate": 1.2157115631143384e-06, "loss": 0.4533492624759674, "step": 1940 }, { "epoch": 2.3903940886699506, "grad_norm": 10.823301129205994, "learning_rate": 1.211031871908916e-06, "loss": 0.6235211491584778, "step": 1941 }, { "epoch": 2.3916256157635467, "grad_norm": 9.073613663519735, "learning_rate": 1.206359963568966e-06, "loss": 0.2519042193889618, "step": 1942 }, { "epoch": 2.392857142857143, "grad_norm": 9.128265200465231, "learning_rate": 1.201695847690983e-06, "loss": 0.3229137659072876, "step": 1943 }, { "epoch": 2.394088669950739, "grad_norm": 11.336508477709275, "learning_rate": 1.1970395338554642e-06, "loss": 0.19324302673339844, "step": 1944 }, { "epoch": 2.395320197044335, "grad_norm": 11.07861313896692, "learning_rate": 1.1923910316268783e-06, "loss": 0.6342459917068481, "step": 1945 }, { "epoch": 2.396551724137931, "grad_norm": 11.018070634448504, "learning_rate": 1.1877503505536453e-06, "loss": 0.3010944724082947, "step": 1946 }, { "epoch": 2.397783251231527, "grad_norm": 8.241609243061369, "learning_rate": 1.183117500168125e-06, "loss": 0.40499716997146606, "step": 1947 }, { "epoch": 2.399014778325123, "grad_norm": 18.259844198245478, "learning_rate": 1.1784924899865856e-06, "loss": 0.9692997336387634, "step": 1948 }, { "epoch": 2.4002463054187193, "grad_norm": 15.459619863404178, "learning_rate": 1.1738753295091986e-06, "loss": 0.3848229646682739, "step": 1949 }, { "epoch": 2.4014778325123154, "grad_norm": 10.437656103417114, "learning_rate": 1.169266028220004e-06, "loss": 0.4472384750843048, "step": 1950 }, { "epoch": 2.4027093596059115, "grad_norm": 8.14141154883163, "learning_rate": 1.164664595586904e-06, "loss": 0.21374854445457458, "step": 1951 }, { "epoch": 2.4039408866995076, "grad_norm": 9.895182845073167, "learning_rate": 1.1600710410616367e-06, "loss": 0.4789981544017792, "step": 1952 }, { "epoch": 2.405172413793103, "grad_norm": 14.330046153248214, "learning_rate": 1.1554853740797556e-06, "loss": 0.6235543489456177, "step": 1953 }, { "epoch": 2.4064039408866993, "grad_norm": 11.28922905122106, "learning_rate": 1.1509076040606127e-06, "loss": 0.42575669288635254, "step": 1954 }, { "epoch": 2.4076354679802954, "grad_norm": 10.213241448714898, "learning_rate": 1.1463377404073433e-06, "loss": 0.22154280543327332, "step": 1955 }, { "epoch": 2.4088669950738915, "grad_norm": 9.867650979911392, "learning_rate": 1.1417757925068362e-06, "loss": 0.5722556114196777, "step": 1956 }, { "epoch": 2.4100985221674875, "grad_norm": 7.554394124376038, "learning_rate": 1.137221769729725e-06, "loss": 0.6502832174301147, "step": 1957 }, { "epoch": 2.4113300492610836, "grad_norm": 13.191804943156788, "learning_rate": 1.132675681430364e-06, "loss": 0.41717976331710815, "step": 1958 }, { "epoch": 2.4125615763546797, "grad_norm": 12.040721504656855, "learning_rate": 1.1281375369468078e-06, "loss": 0.3705020248889923, "step": 1959 }, { "epoch": 2.413793103448276, "grad_norm": 19.08924876929562, "learning_rate": 1.1236073456007928e-06, "loss": 0.8128242492675781, "step": 1960 }, { "epoch": 2.415024630541872, "grad_norm": 16.296662141524465, "learning_rate": 1.1190851166977218e-06, "loss": 0.7350403070449829, "step": 1961 }, { "epoch": 2.416256157635468, "grad_norm": 7.0582572680809195, "learning_rate": 1.1145708595266418e-06, "loss": 0.5837904214859009, "step": 1962 }, { "epoch": 2.417487684729064, "grad_norm": 8.875645426047061, "learning_rate": 1.1100645833602231e-06, "loss": 0.436983585357666, "step": 1963 }, { "epoch": 2.41871921182266, "grad_norm": 9.396076477777111, "learning_rate": 1.105566297454742e-06, "loss": 0.4708068370819092, "step": 1964 }, { "epoch": 2.4199507389162562, "grad_norm": 12.540961285951255, "learning_rate": 1.1010760110500652e-06, "loss": 0.37972012162208557, "step": 1965 }, { "epoch": 2.4211822660098523, "grad_norm": 9.511768233063343, "learning_rate": 1.0965937333696264e-06, "loss": 0.3167269229888916, "step": 1966 }, { "epoch": 2.4224137931034484, "grad_norm": 8.997618711574894, "learning_rate": 1.0921194736204066e-06, "loss": 0.3407049775123596, "step": 1967 }, { "epoch": 2.4236453201970445, "grad_norm": 26.50748327469745, "learning_rate": 1.0876532409929208e-06, "loss": 0.7673642635345459, "step": 1968 }, { "epoch": 2.4248768472906406, "grad_norm": 7.428296790887836, "learning_rate": 1.083195044661195e-06, "loss": 0.3029213845729828, "step": 1969 }, { "epoch": 2.4261083743842367, "grad_norm": 16.297521234369484, "learning_rate": 1.0787448937827428e-06, "loss": 0.5143488049507141, "step": 1970 }, { "epoch": 2.4273399014778327, "grad_norm": 9.838022492363262, "learning_rate": 1.0743027974985576e-06, "loss": 0.5086369514465332, "step": 1971 }, { "epoch": 2.4285714285714284, "grad_norm": 11.760234490761677, "learning_rate": 1.069868764933088e-06, "loss": 0.7999781966209412, "step": 1972 }, { "epoch": 2.4298029556650245, "grad_norm": 8.348930224912683, "learning_rate": 1.065442805194214e-06, "loss": 0.2686223089694977, "step": 1973 }, { "epoch": 2.4310344827586206, "grad_norm": 10.189321214439989, "learning_rate": 1.0610249273732393e-06, "loss": 0.2520446181297302, "step": 1974 }, { "epoch": 2.4322660098522166, "grad_norm": 11.006280468973555, "learning_rate": 1.056615140544861e-06, "loss": 0.28887757658958435, "step": 1975 }, { "epoch": 2.4334975369458127, "grad_norm": 17.908792965669562, "learning_rate": 1.0522134537671625e-06, "loss": 0.3709273338317871, "step": 1976 }, { "epoch": 2.434729064039409, "grad_norm": 8.261377574040777, "learning_rate": 1.0478198760815833e-06, "loss": 0.6718100309371948, "step": 1977 }, { "epoch": 2.435960591133005, "grad_norm": 8.787835782948932, "learning_rate": 1.0434344165129095e-06, "loss": 0.17143529653549194, "step": 1978 }, { "epoch": 2.437192118226601, "grad_norm": 15.115289039167425, "learning_rate": 1.0390570840692527e-06, "loss": 0.7128796577453613, "step": 1979 }, { "epoch": 2.438423645320197, "grad_norm": 13.46718512167487, "learning_rate": 1.034687887742028e-06, "loss": 0.24575555324554443, "step": 1980 }, { "epoch": 2.439655172413793, "grad_norm": 15.637303471440513, "learning_rate": 1.0303268365059383e-06, "loss": 0.5631250739097595, "step": 1981 }, { "epoch": 2.4408866995073892, "grad_norm": 10.921107789227744, "learning_rate": 1.0259739393189573e-06, "loss": 0.3094029128551483, "step": 1982 }, { "epoch": 2.4421182266009853, "grad_norm": 9.876371637108129, "learning_rate": 1.021629205122311e-06, "loss": 0.4754146635532379, "step": 1983 }, { "epoch": 2.4433497536945814, "grad_norm": 11.197843935010443, "learning_rate": 1.0172926428404527e-06, "loss": 0.18599992990493774, "step": 1984 }, { "epoch": 2.4445812807881775, "grad_norm": 11.60242134696919, "learning_rate": 1.0129642613810576e-06, "loss": 0.3831806480884552, "step": 1985 }, { "epoch": 2.4458128078817736, "grad_norm": 10.915359357263476, "learning_rate": 1.008644069634989e-06, "loss": 0.7717353105545044, "step": 1986 }, { "epoch": 2.447044334975369, "grad_norm": 16.40151326361354, "learning_rate": 1.0043320764762915e-06, "loss": 0.3248934745788574, "step": 1987 }, { "epoch": 2.4482758620689653, "grad_norm": 7.869645643343828, "learning_rate": 1.0000282907621694e-06, "loss": 0.27836111187934875, "step": 1988 }, { "epoch": 2.4495073891625614, "grad_norm": 10.609052698858209, "learning_rate": 9.957327213329687e-07, "loss": 0.20251630246639252, "step": 1989 }, { "epoch": 2.4507389162561575, "grad_norm": 15.802681481740834, "learning_rate": 9.914453770121557e-07, "loss": 0.6009274125099182, "step": 1990 }, { "epoch": 2.4519704433497536, "grad_norm": 12.5975867275524, "learning_rate": 9.871662666063054e-07, "loss": 0.3312684893608093, "step": 1991 }, { "epoch": 2.4532019704433496, "grad_norm": 11.710094793009787, "learning_rate": 9.828953989050744e-07, "loss": 0.38521629571914673, "step": 1992 }, { "epoch": 2.4544334975369457, "grad_norm": 7.249324950790913, "learning_rate": 9.786327826811942e-07, "loss": 0.2508774995803833, "step": 1993 }, { "epoch": 2.455665024630542, "grad_norm": 9.220463260574913, "learning_rate": 9.743784266904422e-07, "loss": 0.36097291111946106, "step": 1994 }, { "epoch": 2.456896551724138, "grad_norm": 22.22398053360695, "learning_rate": 9.701323396716312e-07, "loss": 0.6703237295150757, "step": 1995 }, { "epoch": 2.458128078817734, "grad_norm": 10.185390156514575, "learning_rate": 9.6589453034659e-07, "loss": 0.9553302526473999, "step": 1996 }, { "epoch": 2.45935960591133, "grad_norm": 10.103225854124274, "learning_rate": 9.616650074201383e-07, "loss": 0.3288821578025818, "step": 1997 }, { "epoch": 2.460591133004926, "grad_norm": 9.00369401838797, "learning_rate": 9.574437795800806e-07, "loss": 0.3195754885673523, "step": 1998 }, { "epoch": 2.4618226600985222, "grad_norm": 15.805795563779297, "learning_rate": 9.532308554971831e-07, "loss": 0.26505401730537415, "step": 1999 }, { "epoch": 2.4630541871921183, "grad_norm": 11.25947467258853, "learning_rate": 9.490262438251496e-07, "loss": 0.43558627367019653, "step": 2000 }, { "epoch": 2.4642857142857144, "grad_norm": 10.457734518302678, "learning_rate": 9.44829953200615e-07, "loss": 0.3582439720630646, "step": 2001 }, { "epoch": 2.4655172413793105, "grad_norm": 12.231152863168465, "learning_rate": 9.406419922431214e-07, "loss": 0.7142423987388611, "step": 2002 }, { "epoch": 2.4667487684729066, "grad_norm": 12.479544686562418, "learning_rate": 9.364623695550979e-07, "loss": 0.24947094917297363, "step": 2003 }, { "epoch": 2.4679802955665027, "grad_norm": 16.323337348543824, "learning_rate": 9.322910937218471e-07, "loss": 1.0376765727996826, "step": 2004 }, { "epoch": 2.4692118226600988, "grad_norm": 12.025786233159009, "learning_rate": 9.281281733115288e-07, "loss": 0.39291733503341675, "step": 2005 }, { "epoch": 2.4704433497536944, "grad_norm": 15.526509163555014, "learning_rate": 9.239736168751395e-07, "loss": 1.1038362979888916, "step": 2006 }, { "epoch": 2.4716748768472905, "grad_norm": 10.027251067087649, "learning_rate": 9.198274329464929e-07, "loss": 0.8542830944061279, "step": 2007 }, { "epoch": 2.4729064039408866, "grad_norm": 20.306111450694207, "learning_rate": 9.156896300422053e-07, "loss": 0.807994544506073, "step": 2008 }, { "epoch": 2.4741379310344827, "grad_norm": 5.653479787843331, "learning_rate": 9.115602166616805e-07, "loss": 0.17016081511974335, "step": 2009 }, { "epoch": 2.4753694581280787, "grad_norm": 11.492766886926658, "learning_rate": 9.07439201287088e-07, "loss": 0.7831156849861145, "step": 2010 }, { "epoch": 2.476600985221675, "grad_norm": 9.3732349373237, "learning_rate": 9.033265923833446e-07, "loss": 0.5146660804748535, "step": 2011 }, { "epoch": 2.477832512315271, "grad_norm": 13.78559435557381, "learning_rate": 8.992223983981035e-07, "loss": 0.5641926527023315, "step": 2012 }, { "epoch": 2.479064039408867, "grad_norm": 7.867545716232377, "learning_rate": 8.951266277617326e-07, "loss": 0.2155514359474182, "step": 2013 }, { "epoch": 2.480295566502463, "grad_norm": 11.172087233714553, "learning_rate": 8.91039288887292e-07, "loss": 0.28125351667404175, "step": 2014 }, { "epoch": 2.481527093596059, "grad_norm": 10.827596711387834, "learning_rate": 8.869603901705287e-07, "loss": 0.5349509716033936, "step": 2015 }, { "epoch": 2.4827586206896552, "grad_norm": 10.652684351436065, "learning_rate": 8.82889939989851e-07, "loss": 0.43747422099113464, "step": 2016 }, { "epoch": 2.4839901477832513, "grad_norm": 8.656359342370678, "learning_rate": 8.78827946706311e-07, "loss": 0.4629102647304535, "step": 2017 }, { "epoch": 2.4852216748768474, "grad_norm": 9.302169561481923, "learning_rate": 8.747744186635932e-07, "loss": 0.41271477937698364, "step": 2018 }, { "epoch": 2.4864532019704435, "grad_norm": 7.585718354318216, "learning_rate": 8.707293641879888e-07, "loss": 0.27247580885887146, "step": 2019 }, { "epoch": 2.4876847290640396, "grad_norm": 11.7662978456361, "learning_rate": 8.666927915883905e-07, "loss": 1.4255273342132568, "step": 2020 }, { "epoch": 2.4889162561576352, "grad_norm": 12.62783666106837, "learning_rate": 8.626647091562612e-07, "loss": 0.8762021660804749, "step": 2021 }, { "epoch": 2.4901477832512313, "grad_norm": 7.781392053224673, "learning_rate": 8.586451251656286e-07, "loss": 0.43475109338760376, "step": 2022 }, { "epoch": 2.4913793103448274, "grad_norm": 8.647004326334777, "learning_rate": 8.546340478730647e-07, "loss": 0.16091346740722656, "step": 2023 }, { "epoch": 2.4926108374384235, "grad_norm": 10.050856051691818, "learning_rate": 8.506314855176651e-07, "loss": 0.491144061088562, "step": 2024 }, { "epoch": 2.4938423645320196, "grad_norm": 15.049291696206959, "learning_rate": 8.466374463210348e-07, "loss": 0.792976438999176, "step": 2025 }, { "epoch": 2.4950738916256157, "grad_norm": 13.192276803646186, "learning_rate": 8.426519384872733e-07, "loss": 0.8023815155029297, "step": 2026 }, { "epoch": 2.4963054187192117, "grad_norm": 10.183319190154988, "learning_rate": 8.386749702029578e-07, "loss": 0.7008549571037292, "step": 2027 }, { "epoch": 2.497536945812808, "grad_norm": 9.306826775675583, "learning_rate": 8.347065496371193e-07, "loss": 0.3158326745033264, "step": 2028 }, { "epoch": 2.498768472906404, "grad_norm": 11.439845656368037, "learning_rate": 8.307466849412365e-07, "loss": 0.4847475588321686, "step": 2029 }, { "epoch": 2.5, "grad_norm": 8.392845077442193, "learning_rate": 8.2679538424921e-07, "loss": 0.42490729689598083, "step": 2030 }, { "epoch": 2.501231527093596, "grad_norm": 8.86668163556195, "learning_rate": 8.228526556773486e-07, "loss": 0.4303053021430969, "step": 2031 }, { "epoch": 2.502463054187192, "grad_norm": 9.647239720582808, "learning_rate": 8.18918507324356e-07, "loss": 0.20669305324554443, "step": 2032 }, { "epoch": 2.5036945812807883, "grad_norm": 14.868819185388821, "learning_rate": 8.149929472713126e-07, "loss": 0.4146193265914917, "step": 2033 }, { "epoch": 2.5049261083743843, "grad_norm": 8.521845217294674, "learning_rate": 8.110759835816518e-07, "loss": 0.2852465510368347, "step": 2034 }, { "epoch": 2.5061576354679804, "grad_norm": 9.65764576867383, "learning_rate": 8.071676243011556e-07, "loss": 0.5811144113540649, "step": 2035 }, { "epoch": 2.5073891625615765, "grad_norm": 13.619550034189677, "learning_rate": 8.032678774579272e-07, "loss": 0.6767745614051819, "step": 2036 }, { "epoch": 2.5086206896551726, "grad_norm": 10.986185907881213, "learning_rate": 7.993767510623834e-07, "loss": 0.5063849687576294, "step": 2037 }, { "epoch": 2.5098522167487687, "grad_norm": 11.539593137413142, "learning_rate": 7.954942531072285e-07, "loss": 0.534786581993103, "step": 2038 }, { "epoch": 2.5110837438423648, "grad_norm": 12.505177711554532, "learning_rate": 7.91620391567448e-07, "loss": 0.45122361183166504, "step": 2039 }, { "epoch": 2.512315270935961, "grad_norm": 8.839741542848381, "learning_rate": 7.877551744002881e-07, "loss": 0.2832280099391937, "step": 2040 }, { "epoch": 2.5135467980295565, "grad_norm": 11.718433441522615, "learning_rate": 7.838986095452311e-07, "loss": 0.8926963806152344, "step": 2041 }, { "epoch": 2.5147783251231526, "grad_norm": 9.73145152883671, "learning_rate": 7.800507049239947e-07, "loss": 0.9263632893562317, "step": 2042 }, { "epoch": 2.5160098522167487, "grad_norm": 16.48224794173804, "learning_rate": 7.762114684405064e-07, "loss": 0.3994196653366089, "step": 2043 }, { "epoch": 2.5172413793103448, "grad_norm": 10.084446546675132, "learning_rate": 7.723809079808842e-07, "loss": 0.3273079991340637, "step": 2044 }, { "epoch": 2.518472906403941, "grad_norm": 19.899209678081235, "learning_rate": 7.685590314134294e-07, "loss": 0.4566258192062378, "step": 2045 }, { "epoch": 2.519704433497537, "grad_norm": 16.13317422246351, "learning_rate": 7.647458465886055e-07, "loss": 0.4199177026748657, "step": 2046 }, { "epoch": 2.520935960591133, "grad_norm": 7.584665550484686, "learning_rate": 7.609413613390199e-07, "loss": 0.2789694666862488, "step": 2047 }, { "epoch": 2.522167487684729, "grad_norm": 12.08003380462593, "learning_rate": 7.571455834794095e-07, "loss": 0.39359426498413086, "step": 2048 }, { "epoch": 2.523399014778325, "grad_norm": 16.766513036441403, "learning_rate": 7.533585208066302e-07, "loss": 0.38510677218437195, "step": 2049 }, { "epoch": 2.5246305418719213, "grad_norm": 14.332573036568608, "learning_rate": 7.495801810996334e-07, "loss": 1.0861276388168335, "step": 2050 }, { "epoch": 2.5258620689655173, "grad_norm": 13.180696978229305, "learning_rate": 7.458105721194525e-07, "loss": 0.35866010189056396, "step": 2051 }, { "epoch": 2.5270935960591134, "grad_norm": 8.80983116890946, "learning_rate": 7.420497016091866e-07, "loss": 0.3436219394207001, "step": 2052 }, { "epoch": 2.5283251231527095, "grad_norm": 12.383092324048317, "learning_rate": 7.382975772939866e-07, "loss": 0.3687105178833008, "step": 2053 }, { "epoch": 2.529556650246305, "grad_norm": 8.240739854437226, "learning_rate": 7.34554206881039e-07, "loss": 0.32671070098876953, "step": 2054 }, { "epoch": 2.5307881773399012, "grad_norm": 11.575392957436732, "learning_rate": 7.308195980595462e-07, "loss": 0.7302184104919434, "step": 2055 }, { "epoch": 2.5320197044334973, "grad_norm": 13.7288446044892, "learning_rate": 7.270937585007149e-07, "loss": 0.7430564761161804, "step": 2056 }, { "epoch": 2.5332512315270934, "grad_norm": 8.666358783874388, "learning_rate": 7.233766958577421e-07, "loss": 0.305151104927063, "step": 2057 }, { "epoch": 2.5344827586206895, "grad_norm": 17.881705697560324, "learning_rate": 7.196684177657887e-07, "loss": 0.4311235547065735, "step": 2058 }, { "epoch": 2.5357142857142856, "grad_norm": 13.989195036115625, "learning_rate": 7.159689318419777e-07, "loss": 0.29697108268737793, "step": 2059 }, { "epoch": 2.5369458128078817, "grad_norm": 10.004375359602093, "learning_rate": 7.122782456853722e-07, "loss": 0.5012999176979065, "step": 2060 }, { "epoch": 2.5381773399014778, "grad_norm": 10.441122865704237, "learning_rate": 7.085963668769552e-07, "loss": 0.24754227697849274, "step": 2061 }, { "epoch": 2.539408866995074, "grad_norm": 7.415294238465162, "learning_rate": 7.049233029796243e-07, "loss": 0.1311894953250885, "step": 2062 }, { "epoch": 2.54064039408867, "grad_norm": 11.745936375906483, "learning_rate": 7.012590615381654e-07, "loss": 0.3458009958267212, "step": 2063 }, { "epoch": 2.541871921182266, "grad_norm": 19.579629082198277, "learning_rate": 6.976036500792466e-07, "loss": 0.6216360330581665, "step": 2064 }, { "epoch": 2.543103448275862, "grad_norm": 17.511409594621433, "learning_rate": 6.939570761113939e-07, "loss": 0.41114604473114014, "step": 2065 }, { "epoch": 2.544334975369458, "grad_norm": 12.769592062525021, "learning_rate": 6.903193471249853e-07, "loss": 0.35362619161605835, "step": 2066 }, { "epoch": 2.5455665024630543, "grad_norm": 15.37068507816602, "learning_rate": 6.866904705922284e-07, "loss": 1.7280857563018799, "step": 2067 }, { "epoch": 2.5467980295566504, "grad_norm": 12.864848425460373, "learning_rate": 6.830704539671462e-07, "loss": 1.3645777702331543, "step": 2068 }, { "epoch": 2.5480295566502464, "grad_norm": 8.663375537691056, "learning_rate": 6.794593046855613e-07, "loss": 0.46488872170448303, "step": 2069 }, { "epoch": 2.5492610837438425, "grad_norm": 11.746641376676559, "learning_rate": 6.758570301650869e-07, "loss": 0.9913250803947449, "step": 2070 }, { "epoch": 2.5504926108374386, "grad_norm": 14.714182423444447, "learning_rate": 6.722636378051011e-07, "loss": 0.8180273771286011, "step": 2071 }, { "epoch": 2.5517241379310347, "grad_norm": 7.848050259431333, "learning_rate": 6.686791349867422e-07, "loss": 0.5234679579734802, "step": 2072 }, { "epoch": 2.552955665024631, "grad_norm": 6.903410737354236, "learning_rate": 6.651035290728858e-07, "loss": 0.08975313603878021, "step": 2073 }, { "epoch": 2.554187192118227, "grad_norm": 11.27527783341364, "learning_rate": 6.615368274081335e-07, "loss": 0.35545456409454346, "step": 2074 }, { "epoch": 2.5554187192118225, "grad_norm": 11.726857926860664, "learning_rate": 6.579790373187944e-07, "loss": 1.192006230354309, "step": 2075 }, { "epoch": 2.5566502463054186, "grad_norm": 18.37387229568444, "learning_rate": 6.54430166112876e-07, "loss": 0.35069915652275085, "step": 2076 }, { "epoch": 2.5578817733990147, "grad_norm": 9.620718531681447, "learning_rate": 6.508902210800649e-07, "loss": 0.20691820979118347, "step": 2077 }, { "epoch": 2.5591133004926108, "grad_norm": 16.343394062782135, "learning_rate": 6.473592094917092e-07, "loss": 0.4561042785644531, "step": 2078 }, { "epoch": 2.560344827586207, "grad_norm": 11.889860706895831, "learning_rate": 6.43837138600813e-07, "loss": 0.32198822498321533, "step": 2079 }, { "epoch": 2.561576354679803, "grad_norm": 10.519181625251578, "learning_rate": 6.403240156420087e-07, "loss": 0.35681653022766113, "step": 2080 }, { "epoch": 2.562807881773399, "grad_norm": 9.426944191114051, "learning_rate": 6.36819847831554e-07, "loss": 0.5826268196105957, "step": 2081 }, { "epoch": 2.564039408866995, "grad_norm": 10.18400417142911, "learning_rate": 6.333246423673096e-07, "loss": 0.23084279894828796, "step": 2082 }, { "epoch": 2.565270935960591, "grad_norm": 8.146966381833735, "learning_rate": 6.298384064287261e-07, "loss": 0.5527750253677368, "step": 2083 }, { "epoch": 2.5665024630541873, "grad_norm": 7.581778739386861, "learning_rate": 6.263611471768349e-07, "loss": 0.4125085175037384, "step": 2084 }, { "epoch": 2.5677339901477834, "grad_norm": 9.31385960486644, "learning_rate": 6.228928717542205e-07, "loss": 0.37431174516677856, "step": 2085 }, { "epoch": 2.5689655172413794, "grad_norm": 9.72676402112677, "learning_rate": 6.194335872850188e-07, "loss": 0.17119471728801727, "step": 2086 }, { "epoch": 2.5701970443349755, "grad_norm": 11.790310632986847, "learning_rate": 6.159833008748988e-07, "loss": 0.9465748071670532, "step": 2087 }, { "epoch": 2.571428571428571, "grad_norm": 25.018614409312026, "learning_rate": 6.125420196110426e-07, "loss": 0.48980847001075745, "step": 2088 }, { "epoch": 2.5726600985221673, "grad_norm": 8.85280166601153, "learning_rate": 6.091097505621374e-07, "loss": 0.7195557951927185, "step": 2089 }, { "epoch": 2.5738916256157633, "grad_norm": 12.112085029881426, "learning_rate": 6.056865007783602e-07, "loss": 1.83125638961792, "step": 2090 }, { "epoch": 2.5751231527093594, "grad_norm": 9.94028667902401, "learning_rate": 6.022722772913581e-07, "loss": 0.3298517167568207, "step": 2091 }, { "epoch": 2.5763546798029555, "grad_norm": 11.18503180129702, "learning_rate": 5.988670871142377e-07, "loss": 0.47125905752182007, "step": 2092 }, { "epoch": 2.5775862068965516, "grad_norm": 9.413844300619951, "learning_rate": 5.954709372415524e-07, "loss": 0.288496196269989, "step": 2093 }, { "epoch": 2.5788177339901477, "grad_norm": 7.1811144983138675, "learning_rate": 5.920838346492874e-07, "loss": 0.3627285957336426, "step": 2094 }, { "epoch": 2.5800492610837438, "grad_norm": 14.830294096591077, "learning_rate": 5.887057862948403e-07, "loss": 0.7072806358337402, "step": 2095 }, { "epoch": 2.58128078817734, "grad_norm": 10.644924386002677, "learning_rate": 5.853367991170106e-07, "loss": 0.3386034071445465, "step": 2096 }, { "epoch": 2.582512315270936, "grad_norm": 14.094564220777247, "learning_rate": 5.819768800359882e-07, "loss": 0.4901737570762634, "step": 2097 }, { "epoch": 2.583743842364532, "grad_norm": 10.630160715256755, "learning_rate": 5.786260359533369e-07, "loss": 1.683629035949707, "step": 2098 }, { "epoch": 2.584975369458128, "grad_norm": 8.221455266315619, "learning_rate": 5.752842737519743e-07, "loss": 0.4275779128074646, "step": 2099 }, { "epoch": 2.586206896551724, "grad_norm": 8.989808316079593, "learning_rate": 5.7195160029617e-07, "loss": 0.6892256736755371, "step": 2100 }, { "epoch": 2.5874384236453203, "grad_norm": 10.390493407130242, "learning_rate": 5.686280224315189e-07, "loss": 0.6548988819122314, "step": 2101 }, { "epoch": 2.5886699507389164, "grad_norm": 8.365114703591324, "learning_rate": 5.653135469849347e-07, "loss": 0.4431142807006836, "step": 2102 }, { "epoch": 2.5899014778325125, "grad_norm": 20.296284889046316, "learning_rate": 5.62008180764635e-07, "loss": 0.5730191469192505, "step": 2103 }, { "epoch": 2.5911330049261085, "grad_norm": 7.886033521206941, "learning_rate": 5.587119305601263e-07, "loss": 0.8734421730041504, "step": 2104 }, { "epoch": 2.5923645320197046, "grad_norm": 7.851476190792639, "learning_rate": 5.554248031421872e-07, "loss": 0.30810514092445374, "step": 2105 }, { "epoch": 2.5935960591133007, "grad_norm": 10.114012805058133, "learning_rate": 5.521468052628615e-07, "loss": 0.5941227078437805, "step": 2106 }, { "epoch": 2.594827586206897, "grad_norm": 11.5276807645432, "learning_rate": 5.488779436554359e-07, "loss": 0.32648181915283203, "step": 2107 }, { "epoch": 2.596059113300493, "grad_norm": 12.384461199116616, "learning_rate": 5.456182250344349e-07, "loss": 0.2934610843658447, "step": 2108 }, { "epoch": 2.5972906403940885, "grad_norm": 9.420595645239136, "learning_rate": 5.423676560955976e-07, "loss": 0.20387941598892212, "step": 2109 }, { "epoch": 2.5985221674876846, "grad_norm": 10.459297088933635, "learning_rate": 5.391262435158722e-07, "loss": 0.6115235090255737, "step": 2110 }, { "epoch": 2.5997536945812807, "grad_norm": 13.891885044549888, "learning_rate": 5.358939939534002e-07, "loss": 0.45280611515045166, "step": 2111 }, { "epoch": 2.600985221674877, "grad_norm": 8.172861215602202, "learning_rate": 5.326709140474962e-07, "loss": 0.29169538617134094, "step": 2112 }, { "epoch": 2.602216748768473, "grad_norm": 6.844042320685791, "learning_rate": 5.294570104186436e-07, "loss": 0.4924798011779785, "step": 2113 }, { "epoch": 2.603448275862069, "grad_norm": 12.392169249298135, "learning_rate": 5.262522896684774e-07, "loss": 0.6751348376274109, "step": 2114 }, { "epoch": 2.604679802955665, "grad_norm": 13.993739996881734, "learning_rate": 5.230567583797674e-07, "loss": 0.6676002740859985, "step": 2115 }, { "epoch": 2.605911330049261, "grad_norm": 12.746427038097593, "learning_rate": 5.198704231164093e-07, "loss": 0.3112475275993347, "step": 2116 }, { "epoch": 2.607142857142857, "grad_norm": 9.88854663199865, "learning_rate": 5.166932904234101e-07, "loss": 0.5024739503860474, "step": 2117 }, { "epoch": 2.6083743842364533, "grad_norm": 18.4856178419616, "learning_rate": 5.135253668268724e-07, "loss": 2.6769824028015137, "step": 2118 }, { "epoch": 2.6096059113300494, "grad_norm": 12.280278924091732, "learning_rate": 5.103666588339812e-07, "loss": 0.4120222330093384, "step": 2119 }, { "epoch": 2.6108374384236455, "grad_norm": 8.106704210398478, "learning_rate": 5.072171729329944e-07, "loss": 0.3238741457462311, "step": 2120 }, { "epoch": 2.612068965517241, "grad_norm": 9.476233543897594, "learning_rate": 5.040769155932285e-07, "loss": 0.41853106021881104, "step": 2121 }, { "epoch": 2.613300492610837, "grad_norm": 9.382868411266552, "learning_rate": 5.00945893265039e-07, "loss": 0.5511228442192078, "step": 2122 }, { "epoch": 2.6145320197044333, "grad_norm": 10.011756541997418, "learning_rate": 4.978241123798133e-07, "loss": 0.6076939105987549, "step": 2123 }, { "epoch": 2.6157635467980294, "grad_norm": 11.969458383094386, "learning_rate": 4.94711579349959e-07, "loss": 0.32137832045555115, "step": 2124 }, { "epoch": 2.6169950738916254, "grad_norm": 9.120309940189742, "learning_rate": 4.916083005688865e-07, "loss": 0.2919730246067047, "step": 2125 }, { "epoch": 2.6182266009852215, "grad_norm": 11.012298283555321, "learning_rate": 4.885142824109946e-07, "loss": 0.3521897792816162, "step": 2126 }, { "epoch": 2.6194581280788176, "grad_norm": 10.719771585992975, "learning_rate": 4.85429531231662e-07, "loss": 0.5645777583122253, "step": 2127 }, { "epoch": 2.6206896551724137, "grad_norm": 8.564760545887571, "learning_rate": 4.823540533672355e-07, "loss": 0.21364668011665344, "step": 2128 }, { "epoch": 2.62192118226601, "grad_norm": 10.461100625681352, "learning_rate": 4.792878551350055e-07, "loss": 0.3472633957862854, "step": 2129 }, { "epoch": 2.623152709359606, "grad_norm": 7.7796379590314295, "learning_rate": 4.7623094283320905e-07, "loss": 0.2312706857919693, "step": 2130 }, { "epoch": 2.624384236453202, "grad_norm": 10.908716191951015, "learning_rate": 4.7318332274100595e-07, "loss": 0.4227292835712433, "step": 2131 }, { "epoch": 2.625615763546798, "grad_norm": 11.077941430018797, "learning_rate": 4.701450011184677e-07, "loss": 0.4835679531097412, "step": 2132 }, { "epoch": 2.626847290640394, "grad_norm": 8.011667181424437, "learning_rate": 4.671159842065698e-07, "loss": 0.30153489112854004, "step": 2133 }, { "epoch": 2.62807881773399, "grad_norm": 9.961423240887521, "learning_rate": 4.640962782271707e-07, "loss": 0.19820570945739746, "step": 2134 }, { "epoch": 2.6293103448275863, "grad_norm": 18.168474918209572, "learning_rate": 4.6108588938300725e-07, "loss": 0.5798308253288269, "step": 2135 }, { "epoch": 2.6305418719211824, "grad_norm": 14.982461578988175, "learning_rate": 4.5808482385767407e-07, "loss": 0.4840395450592041, "step": 2136 }, { "epoch": 2.6317733990147785, "grad_norm": 12.540506897781501, "learning_rate": 4.5509308781561846e-07, "loss": 0.33036884665489197, "step": 2137 }, { "epoch": 2.6330049261083746, "grad_norm": 10.69964555424519, "learning_rate": 4.521106874021242e-07, "loss": 0.4032250642776489, "step": 2138 }, { "epoch": 2.6342364532019706, "grad_norm": 10.190070867602095, "learning_rate": 4.4913762874329527e-07, "loss": 0.5196541547775269, "step": 2139 }, { "epoch": 2.6354679802955667, "grad_norm": 15.414254295489695, "learning_rate": 4.4617391794604946e-07, "loss": 0.5049697160720825, "step": 2140 }, { "epoch": 2.636699507389163, "grad_norm": 11.232489708483897, "learning_rate": 4.4321956109810327e-07, "loss": 0.6910302639007568, "step": 2141 }, { "epoch": 2.637931034482759, "grad_norm": 17.874353794074672, "learning_rate": 4.4027456426796014e-07, "loss": 0.8860565423965454, "step": 2142 }, { "epoch": 2.6391625615763545, "grad_norm": 8.315561152824909, "learning_rate": 4.3733893350489386e-07, "loss": 0.3347795307636261, "step": 2143 }, { "epoch": 2.6403940886699506, "grad_norm": 8.406655821874109, "learning_rate": 4.344126748389438e-07, "loss": 0.5979218482971191, "step": 2144 }, { "epoch": 2.6416256157635467, "grad_norm": 10.633642678256232, "learning_rate": 4.314957942808956e-07, "loss": 0.6724722385406494, "step": 2145 }, { "epoch": 2.642857142857143, "grad_norm": 11.37770126439957, "learning_rate": 4.2858829782227107e-07, "loss": 0.23655423521995544, "step": 2146 }, { "epoch": 2.644088669950739, "grad_norm": 13.564798867932334, "learning_rate": 4.2569019143531845e-07, "loss": 0.7535929679870605, "step": 2147 }, { "epoch": 2.645320197044335, "grad_norm": 7.225057762729149, "learning_rate": 4.228014810729963e-07, "loss": 0.5065590143203735, "step": 2148 }, { "epoch": 2.646551724137931, "grad_norm": 11.646047154930116, "learning_rate": 4.199221726689634e-07, "loss": 0.8232078552246094, "step": 2149 }, { "epoch": 2.647783251231527, "grad_norm": 12.627075206048184, "learning_rate": 4.170522721375669e-07, "loss": 0.3928985595703125, "step": 2150 }, { "epoch": 2.649014778325123, "grad_norm": 11.823044988035218, "learning_rate": 4.1419178537382756e-07, "loss": 0.6924771070480347, "step": 2151 }, { "epoch": 2.6502463054187193, "grad_norm": 8.99171598727701, "learning_rate": 4.1134071825343124e-07, "loss": 0.3323458135128021, "step": 2152 }, { "epoch": 2.6514778325123154, "grad_norm": 8.020309669901565, "learning_rate": 4.0849907663271346e-07, "loss": 0.6068896651268005, "step": 2153 }, { "epoch": 2.6527093596059115, "grad_norm": 9.698785865473045, "learning_rate": 4.0566686634865016e-07, "loss": 0.2112211287021637, "step": 2154 }, { "epoch": 2.653940886699507, "grad_norm": 8.70939943207942, "learning_rate": 4.028440932188465e-07, "loss": 0.3340219259262085, "step": 2155 }, { "epoch": 2.655172413793103, "grad_norm": 16.06563756982883, "learning_rate": 4.0003076304151624e-07, "loss": 0.4172120690345764, "step": 2156 }, { "epoch": 2.6564039408866993, "grad_norm": 10.448504154619048, "learning_rate": 3.972268815954833e-07, "loss": 0.3891775608062744, "step": 2157 }, { "epoch": 2.6576354679802954, "grad_norm": 14.733135115767965, "learning_rate": 3.944324546401607e-07, "loss": 0.4906957149505615, "step": 2158 }, { "epoch": 2.6588669950738915, "grad_norm": 9.613272858024363, "learning_rate": 3.916474879155402e-07, "loss": 0.8216167688369751, "step": 2159 }, { "epoch": 2.6600985221674875, "grad_norm": 10.257611413751764, "learning_rate": 3.8887198714218255e-07, "loss": 0.2030409872531891, "step": 2160 }, { "epoch": 2.6613300492610836, "grad_norm": 7.648297896745766, "learning_rate": 3.8610595802120564e-07, "loss": 0.24565047025680542, "step": 2161 }, { "epoch": 2.6625615763546797, "grad_norm": 10.822762486642535, "learning_rate": 3.833494062342691e-07, "loss": 0.3111516833305359, "step": 2162 }, { "epoch": 2.663793103448276, "grad_norm": 7.318326050197103, "learning_rate": 3.8060233744356634e-07, "loss": 0.32978883385658264, "step": 2163 }, { "epoch": 2.665024630541872, "grad_norm": 12.599543466460439, "learning_rate": 3.7786475729181314e-07, "loss": 0.5468876361846924, "step": 2164 }, { "epoch": 2.666256157635468, "grad_norm": 8.338604416987764, "learning_rate": 3.751366714022342e-07, "loss": 0.25511908531188965, "step": 2165 }, { "epoch": 2.667487684729064, "grad_norm": 10.389301741607085, "learning_rate": 3.724180853785514e-07, "loss": 0.9938629269599915, "step": 2166 }, { "epoch": 2.66871921182266, "grad_norm": 12.267953130443164, "learning_rate": 3.6970900480497287e-07, "loss": 0.4233144223690033, "step": 2167 }, { "epoch": 2.6699507389162562, "grad_norm": 11.571711586702998, "learning_rate": 3.6700943524618284e-07, "loss": 0.39373546838760376, "step": 2168 }, { "epoch": 2.6711822660098523, "grad_norm": 9.063048538209927, "learning_rate": 3.643193822473301e-07, "loss": 0.40346717834472656, "step": 2169 }, { "epoch": 2.6724137931034484, "grad_norm": 14.384271085159352, "learning_rate": 3.616388513340124e-07, "loss": 0.35343194007873535, "step": 2170 }, { "epoch": 2.6736453201970445, "grad_norm": 16.277411971018296, "learning_rate": 3.5896784801227046e-07, "loss": 0.38300061225891113, "step": 2171 }, { "epoch": 2.6748768472906406, "grad_norm": 7.950757575573031, "learning_rate": 3.56306377768576e-07, "loss": 0.5319961905479431, "step": 2172 }, { "epoch": 2.6761083743842367, "grad_norm": 19.004855778838706, "learning_rate": 3.5365444606981434e-07, "loss": 0.45474281907081604, "step": 2173 }, { "epoch": 2.6773399014778327, "grad_norm": 13.211081908527799, "learning_rate": 3.5101205836328144e-07, "loss": 0.41422080993652344, "step": 2174 }, { "epoch": 2.678571428571429, "grad_norm": 12.892521639907137, "learning_rate": 3.4837922007667e-07, "loss": 0.5486617088317871, "step": 2175 }, { "epoch": 2.6798029556650245, "grad_norm": 10.113357639811962, "learning_rate": 3.4575593661805296e-07, "loss": 0.27931463718414307, "step": 2176 }, { "epoch": 2.6810344827586206, "grad_norm": 9.357499790574233, "learning_rate": 3.4314221337588217e-07, "loss": 0.45936134457588196, "step": 2177 }, { "epoch": 2.6822660098522166, "grad_norm": 12.597881278175105, "learning_rate": 3.405380557189669e-07, "loss": 0.5659298896789551, "step": 2178 }, { "epoch": 2.6834975369458127, "grad_norm": 16.9103130329337, "learning_rate": 3.379434689964728e-07, "loss": 0.3952332139015198, "step": 2179 }, { "epoch": 2.684729064039409, "grad_norm": 13.280154300410791, "learning_rate": 3.3535845853790105e-07, "loss": 0.36344432830810547, "step": 2180 }, { "epoch": 2.685960591133005, "grad_norm": 8.267427758719474, "learning_rate": 3.3278302965308593e-07, "loss": 0.29526573419570923, "step": 2181 }, { "epoch": 2.687192118226601, "grad_norm": 14.172270303989801, "learning_rate": 3.3021718763218025e-07, "loss": 0.35098952054977417, "step": 2182 }, { "epoch": 2.688423645320197, "grad_norm": 15.442089142249914, "learning_rate": 3.276609377456419e-07, "loss": 0.9407736659049988, "step": 2183 }, { "epoch": 2.689655172413793, "grad_norm": 10.545470371926038, "learning_rate": 3.2511428524422793e-07, "loss": 0.29226356744766235, "step": 2184 }, { "epoch": 2.6908866995073892, "grad_norm": 11.590832336497728, "learning_rate": 3.2257723535898177e-07, "loss": 0.78415846824646, "step": 2185 }, { "epoch": 2.6921182266009853, "grad_norm": 10.523504017171055, "learning_rate": 3.200497933012198e-07, "loss": 0.22600015997886658, "step": 2186 }, { "epoch": 2.6933497536945814, "grad_norm": 16.18317423891681, "learning_rate": 3.1753196426252573e-07, "loss": 0.3907809853553772, "step": 2187 }, { "epoch": 2.6945812807881775, "grad_norm": 12.272867485671698, "learning_rate": 3.150237534147366e-07, "loss": 0.7056915760040283, "step": 2188 }, { "epoch": 2.695812807881773, "grad_norm": 11.590493499262351, "learning_rate": 3.125251659099332e-07, "loss": 0.35921359062194824, "step": 2189 }, { "epoch": 2.697044334975369, "grad_norm": 7.139507013908415, "learning_rate": 3.1003620688042636e-07, "loss": 0.17715278267860413, "step": 2190 }, { "epoch": 2.6982758620689653, "grad_norm": 6.945336769527092, "learning_rate": 3.0755688143875253e-07, "loss": 0.20512376725673676, "step": 2191 }, { "epoch": 2.6995073891625614, "grad_norm": 11.666932414854655, "learning_rate": 3.050871946776596e-07, "loss": 0.38939356803894043, "step": 2192 }, { "epoch": 2.7007389162561575, "grad_norm": 8.970559885182587, "learning_rate": 3.026271516700946e-07, "loss": 0.3292514681816101, "step": 2193 }, { "epoch": 2.7019704433497536, "grad_norm": 8.920484564263525, "learning_rate": 3.0017675746919883e-07, "loss": 0.2732661962509155, "step": 2194 }, { "epoch": 2.7032019704433496, "grad_norm": 14.273169657648177, "learning_rate": 2.9773601710828937e-07, "loss": 0.3058941960334778, "step": 2195 }, { "epoch": 2.7044334975369457, "grad_norm": 16.20827847981958, "learning_rate": 2.953049356008586e-07, "loss": 0.7454397082328796, "step": 2196 }, { "epoch": 2.705665024630542, "grad_norm": 17.54054653840535, "learning_rate": 2.928835179405548e-07, "loss": 0.3679504692554474, "step": 2197 }, { "epoch": 2.706896551724138, "grad_norm": 9.77472352239386, "learning_rate": 2.9047176910117824e-07, "loss": 0.2241794466972351, "step": 2198 }, { "epoch": 2.708128078817734, "grad_norm": 8.561542797938362, "learning_rate": 2.8806969403666897e-07, "loss": 0.19927407801151276, "step": 2199 }, { "epoch": 2.70935960591133, "grad_norm": 7.0959519302312195, "learning_rate": 2.856772976810929e-07, "loss": 0.2808955907821655, "step": 2200 }, { "epoch": 2.710591133004926, "grad_norm": 21.456216648925764, "learning_rate": 2.8329458494863846e-07, "loss": 0.7279784083366394, "step": 2201 }, { "epoch": 2.7118226600985222, "grad_norm": 8.853404617031957, "learning_rate": 2.809215607336024e-07, "loss": 0.47690945863723755, "step": 2202 }, { "epoch": 2.7130541871921183, "grad_norm": 9.19562501308832, "learning_rate": 2.7855822991037895e-07, "loss": 0.1997358649969101, "step": 2203 }, { "epoch": 2.7142857142857144, "grad_norm": 12.418182947084489, "learning_rate": 2.762045973334526e-07, "loss": 0.3269602954387665, "step": 2204 }, { "epoch": 2.7155172413793105, "grad_norm": 9.253477256115538, "learning_rate": 2.738606678373873e-07, "loss": 0.5450934767723083, "step": 2205 }, { "epoch": 2.7167487684729066, "grad_norm": 12.029880579085864, "learning_rate": 2.7152644623681503e-07, "loss": 0.4732050895690918, "step": 2206 }, { "epoch": 2.7179802955665027, "grad_norm": 13.561046323857816, "learning_rate": 2.6920193732642594e-07, "loss": 0.26588505506515503, "step": 2207 }, { "epoch": 2.7192118226600988, "grad_norm": 4.326966860689474, "learning_rate": 2.668871458809613e-07, "loss": 0.09280772507190704, "step": 2208 }, { "epoch": 2.720443349753695, "grad_norm": 12.851246166510439, "learning_rate": 2.6458207665520266e-07, "loss": 0.3763241767883301, "step": 2209 }, { "epoch": 2.7216748768472905, "grad_norm": 11.562947215162826, "learning_rate": 2.6228673438395804e-07, "loss": 0.46730220317840576, "step": 2210 }, { "epoch": 2.7229064039408866, "grad_norm": 11.5850144160988, "learning_rate": 2.600011237820577e-07, "loss": 0.42677825689315796, "step": 2211 }, { "epoch": 2.7241379310344827, "grad_norm": 15.077683389725815, "learning_rate": 2.577252495443422e-07, "loss": 0.4460552930831909, "step": 2212 }, { "epoch": 2.7253694581280787, "grad_norm": 8.23073307445448, "learning_rate": 2.5545911634565266e-07, "loss": 0.5031150579452515, "step": 2213 }, { "epoch": 2.726600985221675, "grad_norm": 11.590947176695321, "learning_rate": 2.5320272884081955e-07, "loss": 0.18559831380844116, "step": 2214 }, { "epoch": 2.727832512315271, "grad_norm": 10.364105747898172, "learning_rate": 2.5095609166465805e-07, "loss": 0.2087395340204239, "step": 2215 }, { "epoch": 2.729064039408867, "grad_norm": 7.72131921454244, "learning_rate": 2.4871920943195404e-07, "loss": 0.21503375470638275, "step": 2216 }, { "epoch": 2.730295566502463, "grad_norm": 13.07348837914591, "learning_rate": 2.4649208673745317e-07, "loss": 0.20347240567207336, "step": 2217 }, { "epoch": 2.731527093596059, "grad_norm": 7.396681990877147, "learning_rate": 2.442747281558572e-07, "loss": 0.20019523799419403, "step": 2218 }, { "epoch": 2.7327586206896552, "grad_norm": 7.384056914568049, "learning_rate": 2.420671382418122e-07, "loss": 0.6672437191009521, "step": 2219 }, { "epoch": 2.7339901477832513, "grad_norm": 9.4227706186618, "learning_rate": 2.398693215298953e-07, "loss": 0.28304070234298706, "step": 2220 }, { "epoch": 2.7352216748768474, "grad_norm": 13.10398470275865, "learning_rate": 2.3768128253461253e-07, "loss": 0.7915571331977844, "step": 2221 }, { "epoch": 2.7364532019704435, "grad_norm": 14.271199864374358, "learning_rate": 2.3550302575038154e-07, "loss": 0.2920302152633667, "step": 2222 }, { "epoch": 2.737684729064039, "grad_norm": 9.98798818011476, "learning_rate": 2.333345556515304e-07, "loss": 0.7924119830131531, "step": 2223 }, { "epoch": 2.7389162561576352, "grad_norm": 16.52502448354582, "learning_rate": 2.311758766922806e-07, "loss": 2.4264345169067383, "step": 2224 }, { "epoch": 2.7401477832512313, "grad_norm": 11.115670896935416, "learning_rate": 2.290269933067457e-07, "loss": 0.6286523342132568, "step": 2225 }, { "epoch": 2.7413793103448274, "grad_norm": 10.041583417397344, "learning_rate": 2.2688790990891606e-07, "loss": 0.4733774662017822, "step": 2226 }, { "epoch": 2.7426108374384235, "grad_norm": 9.613596914422414, "learning_rate": 2.2475863089265193e-07, "loss": 0.41262203454971313, "step": 2227 }, { "epoch": 2.7438423645320196, "grad_norm": 12.211203634057204, "learning_rate": 2.2263916063167523e-07, "loss": 0.9069987535476685, "step": 2228 }, { "epoch": 2.7450738916256157, "grad_norm": 8.477983222031407, "learning_rate": 2.205295034795596e-07, "loss": 0.33371949195861816, "step": 2229 }, { "epoch": 2.7463054187192117, "grad_norm": 10.672673009053705, "learning_rate": 2.1842966376972142e-07, "loss": 0.2515576183795929, "step": 2230 }, { "epoch": 2.747536945812808, "grad_norm": 15.919489094243, "learning_rate": 2.1633964581541212e-07, "loss": 0.5854448080062866, "step": 2231 }, { "epoch": 2.748768472906404, "grad_norm": 8.34813593109363, "learning_rate": 2.1425945390970816e-07, "loss": 0.36172378063201904, "step": 2232 }, { "epoch": 2.75, "grad_norm": 13.095561050747872, "learning_rate": 2.1218909232550156e-07, "loss": 0.8217978477478027, "step": 2233 }, { "epoch": 2.751231527093596, "grad_norm": 10.987521536719951, "learning_rate": 2.1012856531549163e-07, "loss": 0.5560616850852966, "step": 2234 }, { "epoch": 2.752463054187192, "grad_norm": 15.220877022032928, "learning_rate": 2.0807787711217887e-07, "loss": 0.3503821790218353, "step": 2235 }, { "epoch": 2.7536945812807883, "grad_norm": 17.985871130679012, "learning_rate": 2.0603703192785264e-07, "loss": 0.6000460982322693, "step": 2236 }, { "epoch": 2.7549261083743843, "grad_norm": 10.345272170286153, "learning_rate": 2.0400603395458408e-07, "loss": 0.20410886406898499, "step": 2237 }, { "epoch": 2.7561576354679804, "grad_norm": 10.777826560400182, "learning_rate": 2.0198488736421607e-07, "loss": 0.2497151494026184, "step": 2238 }, { "epoch": 2.7573891625615765, "grad_norm": 9.330808767879285, "learning_rate": 1.999735963083571e-07, "loss": 0.2881111800670624, "step": 2239 }, { "epoch": 2.7586206896551726, "grad_norm": 19.301319480093145, "learning_rate": 1.9797216491837356e-07, "loss": 0.38934653997421265, "step": 2240 }, { "epoch": 2.7598522167487687, "grad_norm": 13.511728912052765, "learning_rate": 1.9598059730537465e-07, "loss": 0.3553803563117981, "step": 2241 }, { "epoch": 2.7610837438423648, "grad_norm": 13.74634988747894, "learning_rate": 1.9399889756021196e-07, "loss": 0.3653762936592102, "step": 2242 }, { "epoch": 2.762315270935961, "grad_norm": 9.247962499458838, "learning_rate": 1.9202706975346875e-07, "loss": 0.2600834369659424, "step": 2243 }, { "epoch": 2.7635467980295565, "grad_norm": 11.458094202817868, "learning_rate": 1.9006511793544458e-07, "loss": 0.4601256847381592, "step": 2244 }, { "epoch": 2.7647783251231526, "grad_norm": 17.193961086363156, "learning_rate": 1.881130461361591e-07, "loss": 0.33677470684051514, "step": 2245 }, { "epoch": 2.7660098522167487, "grad_norm": 8.524927066266194, "learning_rate": 1.8617085836533544e-07, "loss": 0.8099600672721863, "step": 2246 }, { "epoch": 2.7672413793103448, "grad_norm": 15.804119634424612, "learning_rate": 1.8423855861239238e-07, "loss": 0.6992620229721069, "step": 2247 }, { "epoch": 2.768472906403941, "grad_norm": 9.647846553411064, "learning_rate": 1.8231615084644105e-07, "loss": 0.3640286326408386, "step": 2248 }, { "epoch": 2.769704433497537, "grad_norm": 8.955751617734634, "learning_rate": 1.8040363901627001e-07, "loss": 0.2996286451816559, "step": 2249 }, { "epoch": 2.770935960591133, "grad_norm": 11.938038283583609, "learning_rate": 1.7850102705034455e-07, "loss": 0.43687328696250916, "step": 2250 }, { "epoch": 2.772167487684729, "grad_norm": 17.093390601969645, "learning_rate": 1.7660831885679074e-07, "loss": 0.7942696809768677, "step": 2251 }, { "epoch": 2.773399014778325, "grad_norm": 13.100096515382093, "learning_rate": 1.747255183233948e-07, "loss": 1.1030818223953247, "step": 2252 }, { "epoch": 2.7746305418719213, "grad_norm": 8.873613224852555, "learning_rate": 1.7285262931759084e-07, "loss": 0.5030316114425659, "step": 2253 }, { "epoch": 2.7758620689655173, "grad_norm": 12.14741952725113, "learning_rate": 1.7098965568645264e-07, "loss": 0.6707223653793335, "step": 2254 }, { "epoch": 2.7770935960591134, "grad_norm": 11.75778232712136, "learning_rate": 1.6913660125668806e-07, "loss": 0.2983396351337433, "step": 2255 }, { "epoch": 2.7783251231527095, "grad_norm": 14.41974913977501, "learning_rate": 1.6729346983462957e-07, "loss": 0.6233869791030884, "step": 2256 }, { "epoch": 2.779556650246305, "grad_norm": 13.000501735636352, "learning_rate": 1.654602652062276e-07, "loss": 0.2838573455810547, "step": 2257 }, { "epoch": 2.7807881773399012, "grad_norm": 8.269339223606165, "learning_rate": 1.636369911370417e-07, "loss": 0.516904354095459, "step": 2258 }, { "epoch": 2.7820197044334973, "grad_norm": 12.228570926666848, "learning_rate": 1.6182365137223266e-07, "loss": 0.2637355625629425, "step": 2259 }, { "epoch": 2.7832512315270934, "grad_norm": 12.77963989317756, "learning_rate": 1.600202496365566e-07, "loss": 0.2973381280899048, "step": 2260 }, { "epoch": 2.7844827586206895, "grad_norm": 12.028070410415097, "learning_rate": 1.5822678963435479e-07, "loss": 0.731842041015625, "step": 2261 }, { "epoch": 2.7857142857142856, "grad_norm": 16.480537506483405, "learning_rate": 1.564432750495476e-07, "loss": 0.9091979265213013, "step": 2262 }, { "epoch": 2.7869458128078817, "grad_norm": 14.778758482272446, "learning_rate": 1.5466970954562786e-07, "loss": 0.9223085641860962, "step": 2263 }, { "epoch": 2.7881773399014778, "grad_norm": 12.767601072668027, "learning_rate": 1.5290609676564982e-07, "loss": 0.35786327719688416, "step": 2264 }, { "epoch": 2.789408866995074, "grad_norm": 10.468097971683415, "learning_rate": 1.5115244033222732e-07, "loss": 0.7312544584274292, "step": 2265 }, { "epoch": 2.79064039408867, "grad_norm": 9.834986856814911, "learning_rate": 1.4940874384751947e-07, "loss": 0.8420913219451904, "step": 2266 }, { "epoch": 2.791871921182266, "grad_norm": 16.21429528610728, "learning_rate": 1.47675010893229e-07, "loss": 0.3239392042160034, "step": 2267 }, { "epoch": 2.793103448275862, "grad_norm": 8.629439268560123, "learning_rate": 1.4595124503059165e-07, "loss": 0.3498873710632324, "step": 2268 }, { "epoch": 2.794334975369458, "grad_norm": 6.690308017489741, "learning_rate": 1.4423744980037068e-07, "loss": 0.22733798623085022, "step": 2269 }, { "epoch": 2.7955665024630543, "grad_norm": 8.212515181619986, "learning_rate": 1.425336287228496e-07, "loss": 0.2721923291683197, "step": 2270 }, { "epoch": 2.7967980295566504, "grad_norm": 9.080877903298425, "learning_rate": 1.408397852978205e-07, "loss": 0.344375342130661, "step": 2271 }, { "epoch": 2.7980295566502464, "grad_norm": 9.45480785329488, "learning_rate": 1.391559230045847e-07, "loss": 0.4529953896999359, "step": 2272 }, { "epoch": 2.7992610837438425, "grad_norm": 9.214190080042984, "learning_rate": 1.3748204530193987e-07, "loss": 0.1639999896287918, "step": 2273 }, { "epoch": 2.8004926108374386, "grad_norm": 13.6280899298915, "learning_rate": 1.3581815562817402e-07, "loss": 0.23326484858989716, "step": 2274 }, { "epoch": 2.8017241379310347, "grad_norm": 8.920482755226637, "learning_rate": 1.341642574010582e-07, "loss": 0.22694149613380432, "step": 2275 }, { "epoch": 2.802955665024631, "grad_norm": 8.710884196173295, "learning_rate": 1.3252035401784324e-07, "loss": 0.3588021993637085, "step": 2276 }, { "epoch": 2.804187192118227, "grad_norm": 11.632314435280234, "learning_rate": 1.3088644885524637e-07, "loss": 0.4335256516933441, "step": 2277 }, { "epoch": 2.8054187192118225, "grad_norm": 6.272067777885255, "learning_rate": 1.2926254526944904e-07, "loss": 0.1874769926071167, "step": 2278 }, { "epoch": 2.8066502463054186, "grad_norm": 8.936224496797552, "learning_rate": 1.27648646596088e-07, "loss": 0.3144474923610687, "step": 2279 }, { "epoch": 2.8078817733990147, "grad_norm": 19.58883398368707, "learning_rate": 1.2604475615025092e-07, "loss": 0.7241795063018799, "step": 2280 }, { "epoch": 2.8091133004926108, "grad_norm": 16.726363332544537, "learning_rate": 1.2445087722646576e-07, "loss": 0.5169468522071838, "step": 2281 }, { "epoch": 2.810344827586207, "grad_norm": 30.94634458747577, "learning_rate": 1.228670130986953e-07, "loss": 1.6869860887527466, "step": 2282 }, { "epoch": 2.811576354679803, "grad_norm": 10.707666993688912, "learning_rate": 1.212931670203338e-07, "loss": 0.47550255060195923, "step": 2283 }, { "epoch": 2.812807881773399, "grad_norm": 9.540335234729794, "learning_rate": 1.197293422241952e-07, "loss": 0.2437782883644104, "step": 2284 }, { "epoch": 2.814039408866995, "grad_norm": 6.665490888518648, "learning_rate": 1.1817554192251002e-07, "loss": 0.37867432832717896, "step": 2285 }, { "epoch": 2.815270935960591, "grad_norm": 9.667222509113516, "learning_rate": 1.1663176930691744e-07, "loss": 0.8604614734649658, "step": 2286 }, { "epoch": 2.8165024630541873, "grad_norm": 12.759555548828967, "learning_rate": 1.1509802754845978e-07, "loss": 1.1947153806686401, "step": 2287 }, { "epoch": 2.8177339901477834, "grad_norm": 9.33176290924216, "learning_rate": 1.1357431979757194e-07, "loss": 0.30131372809410095, "step": 2288 }, { "epoch": 2.8189655172413794, "grad_norm": 10.72676065785706, "learning_rate": 1.1206064918408143e-07, "loss": 0.47112587094306946, "step": 2289 }, { "epoch": 2.8201970443349755, "grad_norm": 11.488110070600202, "learning_rate": 1.1055701881719838e-07, "loss": 0.2062550187110901, "step": 2290 }, { "epoch": 2.821428571428571, "grad_norm": 8.859910558029405, "learning_rate": 1.0906343178550715e-07, "loss": 0.30918222665786743, "step": 2291 }, { "epoch": 2.8226600985221673, "grad_norm": 7.645494812767514, "learning_rate": 1.0757989115696421e-07, "loss": 0.46675896644592285, "step": 2292 }, { "epoch": 2.8238916256157633, "grad_norm": 7.696373009746994, "learning_rate": 1.0610639997888917e-07, "loss": 0.2514066696166992, "step": 2293 }, { "epoch": 2.8251231527093594, "grad_norm": 20.301202253116305, "learning_rate": 1.0464296127795926e-07, "loss": 0.37799739837646484, "step": 2294 }, { "epoch": 2.8263546798029555, "grad_norm": 10.51342866650685, "learning_rate": 1.0318957806020269e-07, "loss": 1.170919418334961, "step": 2295 }, { "epoch": 2.8275862068965516, "grad_norm": 10.322546313834785, "learning_rate": 1.0174625331099363e-07, "loss": 0.34683138132095337, "step": 2296 }, { "epoch": 2.8288177339901477, "grad_norm": 13.218925485338286, "learning_rate": 1.0031298999504557e-07, "loss": 0.24154211580753326, "step": 2297 }, { "epoch": 2.8300492610837438, "grad_norm": 11.94151576403668, "learning_rate": 9.888979105640295e-08, "loss": 0.3270137906074524, "step": 2298 }, { "epoch": 2.83128078817734, "grad_norm": 10.157922840931477, "learning_rate": 9.747665941843953e-08, "loss": 0.33205774426460266, "step": 2299 }, { "epoch": 2.832512315270936, "grad_norm": 15.674554832536234, "learning_rate": 9.607359798384785e-08, "loss": 1.5672454833984375, "step": 2300 }, { "epoch": 2.833743842364532, "grad_norm": 7.89425528282641, "learning_rate": 9.468060963463754e-08, "loss": 0.1868615597486496, "step": 2301 }, { "epoch": 2.834975369458128, "grad_norm": 16.06809449939127, "learning_rate": 9.329769723212478e-08, "loss": 0.3485974371433258, "step": 2302 }, { "epoch": 2.836206896551724, "grad_norm": 22.06944110945676, "learning_rate": 9.192486361693175e-08, "loss": 0.5702242851257324, "step": 2303 }, { "epoch": 2.8374384236453203, "grad_norm": 13.611203107193855, "learning_rate": 9.056211160897555e-08, "loss": 0.7004730105400085, "step": 2304 }, { "epoch": 2.8386699507389164, "grad_norm": 10.23772277567979, "learning_rate": 8.920944400746589e-08, "loss": 0.29311710596084595, "step": 2305 }, { "epoch": 2.8399014778325125, "grad_norm": 7.167372063418741, "learning_rate": 8.786686359089747e-08, "loss": 0.18041157722473145, "step": 2306 }, { "epoch": 2.8411330049261085, "grad_norm": 8.672887051600437, "learning_rate": 8.653437311704648e-08, "loss": 0.2873387634754181, "step": 2307 }, { "epoch": 2.8423645320197046, "grad_norm": 9.699021546064241, "learning_rate": 8.521197532296188e-08, "loss": 0.23781178891658783, "step": 2308 }, { "epoch": 2.8435960591133007, "grad_norm": 11.643059711853965, "learning_rate": 8.38996729249636e-08, "loss": 0.5913131833076477, "step": 2309 }, { "epoch": 2.844827586206897, "grad_norm": 12.799008291574818, "learning_rate": 8.259746861863094e-08, "loss": 0.9139914512634277, "step": 2310 }, { "epoch": 2.846059113300493, "grad_norm": 10.980579183559623, "learning_rate": 8.130536507880538e-08, "loss": 0.22883841395378113, "step": 2311 }, { "epoch": 2.8472906403940885, "grad_norm": 9.488904590414009, "learning_rate": 8.002336495957664e-08, "loss": 0.6467199325561523, "step": 2312 }, { "epoch": 2.8485221674876846, "grad_norm": 17.044793614561804, "learning_rate": 7.875147089428436e-08, "loss": 0.48100385069847107, "step": 2313 }, { "epoch": 2.8497536945812807, "grad_norm": 6.232324566569768, "learning_rate": 7.748968549550761e-08, "loss": 0.22535499930381775, "step": 2314 }, { "epoch": 2.850985221674877, "grad_norm": 16.357795976490426, "learning_rate": 7.623801135506148e-08, "loss": 0.7971012592315674, "step": 2315 }, { "epoch": 2.852216748768473, "grad_norm": 10.56546293503534, "learning_rate": 7.499645104399156e-08, "loss": 0.6965846419334412, "step": 2316 }, { "epoch": 2.853448275862069, "grad_norm": 10.699552582949096, "learning_rate": 7.376500711257062e-08, "loss": 0.2827698588371277, "step": 2317 }, { "epoch": 2.854679802955665, "grad_norm": 11.75504997847818, "learning_rate": 7.254368209028862e-08, "loss": 0.4453064203262329, "step": 2318 }, { "epoch": 2.855911330049261, "grad_norm": 10.373311779049724, "learning_rate": 7.133247848585268e-08, "loss": 0.5363994836807251, "step": 2319 }, { "epoch": 2.857142857142857, "grad_norm": 10.742091428994968, "learning_rate": 7.013139878717934e-08, "loss": 0.33071067929267883, "step": 2320 }, { "epoch": 2.8583743842364533, "grad_norm": 10.02135718464731, "learning_rate": 6.894044546138845e-08, "loss": 0.6118582487106323, "step": 2321 }, { "epoch": 2.8596059113300494, "grad_norm": 11.952226631897975, "learning_rate": 6.775962095480037e-08, "loss": 0.4941851496696472, "step": 2322 }, { "epoch": 2.8608374384236455, "grad_norm": 12.467253293652027, "learning_rate": 6.65889276929299e-08, "loss": 0.9043294191360474, "step": 2323 }, { "epoch": 2.862068965517241, "grad_norm": 9.372107033246923, "learning_rate": 6.542836808048181e-08, "loss": 0.5352662801742554, "step": 2324 }, { "epoch": 2.863300492610837, "grad_norm": 13.465637997675985, "learning_rate": 6.427794450134529e-08, "loss": 0.622706413269043, "step": 2325 }, { "epoch": 2.8645320197044333, "grad_norm": 10.951531479275452, "learning_rate": 6.313765931858785e-08, "loss": 0.32065168023109436, "step": 2326 }, { "epoch": 2.8657635467980294, "grad_norm": 11.940905797523131, "learning_rate": 6.200751487445367e-08, "loss": 0.5308477878570557, "step": 2327 }, { "epoch": 2.8669950738916254, "grad_norm": 12.032315008603385, "learning_rate": 6.088751349035693e-08, "loss": 0.4006965756416321, "step": 2328 }, { "epoch": 2.8682266009852215, "grad_norm": 14.936202143915887, "learning_rate": 5.977765746687569e-08, "loss": 0.29346001148223877, "step": 2329 }, { "epoch": 2.8694581280788176, "grad_norm": 12.39243720991369, "learning_rate": 5.8677949083749686e-08, "loss": 0.17921757698059082, "step": 2330 }, { "epoch": 2.8706896551724137, "grad_norm": 9.58038552158238, "learning_rate": 5.758839059987531e-08, "loss": 0.3909390866756439, "step": 2331 }, { "epoch": 2.87192118226601, "grad_norm": 15.9782663440221, "learning_rate": 5.650898425329676e-08, "loss": 0.2947097420692444, "step": 2332 }, { "epoch": 2.873152709359606, "grad_norm": 10.207214673211949, "learning_rate": 5.5439732261209356e-08, "loss": 0.27580755949020386, "step": 2333 }, { "epoch": 2.874384236453202, "grad_norm": 10.944513423861029, "learning_rate": 5.438063681994732e-08, "loss": 0.5352618098258972, "step": 2334 }, { "epoch": 2.875615763546798, "grad_norm": 11.026909219005717, "learning_rate": 5.333170010498434e-08, "loss": 0.4425346553325653, "step": 2335 }, { "epoch": 2.876847290640394, "grad_norm": 10.718057032304046, "learning_rate": 5.229292427092525e-08, "loss": 0.3107433319091797, "step": 2336 }, { "epoch": 2.87807881773399, "grad_norm": 12.247326551233483, "learning_rate": 5.126431145150546e-08, "loss": 0.8459264039993286, "step": 2337 }, { "epoch": 2.8793103448275863, "grad_norm": 9.9858024833323, "learning_rate": 5.024586375958429e-08, "loss": 0.6122205257415771, "step": 2338 }, { "epoch": 2.8805418719211824, "grad_norm": 8.326107009918898, "learning_rate": 4.9237583287139454e-08, "loss": 0.28234463930130005, "step": 2339 }, { "epoch": 2.8817733990147785, "grad_norm": 9.707118891697133, "learning_rate": 4.823947210526647e-08, "loss": 0.26258403062820435, "step": 2340 }, { "epoch": 2.8830049261083746, "grad_norm": 11.37690573459154, "learning_rate": 4.72515322641709e-08, "loss": 0.16676993668079376, "step": 2341 }, { "epoch": 2.8842364532019706, "grad_norm": 10.744107147683183, "learning_rate": 4.627376579316667e-08, "loss": 0.5982980132102966, "step": 2342 }, { "epoch": 2.8854679802955667, "grad_norm": 11.814730049244856, "learning_rate": 4.530617470066834e-08, "loss": 0.3576871156692505, "step": 2343 }, { "epoch": 2.886699507389163, "grad_norm": 7.558098865292991, "learning_rate": 4.4348760974192715e-08, "loss": 0.22213858366012573, "step": 2344 }, { "epoch": 2.887931034482759, "grad_norm": 31.227769055767126, "learning_rate": 4.340152658034835e-08, "loss": 0.7075624465942383, "step": 2345 }, { "epoch": 2.8891625615763545, "grad_norm": 13.602269942674353, "learning_rate": 4.246447346483662e-08, "loss": 0.35476282238960266, "step": 2346 }, { "epoch": 2.8903940886699506, "grad_norm": 11.66167288478714, "learning_rate": 4.153760355244507e-08, "loss": 0.4569534659385681, "step": 2347 }, { "epoch": 2.8916256157635467, "grad_norm": 12.232619433370953, "learning_rate": 4.062091874704355e-08, "loss": 0.8425757884979248, "step": 2348 }, { "epoch": 2.892857142857143, "grad_norm": 15.584381566055246, "learning_rate": 3.971442093158195e-08, "loss": 0.6543349623680115, "step": 2349 }, { "epoch": 2.894088669950739, "grad_norm": 12.232909525407603, "learning_rate": 3.8818111968083607e-08, "loss": 0.4949587285518646, "step": 2350 }, { "epoch": 2.895320197044335, "grad_norm": 28.009977519758436, "learning_rate": 3.7931993697644664e-08, "loss": 1.0205111503601074, "step": 2351 }, { "epoch": 2.896551724137931, "grad_norm": 8.083430035021566, "learning_rate": 3.7056067940427484e-08, "loss": 0.429599404335022, "step": 2352 }, { "epoch": 2.897783251231527, "grad_norm": 11.304307823971973, "learning_rate": 3.6190336495659504e-08, "loss": 0.6471319198608398, "step": 2353 }, { "epoch": 2.899014778325123, "grad_norm": 11.052274245265034, "learning_rate": 3.533480114162713e-08, "loss": 0.6227458715438843, "step": 2354 }, { "epoch": 2.9002463054187193, "grad_norm": 10.145305358695179, "learning_rate": 3.448946363567296e-08, "loss": 0.35620149970054626, "step": 2355 }, { "epoch": 2.9014778325123154, "grad_norm": 9.735362530555188, "learning_rate": 3.365432571419247e-08, "loss": 0.41157659888267517, "step": 2356 }, { "epoch": 2.9027093596059115, "grad_norm": 16.113614254695477, "learning_rate": 3.282938909263122e-08, "loss": 0.39660418033599854, "step": 2357 }, { "epoch": 2.903940886699507, "grad_norm": 12.303598539070832, "learning_rate": 3.201465546547988e-08, "loss": 0.37891146540641785, "step": 2358 }, { "epoch": 2.905172413793103, "grad_norm": 11.49013243084427, "learning_rate": 3.121012650627031e-08, "loss": 0.4459425210952759, "step": 2359 }, { "epoch": 2.9064039408866993, "grad_norm": 12.062068468114942, "learning_rate": 3.041580386757448e-08, "loss": 0.4933587610721588, "step": 2360 }, { "epoch": 2.9076354679802954, "grad_norm": 7.691939807180967, "learning_rate": 2.9631689180999457e-08, "loss": 0.16229723393917084, "step": 2361 }, { "epoch": 2.9088669950738915, "grad_norm": 11.649633348013484, "learning_rate": 2.885778405718409e-08, "loss": 0.4784936308860779, "step": 2362 }, { "epoch": 2.9100985221674875, "grad_norm": 20.64984541908695, "learning_rate": 2.8094090085795112e-08, "loss": 0.6622560620307922, "step": 2363 }, { "epoch": 2.9113300492610836, "grad_norm": 9.783513206502265, "learning_rate": 2.7340608835526584e-08, "loss": 0.3672278821468353, "step": 2364 }, { "epoch": 2.9125615763546797, "grad_norm": 6.04349473256102, "learning_rate": 2.6597341854092685e-08, "loss": 0.3247770667076111, "step": 2365 }, { "epoch": 2.913793103448276, "grad_norm": 11.650085297412613, "learning_rate": 2.586429066822771e-08, "loss": 0.3467229902744293, "step": 2366 }, { "epoch": 2.915024630541872, "grad_norm": 11.842612737683362, "learning_rate": 2.514145678368163e-08, "loss": 0.6725019812583923, "step": 2367 }, { "epoch": 2.916256157635468, "grad_norm": 8.454338307427385, "learning_rate": 2.4428841685217863e-08, "loss": 0.6760755777359009, "step": 2368 }, { "epoch": 2.917487684729064, "grad_norm": 13.555178809367312, "learning_rate": 2.3726446836608298e-08, "loss": 0.5354422330856323, "step": 2369 }, { "epoch": 2.91871921182266, "grad_norm": 11.004737348047312, "learning_rate": 2.3034273680632157e-08, "loss": 0.3656280040740967, "step": 2370 }, { "epoch": 2.9199507389162562, "grad_norm": 9.99595612427158, "learning_rate": 2.235232363907269e-08, "loss": 0.28186920285224915, "step": 2371 }, { "epoch": 2.9211822660098523, "grad_norm": 16.789031513751276, "learning_rate": 2.168059811271439e-08, "loss": 0.31556010246276855, "step": 2372 }, { "epoch": 2.9224137931034484, "grad_norm": 7.870447962098653, "learning_rate": 2.101909848133743e-08, "loss": 0.33978280425071716, "step": 2373 }, { "epoch": 2.9236453201970445, "grad_norm": 13.322556254888749, "learning_rate": 2.0367826103720457e-08, "loss": 0.5645813941955566, "step": 2374 }, { "epoch": 2.9248768472906406, "grad_norm": 6.936377752521131, "learning_rate": 1.9726782317632255e-08, "loss": 0.21976767480373383, "step": 2375 }, { "epoch": 2.9261083743842367, "grad_norm": 16.201679118604396, "learning_rate": 1.9095968439830637e-08, "loss": 0.6068276166915894, "step": 2376 }, { "epoch": 2.9273399014778327, "grad_norm": 10.683769815067068, "learning_rate": 1.8475385766063002e-08, "loss": 0.2844882607460022, "step": 2377 }, { "epoch": 2.928571428571429, "grad_norm": 22.182288301690132, "learning_rate": 1.786503557105912e-08, "loss": 1.1885827779769897, "step": 2378 }, { "epoch": 2.9298029556650245, "grad_norm": 8.221573464179809, "learning_rate": 1.7264919108529455e-08, "loss": 0.4241114854812622, "step": 2379 }, { "epoch": 2.9310344827586206, "grad_norm": 10.23479597630979, "learning_rate": 1.6675037611165735e-08, "loss": 0.9062713980674744, "step": 2380 }, { "epoch": 2.9322660098522166, "grad_norm": 9.83143734077978, "learning_rate": 1.6095392290635393e-08, "loss": 0.29996055364608765, "step": 2381 }, { "epoch": 2.9334975369458127, "grad_norm": 9.191744534619497, "learning_rate": 1.552598433757879e-08, "loss": 0.3901692032814026, "step": 2382 }, { "epoch": 2.934729064039409, "grad_norm": 10.314975796862411, "learning_rate": 1.4966814921608674e-08, "loss": 0.36974531412124634, "step": 2383 }, { "epoch": 2.935960591133005, "grad_norm": 10.965587726479475, "learning_rate": 1.441788519130738e-08, "loss": 0.2913818359375, "step": 2384 }, { "epoch": 2.937192118226601, "grad_norm": 26.225721932440074, "learning_rate": 1.3879196274224626e-08, "loss": 2.8897290229797363, "step": 2385 }, { "epoch": 2.938423645320197, "grad_norm": 16.567199226805975, "learning_rate": 1.335074927687141e-08, "loss": 0.7396224141120911, "step": 2386 }, { "epoch": 2.939655172413793, "grad_norm": 10.384159480919202, "learning_rate": 1.2832545284724995e-08, "loss": 0.2923913896083832, "step": 2387 }, { "epoch": 2.9408866995073892, "grad_norm": 12.315507900916186, "learning_rate": 1.2324585362220032e-08, "loss": 0.60726398229599, "step": 2388 }, { "epoch": 2.9421182266009853, "grad_norm": 10.077538225946919, "learning_rate": 1.1826870552749669e-08, "loss": 0.3081626892089844, "step": 2389 }, { "epoch": 2.9433497536945814, "grad_norm": 15.192636407836343, "learning_rate": 1.1339401878663337e-08, "loss": 0.7774905562400818, "step": 2390 }, { "epoch": 2.9445812807881775, "grad_norm": 12.649581445218459, "learning_rate": 1.0862180341263962e-08, "loss": 0.5568622350692749, "step": 2391 }, { "epoch": 2.945812807881773, "grad_norm": 11.4557765341612, "learning_rate": 1.039520692080409e-08, "loss": 0.42753443121910095, "step": 2392 }, { "epoch": 2.947044334975369, "grad_norm": 12.049826060673517, "learning_rate": 9.938482576487551e-09, "loss": 0.33313125371932983, "step": 2393 }, { "epoch": 2.9482758620689653, "grad_norm": 11.358169603413613, "learning_rate": 9.492008246466122e-09, "loss": 0.4345099925994873, "step": 2394 }, { "epoch": 2.9495073891625614, "grad_norm": 15.061185553672066, "learning_rate": 9.055784847836202e-09, "loss": 0.6844139695167542, "step": 2395 }, { "epoch": 2.9507389162561575, "grad_norm": 12.25434358933355, "learning_rate": 8.629813276637144e-09, "loss": 0.4944530725479126, "step": 2396 }, { "epoch": 2.9519704433497536, "grad_norm": 7.240836775147592, "learning_rate": 8.214094407851814e-09, "loss": 0.1517336368560791, "step": 2397 }, { "epoch": 2.9532019704433496, "grad_norm": 11.570980194113849, "learning_rate": 7.808629095402697e-09, "loss": 0.24804279208183289, "step": 2398 }, { "epoch": 2.9544334975369457, "grad_norm": 15.785024108321435, "learning_rate": 7.413418172149689e-09, "loss": 1.2773240804672241, "step": 2399 }, { "epoch": 2.955665024630542, "grad_norm": 12.516388230034497, "learning_rate": 7.028462449889528e-09, "loss": 0.20905320346355438, "step": 2400 }, { "epoch": 2.956896551724138, "grad_norm": 6.362652358430743, "learning_rate": 6.6537627193558055e-09, "loss": 0.24830211699008942, "step": 2401 }, { "epoch": 2.958128078817734, "grad_norm": 9.391013644944394, "learning_rate": 6.289319750212852e-09, "loss": 0.30148234963417053, "step": 2402 }, { "epoch": 2.95935960591133, "grad_norm": 11.036169214095409, "learning_rate": 5.93513429105741e-09, "loss": 0.7273882031440735, "step": 2403 }, { "epoch": 2.960591133004926, "grad_norm": 10.956019864515577, "learning_rate": 5.591207069417515e-09, "loss": 0.4958484172821045, "step": 2404 }, { "epoch": 2.9618226600985222, "grad_norm": 13.272684139309336, "learning_rate": 5.257538791749173e-09, "loss": 0.5852301120758057, "step": 2405 }, { "epoch": 2.9630541871921183, "grad_norm": 15.300683310135565, "learning_rate": 4.934130143435245e-09, "loss": 0.5483534336090088, "step": 2406 }, { "epoch": 2.9642857142857144, "grad_norm": 9.624016617554009, "learning_rate": 4.6209817887848955e-09, "loss": 0.49854928255081177, "step": 2407 }, { "epoch": 2.9655172413793105, "grad_norm": 8.615173379839112, "learning_rate": 4.318094371031922e-09, "loss": 0.9770829677581787, "step": 2408 }, { "epoch": 2.9667487684729066, "grad_norm": 15.370084776473758, "learning_rate": 4.025468512333098e-09, "loss": 0.4265647530555725, "step": 2409 }, { "epoch": 2.9679802955665027, "grad_norm": 12.632393723486729, "learning_rate": 3.743104813767051e-09, "loss": 0.6890873908996582, "step": 2410 }, { "epoch": 2.9692118226600988, "grad_norm": 8.772985107195037, "learning_rate": 3.471003855332611e-09, "loss": 0.28604504466056824, "step": 2411 }, { "epoch": 2.970443349753695, "grad_norm": 9.587235477416659, "learning_rate": 3.2091661959487986e-09, "loss": 0.3280025124549866, "step": 2412 }, { "epoch": 2.9716748768472905, "grad_norm": 9.74052346916064, "learning_rate": 2.9575923734520562e-09, "loss": 0.23375985026359558, "step": 2413 }, { "epoch": 2.9729064039408866, "grad_norm": 14.377712378651319, "learning_rate": 2.7162829045979113e-09, "loss": 0.5062013864517212, "step": 2414 }, { "epoch": 2.9741379310344827, "grad_norm": 10.486023439825937, "learning_rate": 2.4852382850554245e-09, "loss": 0.46517398953437805, "step": 2415 }, { "epoch": 2.9753694581280787, "grad_norm": 7.705201332847603, "learning_rate": 2.264458989410523e-09, "loss": 0.43281105160713196, "step": 2416 }, { "epoch": 2.976600985221675, "grad_norm": 9.481633319521942, "learning_rate": 2.0539454711626663e-09, "loss": 0.6278485655784607, "step": 2417 }, { "epoch": 2.977832512315271, "grad_norm": 12.691647261969463, "learning_rate": 1.8536981627254036e-09, "loss": 0.3320518136024475, "step": 2418 }, { "epoch": 2.979064039408867, "grad_norm": 9.582038617142, "learning_rate": 1.6637174754230435e-09, "loss": 0.4568738341331482, "step": 2419 }, { "epoch": 2.980295566502463, "grad_norm": 10.563009615677867, "learning_rate": 1.4840037994923173e-09, "loss": 0.24025380611419678, "step": 2420 }, { "epoch": 2.981527093596059, "grad_norm": 14.650292148384931, "learning_rate": 1.3145575040801605e-09, "loss": 0.33217573165893555, "step": 2421 }, { "epoch": 2.9827586206896552, "grad_norm": 23.286828169967034, "learning_rate": 1.1553789372453771e-09, "loss": 1.5295354127883911, "step": 2422 }, { "epoch": 2.9839901477832513, "grad_norm": 16.800662700378666, "learning_rate": 1.0064684259525337e-09, "loss": 0.6207250952720642, "step": 2423 }, { "epoch": 2.9852216748768474, "grad_norm": 20.655163645870832, "learning_rate": 8.678262760775102e-10, "loss": 0.4011062681674957, "step": 2424 }, { "epoch": 2.9864532019704435, "grad_norm": 12.812116716093689, "learning_rate": 7.394527724030598e-10, "loss": 0.8355351090431213, "step": 2425 }, { "epoch": 2.987684729064039, "grad_norm": 13.524667045497342, "learning_rate": 6.213481786199182e-10, "loss": 0.6552157998085022, "step": 2426 }, { "epoch": 2.9889162561576352, "grad_norm": 9.071239617590464, "learning_rate": 5.13512737324029e-10, "loss": 0.4416411519050598, "step": 2427 }, { "epoch": 2.9901477832512313, "grad_norm": 12.103653519709662, "learning_rate": 4.159466700187631e-10, "loss": 0.3720128834247589, "step": 2428 }, { "epoch": 2.9913793103448274, "grad_norm": 7.981239501743612, "learning_rate": 3.2865017711380955e-10, "loss": 0.6710848212242126, "step": 2429 }, { "epoch": 2.9926108374384235, "grad_norm": 11.769326063023964, "learning_rate": 2.516234379235094e-10, "loss": 0.7640970349311829, "step": 2430 }, { "epoch": 2.9938423645320196, "grad_norm": 11.664052062324599, "learning_rate": 1.848666106674113e-10, "loss": 0.5783921480178833, "step": 2431 }, { "epoch": 2.9950738916256157, "grad_norm": 11.283478806003906, "learning_rate": 1.2837983246916098e-10, "loss": 0.411626935005188, "step": 2432 }, { "epoch": 2.9963054187192117, "grad_norm": 11.703360380276939, "learning_rate": 8.216321935816673e-11, "loss": 0.529446005821228, "step": 2433 }, { "epoch": 2.997536945812808, "grad_norm": 9.632699414961296, "learning_rate": 4.6216866266823867e-11, "loss": 0.44549500942230225, "step": 2434 }, { "epoch": 2.998768472906404, "grad_norm": 9.699682514575105, "learning_rate": 2.0540847032179955e-11, "loss": 0.2854122519493103, "step": 2435 }, { "epoch": 3.0, "grad_norm": 6.925750902905979, "learning_rate": 5.135214394824672e-12, "loss": 0.4455873966217041, "step": 2436 }, { "epoch": 3.0, "step": 2436, "total_flos": 6456127242240.0, "train_loss": 1.6602046456561104, "train_runtime": 2865.3381, "train_samples_per_second": 3.4, "train_steps_per_second": 0.85 } ], "logging_steps": 1, "max_steps": 2436, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6456127242240.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }