Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 91bb47112e 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-1-5m-bs4
Source: Original Platform
2026-06-12 17:08:20 +08:00

17096 lines
441 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2436,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012315270935960591,
"grad_norm": 36.7600685768779,
"learning_rate": 0.0,
"loss": 4.157936096191406,
"step": 1
},
{
"epoch": 0.0024630541871921183,
"grad_norm": 37.664654386111934,
"learning_rate": 4.098360655737705e-08,
"loss": 3.8494455814361572,
"step": 2
},
{
"epoch": 0.003694581280788177,
"grad_norm": 38.23654519991739,
"learning_rate": 8.19672131147541e-08,
"loss": 3.7497382164001465,
"step": 3
},
{
"epoch": 0.0049261083743842365,
"grad_norm": 49.1212230676838,
"learning_rate": 1.2295081967213116e-07,
"loss": 4.874395847320557,
"step": 4
},
{
"epoch": 0.006157635467980296,
"grad_norm": 51.23013396325368,
"learning_rate": 1.639344262295082e-07,
"loss": 5.729328155517578,
"step": 5
},
{
"epoch": 0.007389162561576354,
"grad_norm": 33.06662236870545,
"learning_rate": 2.0491803278688524e-07,
"loss": 3.968146324157715,
"step": 6
},
{
"epoch": 0.008620689655172414,
"grad_norm": 33.94224964860029,
"learning_rate": 2.459016393442623e-07,
"loss": 4.092198848724365,
"step": 7
},
{
"epoch": 0.009852216748768473,
"grad_norm": 28.585037517248036,
"learning_rate": 2.8688524590163937e-07,
"loss": 3.4101109504699707,
"step": 8
},
{
"epoch": 0.011083743842364532,
"grad_norm": 39.512646004891735,
"learning_rate": 3.278688524590164e-07,
"loss": 4.387180805206299,
"step": 9
},
{
"epoch": 0.012315270935960592,
"grad_norm": 29.487139965581328,
"learning_rate": 3.6885245901639347e-07,
"loss": 3.4985814094543457,
"step": 10
},
{
"epoch": 0.013546798029556651,
"grad_norm": 35.1254398727907,
"learning_rate": 4.0983606557377047e-07,
"loss": 5.157108306884766,
"step": 11
},
{
"epoch": 0.014778325123152709,
"grad_norm": 33.7037580376338,
"learning_rate": 4.508196721311476e-07,
"loss": 4.057161808013916,
"step": 12
},
{
"epoch": 0.01600985221674877,
"grad_norm": 35.136997816960864,
"learning_rate": 4.918032786885246e-07,
"loss": 4.237695693969727,
"step": 13
},
{
"epoch": 0.017241379310344827,
"grad_norm": 39.34259468640213,
"learning_rate": 5.327868852459017e-07,
"loss": 4.635364532470703,
"step": 14
},
{
"epoch": 0.01847290640394089,
"grad_norm": 33.5811322334086,
"learning_rate": 5.737704918032787e-07,
"loss": 3.3291709423065186,
"step": 15
},
{
"epoch": 0.019704433497536946,
"grad_norm": 33.93459885987163,
"learning_rate": 6.147540983606558e-07,
"loss": 3.8693442344665527,
"step": 16
},
{
"epoch": 0.020935960591133004,
"grad_norm": 25.605142057165235,
"learning_rate": 6.557377049180328e-07,
"loss": 3.4419002532958984,
"step": 17
},
{
"epoch": 0.022167487684729065,
"grad_norm": 33.566059151369195,
"learning_rate": 6.967213114754098e-07,
"loss": 3.8446784019470215,
"step": 18
},
{
"epoch": 0.023399014778325122,
"grad_norm": 29.72848721122937,
"learning_rate": 7.377049180327869e-07,
"loss": 3.5930001735687256,
"step": 19
},
{
"epoch": 0.024630541871921183,
"grad_norm": 26.393927957123275,
"learning_rate": 7.78688524590164e-07,
"loss": 3.638699531555176,
"step": 20
},
{
"epoch": 0.02586206896551724,
"grad_norm": 26.06446386508918,
"learning_rate": 8.196721311475409e-07,
"loss": 3.6789143085479736,
"step": 21
},
{
"epoch": 0.027093596059113302,
"grad_norm": 35.2733178056508,
"learning_rate": 8.606557377049181e-07,
"loss": 3.959703207015991,
"step": 22
},
{
"epoch": 0.02832512315270936,
"grad_norm": 33.03896583989334,
"learning_rate": 9.016393442622952e-07,
"loss": 3.8822054862976074,
"step": 23
},
{
"epoch": 0.029556650246305417,
"grad_norm": 33.57337166473473,
"learning_rate": 9.426229508196721e-07,
"loss": 3.8448376655578613,
"step": 24
},
{
"epoch": 0.03078817733990148,
"grad_norm": 20.141759958099808,
"learning_rate": 9.836065573770493e-07,
"loss": 3.372765064239502,
"step": 25
},
{
"epoch": 0.03201970443349754,
"grad_norm": 23.420906015149534,
"learning_rate": 1.0245901639344263e-06,
"loss": 3.4989559650421143,
"step": 26
},
{
"epoch": 0.0332512315270936,
"grad_norm": 33.133583346249836,
"learning_rate": 1.0655737704918034e-06,
"loss": 3.6318516731262207,
"step": 27
},
{
"epoch": 0.034482758620689655,
"grad_norm": 18.99907077955952,
"learning_rate": 1.1065573770491804e-06,
"loss": 3.351621627807617,
"step": 28
},
{
"epoch": 0.03571428571428571,
"grad_norm": 18.353082575411992,
"learning_rate": 1.1475409836065575e-06,
"loss": 3.1978442668914795,
"step": 29
},
{
"epoch": 0.03694581280788178,
"grad_norm": 26.628518248775677,
"learning_rate": 1.1885245901639345e-06,
"loss": 4.033670902252197,
"step": 30
},
{
"epoch": 0.038177339901477834,
"grad_norm": 16.452853960671934,
"learning_rate": 1.2295081967213116e-06,
"loss": 3.626315116882324,
"step": 31
},
{
"epoch": 0.03940886699507389,
"grad_norm": 16.372280561150735,
"learning_rate": 1.2704918032786886e-06,
"loss": 3.385767936706543,
"step": 32
},
{
"epoch": 0.04064039408866995,
"grad_norm": 23.073122100098054,
"learning_rate": 1.3114754098360657e-06,
"loss": 3.946913719177246,
"step": 33
},
{
"epoch": 0.04187192118226601,
"grad_norm": 11.580002792760054,
"learning_rate": 1.352459016393443e-06,
"loss": 3.3034565448760986,
"step": 34
},
{
"epoch": 0.04310344827586207,
"grad_norm": 17.961230909917667,
"learning_rate": 1.3934426229508196e-06,
"loss": 3.2368359565734863,
"step": 35
},
{
"epoch": 0.04433497536945813,
"grad_norm": 11.543206406321579,
"learning_rate": 1.4344262295081968e-06,
"loss": 3.728569984436035,
"step": 36
},
{
"epoch": 0.04556650246305419,
"grad_norm": 14.762221765187595,
"learning_rate": 1.4754098360655739e-06,
"loss": 3.3756117820739746,
"step": 37
},
{
"epoch": 0.046798029556650245,
"grad_norm": 13.981113216433073,
"learning_rate": 1.516393442622951e-06,
"loss": 3.399596691131592,
"step": 38
},
{
"epoch": 0.0480295566502463,
"grad_norm": 24.184372796013783,
"learning_rate": 1.557377049180328e-06,
"loss": 4.209182262420654,
"step": 39
},
{
"epoch": 0.04926108374384237,
"grad_norm": 11.628888477605962,
"learning_rate": 1.5983606557377053e-06,
"loss": 2.797691822052002,
"step": 40
},
{
"epoch": 0.050492610837438424,
"grad_norm": 16.948512477650098,
"learning_rate": 1.6393442622950819e-06,
"loss": 3.630617141723633,
"step": 41
},
{
"epoch": 0.05172413793103448,
"grad_norm": 14.186312302659116,
"learning_rate": 1.6803278688524592e-06,
"loss": 3.182535171508789,
"step": 42
},
{
"epoch": 0.05295566502463054,
"grad_norm": 13.666441097834594,
"learning_rate": 1.7213114754098362e-06,
"loss": 3.554767370223999,
"step": 43
},
{
"epoch": 0.054187192118226604,
"grad_norm": 16.91458664100256,
"learning_rate": 1.7622950819672133e-06,
"loss": 3.675961494445801,
"step": 44
},
{
"epoch": 0.05541871921182266,
"grad_norm": 16.161861225550066,
"learning_rate": 1.8032786885245903e-06,
"loss": 3.346269369125366,
"step": 45
},
{
"epoch": 0.05665024630541872,
"grad_norm": 14.040742605132769,
"learning_rate": 1.8442622950819674e-06,
"loss": 3.4892683029174805,
"step": 46
},
{
"epoch": 0.05788177339901478,
"grad_norm": 14.981644166015332,
"learning_rate": 1.8852459016393442e-06,
"loss": 3.3602352142333984,
"step": 47
},
{
"epoch": 0.059113300492610835,
"grad_norm": 9.346123052417639,
"learning_rate": 1.9262295081967215e-06,
"loss": 3.301713228225708,
"step": 48
},
{
"epoch": 0.0603448275862069,
"grad_norm": 22.6894652203607,
"learning_rate": 1.9672131147540985e-06,
"loss": 3.7745046615600586,
"step": 49
},
{
"epoch": 0.06157635467980296,
"grad_norm": 8.465817304604528,
"learning_rate": 2.0081967213114756e-06,
"loss": 3.0452070236206055,
"step": 50
},
{
"epoch": 0.06280788177339902,
"grad_norm": 20.560185363485036,
"learning_rate": 2.0491803278688526e-06,
"loss": 3.7955079078674316,
"step": 51
},
{
"epoch": 0.06403940886699508,
"grad_norm": 8.75621229547506,
"learning_rate": 2.0901639344262297e-06,
"loss": 3.1644039154052734,
"step": 52
},
{
"epoch": 0.06527093596059114,
"grad_norm": 13.679443353464602,
"learning_rate": 2.1311475409836067e-06,
"loss": 3.2459874153137207,
"step": 53
},
{
"epoch": 0.0665024630541872,
"grad_norm": 12.278683741598382,
"learning_rate": 2.1721311475409838e-06,
"loss": 3.61742901802063,
"step": 54
},
{
"epoch": 0.06773399014778325,
"grad_norm": 12.717536959646948,
"learning_rate": 2.213114754098361e-06,
"loss": 3.3136467933654785,
"step": 55
},
{
"epoch": 0.06896551724137931,
"grad_norm": 15.543240982145285,
"learning_rate": 2.254098360655738e-06,
"loss": 3.272696018218994,
"step": 56
},
{
"epoch": 0.07019704433497537,
"grad_norm": 13.101250342680272,
"learning_rate": 2.295081967213115e-06,
"loss": 3.041365385055542,
"step": 57
},
{
"epoch": 0.07142857142857142,
"grad_norm": 11.7077150462335,
"learning_rate": 2.336065573770492e-06,
"loss": 3.309293746948242,
"step": 58
},
{
"epoch": 0.07266009852216748,
"grad_norm": 26.32874973946408,
"learning_rate": 2.377049180327869e-06,
"loss": 3.4676990509033203,
"step": 59
},
{
"epoch": 0.07389162561576355,
"grad_norm": 16.588748060840203,
"learning_rate": 2.418032786885246e-06,
"loss": 2.8236446380615234,
"step": 60
},
{
"epoch": 0.07512315270935961,
"grad_norm": 8.18040938852151,
"learning_rate": 2.459016393442623e-06,
"loss": 2.716705083847046,
"step": 61
},
{
"epoch": 0.07635467980295567,
"grad_norm": 20.07190390154421,
"learning_rate": 2.5e-06,
"loss": 2.5590922832489014,
"step": 62
},
{
"epoch": 0.07758620689655173,
"grad_norm": 11.418876796774995,
"learning_rate": 2.5409836065573773e-06,
"loss": 2.6987993717193604,
"step": 63
},
{
"epoch": 0.07881773399014778,
"grad_norm": 13.315536498724418,
"learning_rate": 2.5819672131147543e-06,
"loss": 4.340274810791016,
"step": 64
},
{
"epoch": 0.08004926108374384,
"grad_norm": 17.075484530853824,
"learning_rate": 2.6229508196721314e-06,
"loss": 4.166017532348633,
"step": 65
},
{
"epoch": 0.0812807881773399,
"grad_norm": 9.586520693266204,
"learning_rate": 2.6639344262295084e-06,
"loss": 2.664743185043335,
"step": 66
},
{
"epoch": 0.08251231527093596,
"grad_norm": 11.154276667212649,
"learning_rate": 2.704918032786886e-06,
"loss": 3.4285409450531006,
"step": 67
},
{
"epoch": 0.08374384236453201,
"grad_norm": 23.203683210215114,
"learning_rate": 2.745901639344263e-06,
"loss": 2.613044023513794,
"step": 68
},
{
"epoch": 0.08497536945812807,
"grad_norm": 13.748249566024421,
"learning_rate": 2.786885245901639e-06,
"loss": 3.1923232078552246,
"step": 69
},
{
"epoch": 0.08620689655172414,
"grad_norm": 23.6456335605133,
"learning_rate": 2.8278688524590166e-06,
"loss": 3.881509780883789,
"step": 70
},
{
"epoch": 0.0874384236453202,
"grad_norm": 12.242314523228817,
"learning_rate": 2.8688524590163937e-06,
"loss": 3.3872318267822266,
"step": 71
},
{
"epoch": 0.08866995073891626,
"grad_norm": 10.174962303917177,
"learning_rate": 2.9098360655737707e-06,
"loss": 3.1114461421966553,
"step": 72
},
{
"epoch": 0.08990147783251232,
"grad_norm": 9.979115596445391,
"learning_rate": 2.9508196721311478e-06,
"loss": 3.182547092437744,
"step": 73
},
{
"epoch": 0.09113300492610837,
"grad_norm": 10.437140873327547,
"learning_rate": 2.991803278688525e-06,
"loss": 3.488222599029541,
"step": 74
},
{
"epoch": 0.09236453201970443,
"grad_norm": 9.422729886318432,
"learning_rate": 3.032786885245902e-06,
"loss": 3.0836119651794434,
"step": 75
},
{
"epoch": 0.09359605911330049,
"grad_norm": 9.576987414129725,
"learning_rate": 3.073770491803279e-06,
"loss": 2.965284824371338,
"step": 76
},
{
"epoch": 0.09482758620689655,
"grad_norm": 9.051063368959207,
"learning_rate": 3.114754098360656e-06,
"loss": 3.0366950035095215,
"step": 77
},
{
"epoch": 0.0960591133004926,
"grad_norm": 19.769081445901076,
"learning_rate": 3.155737704918033e-06,
"loss": 3.7336153984069824,
"step": 78
},
{
"epoch": 0.09729064039408868,
"grad_norm": 17.150697728192082,
"learning_rate": 3.1967213114754105e-06,
"loss": 3.3801069259643555,
"step": 79
},
{
"epoch": 0.09852216748768473,
"grad_norm": 11.029522805215215,
"learning_rate": 3.2377049180327876e-06,
"loss": 3.1140761375427246,
"step": 80
},
{
"epoch": 0.09975369458128079,
"grad_norm": 9.099280236883942,
"learning_rate": 3.2786885245901638e-06,
"loss": 3.1199679374694824,
"step": 81
},
{
"epoch": 0.10098522167487685,
"grad_norm": 10.894555994753386,
"learning_rate": 3.3196721311475413e-06,
"loss": 2.919370651245117,
"step": 82
},
{
"epoch": 0.1022167487684729,
"grad_norm": 10.246835888516838,
"learning_rate": 3.3606557377049183e-06,
"loss": 3.0058987140655518,
"step": 83
},
{
"epoch": 0.10344827586206896,
"grad_norm": 8.315907792605513,
"learning_rate": 3.4016393442622954e-06,
"loss": 3.201812744140625,
"step": 84
},
{
"epoch": 0.10467980295566502,
"grad_norm": 10.55746200109404,
"learning_rate": 3.4426229508196724e-06,
"loss": 2.8387913703918457,
"step": 85
},
{
"epoch": 0.10591133004926108,
"grad_norm": 23.69077930997652,
"learning_rate": 3.4836065573770495e-06,
"loss": 3.565217971801758,
"step": 86
},
{
"epoch": 0.10714285714285714,
"grad_norm": 17.752023971892026,
"learning_rate": 3.5245901639344265e-06,
"loss": 3.563566207885742,
"step": 87
},
{
"epoch": 0.10837438423645321,
"grad_norm": 7.328374103560201,
"learning_rate": 3.5655737704918036e-06,
"loss": 3.3282840251922607,
"step": 88
},
{
"epoch": 0.10960591133004927,
"grad_norm": 9.307632619059875,
"learning_rate": 3.6065573770491806e-06,
"loss": 2.693999767303467,
"step": 89
},
{
"epoch": 0.11083743842364532,
"grad_norm": 9.537047052971076,
"learning_rate": 3.6475409836065577e-06,
"loss": 3.0820372104644775,
"step": 90
},
{
"epoch": 0.11206896551724138,
"grad_norm": 11.895652602739977,
"learning_rate": 3.6885245901639347e-06,
"loss": 2.5853302478790283,
"step": 91
},
{
"epoch": 0.11330049261083744,
"grad_norm": 19.909007675751152,
"learning_rate": 3.729508196721312e-06,
"loss": 3.622239589691162,
"step": 92
},
{
"epoch": 0.1145320197044335,
"grad_norm": 9.562243449141407,
"learning_rate": 3.7704918032786884e-06,
"loss": 3.269063949584961,
"step": 93
},
{
"epoch": 0.11576354679802955,
"grad_norm": 10.402493100303827,
"learning_rate": 3.811475409836066e-06,
"loss": 2.932877540588379,
"step": 94
},
{
"epoch": 0.11699507389162561,
"grad_norm": 7.9937288583052,
"learning_rate": 3.852459016393443e-06,
"loss": 2.8118062019348145,
"step": 95
},
{
"epoch": 0.11822660098522167,
"grad_norm": 12.161021036700474,
"learning_rate": 3.8934426229508196e-06,
"loss": 2.977217674255371,
"step": 96
},
{
"epoch": 0.11945812807881774,
"grad_norm": 9.48055025878799,
"learning_rate": 3.934426229508197e-06,
"loss": 2.534318685531616,
"step": 97
},
{
"epoch": 0.1206896551724138,
"grad_norm": 8.971246829575332,
"learning_rate": 3.975409836065574e-06,
"loss": 2.888187885284424,
"step": 98
},
{
"epoch": 0.12192118226600986,
"grad_norm": 9.005963079459367,
"learning_rate": 4.016393442622951e-06,
"loss": 2.6558847427368164,
"step": 99
},
{
"epoch": 0.12315270935960591,
"grad_norm": 9.651575487247985,
"learning_rate": 4.057377049180329e-06,
"loss": 2.707779884338379,
"step": 100
},
{
"epoch": 0.12438423645320197,
"grad_norm": 8.8113086796363,
"learning_rate": 4.098360655737705e-06,
"loss": 3.2292768955230713,
"step": 101
},
{
"epoch": 0.12561576354679804,
"grad_norm": 13.438004585842267,
"learning_rate": 4.139344262295083e-06,
"loss": 2.9476242065429688,
"step": 102
},
{
"epoch": 0.1268472906403941,
"grad_norm": 9.014089316100105,
"learning_rate": 4.180327868852459e-06,
"loss": 2.9598989486694336,
"step": 103
},
{
"epoch": 0.12807881773399016,
"grad_norm": 8.84790292690003,
"learning_rate": 4.221311475409837e-06,
"loss": 2.593669891357422,
"step": 104
},
{
"epoch": 0.12931034482758622,
"grad_norm": 9.732549020932908,
"learning_rate": 4.2622950819672135e-06,
"loss": 2.884164810180664,
"step": 105
},
{
"epoch": 0.13054187192118227,
"grad_norm": 16.843882776588455,
"learning_rate": 4.30327868852459e-06,
"loss": 3.091454267501831,
"step": 106
},
{
"epoch": 0.13177339901477833,
"grad_norm": 11.588593389024608,
"learning_rate": 4.3442622950819676e-06,
"loss": 2.913923740386963,
"step": 107
},
{
"epoch": 0.1330049261083744,
"grad_norm": 18.29569166468431,
"learning_rate": 4.385245901639344e-06,
"loss": 2.779545307159424,
"step": 108
},
{
"epoch": 0.13423645320197045,
"grad_norm": 9.202902461418143,
"learning_rate": 4.426229508196722e-06,
"loss": 1.8711936473846436,
"step": 109
},
{
"epoch": 0.1354679802955665,
"grad_norm": 13.481452134492262,
"learning_rate": 4.467213114754098e-06,
"loss": 2.892902374267578,
"step": 110
},
{
"epoch": 0.13669950738916256,
"grad_norm": 12.958399723073786,
"learning_rate": 4.508196721311476e-06,
"loss": 3.0064496994018555,
"step": 111
},
{
"epoch": 0.13793103448275862,
"grad_norm": 13.016721832572243,
"learning_rate": 4.549180327868853e-06,
"loss": 2.8515172004699707,
"step": 112
},
{
"epoch": 0.13916256157635468,
"grad_norm": 8.374489861175874,
"learning_rate": 4.59016393442623e-06,
"loss": 3.2504403591156006,
"step": 113
},
{
"epoch": 0.14039408866995073,
"grad_norm": 7.893218569270328,
"learning_rate": 4.631147540983607e-06,
"loss": 2.67405366897583,
"step": 114
},
{
"epoch": 0.1416256157635468,
"grad_norm": 10.146133271952388,
"learning_rate": 4.672131147540984e-06,
"loss": 3.079516887664795,
"step": 115
},
{
"epoch": 0.14285714285714285,
"grad_norm": 19.354096600007853,
"learning_rate": 4.7131147540983615e-06,
"loss": 2.8897287845611572,
"step": 116
},
{
"epoch": 0.1440886699507389,
"grad_norm": 13.276953948761626,
"learning_rate": 4.754098360655738e-06,
"loss": 2.7275729179382324,
"step": 117
},
{
"epoch": 0.14532019704433496,
"grad_norm": 9.682874064462416,
"learning_rate": 4.795081967213115e-06,
"loss": 2.9996538162231445,
"step": 118
},
{
"epoch": 0.14655172413793102,
"grad_norm": 7.397102570298892,
"learning_rate": 4.836065573770492e-06,
"loss": 3.307245969772339,
"step": 119
},
{
"epoch": 0.1477832512315271,
"grad_norm": 12.665703486872426,
"learning_rate": 4.877049180327869e-06,
"loss": 3.475133180618286,
"step": 120
},
{
"epoch": 0.14901477832512317,
"grad_norm": 11.317195785901513,
"learning_rate": 4.918032786885246e-06,
"loss": 3.0947790145874023,
"step": 121
},
{
"epoch": 0.15024630541871922,
"grad_norm": 7.236267930218516,
"learning_rate": 4.959016393442623e-06,
"loss": 2.9675135612487793,
"step": 122
},
{
"epoch": 0.15147783251231528,
"grad_norm": 8.759893869589918,
"learning_rate": 5e-06,
"loss": 2.7873148918151855,
"step": 123
},
{
"epoch": 0.15270935960591134,
"grad_norm": 10.395692764487977,
"learning_rate": 5.040983606557377e-06,
"loss": 3.10044264793396,
"step": 124
},
{
"epoch": 0.1539408866995074,
"grad_norm": 10.40007835832301,
"learning_rate": 5.0819672131147545e-06,
"loss": 3.755798101425171,
"step": 125
},
{
"epoch": 0.15517241379310345,
"grad_norm": 13.715148535872732,
"learning_rate": 5.122950819672131e-06,
"loss": 3.0117135047912598,
"step": 126
},
{
"epoch": 0.1564039408866995,
"grad_norm": 12.668410235183005,
"learning_rate": 5.163934426229509e-06,
"loss": 2.944417953491211,
"step": 127
},
{
"epoch": 0.15763546798029557,
"grad_norm": 14.317219715469237,
"learning_rate": 5.204918032786885e-06,
"loss": 2.672874927520752,
"step": 128
},
{
"epoch": 0.15886699507389163,
"grad_norm": 16.489459603874575,
"learning_rate": 5.245901639344263e-06,
"loss": 2.7205734252929688,
"step": 129
},
{
"epoch": 0.16009852216748768,
"grad_norm": 16.41932178225047,
"learning_rate": 5.286885245901639e-06,
"loss": 2.883897304534912,
"step": 130
},
{
"epoch": 0.16133004926108374,
"grad_norm": 15.043569897203326,
"learning_rate": 5.327868852459017e-06,
"loss": 2.782104253768921,
"step": 131
},
{
"epoch": 0.1625615763546798,
"grad_norm": 8.98371180872493,
"learning_rate": 5.3688524590163935e-06,
"loss": 2.6445870399475098,
"step": 132
},
{
"epoch": 0.16379310344827586,
"grad_norm": 11.815392040561601,
"learning_rate": 5.409836065573772e-06,
"loss": 2.9319727420806885,
"step": 133
},
{
"epoch": 0.16502463054187191,
"grad_norm": 10.152797634103624,
"learning_rate": 5.4508196721311476e-06,
"loss": 3.169668674468994,
"step": 134
},
{
"epoch": 0.16625615763546797,
"grad_norm": 14.778160076043047,
"learning_rate": 5.491803278688526e-06,
"loss": 2.8588128089904785,
"step": 135
},
{
"epoch": 0.16748768472906403,
"grad_norm": 10.175583728158522,
"learning_rate": 5.5327868852459025e-06,
"loss": 2.9894580841064453,
"step": 136
},
{
"epoch": 0.1687192118226601,
"grad_norm": 9.056737222762985,
"learning_rate": 5.573770491803278e-06,
"loss": 2.5721185207366943,
"step": 137
},
{
"epoch": 0.16995073891625614,
"grad_norm": 13.273464461148466,
"learning_rate": 5.614754098360657e-06,
"loss": 2.927572727203369,
"step": 138
},
{
"epoch": 0.17118226600985223,
"grad_norm": 6.55893818610158,
"learning_rate": 5.655737704918033e-06,
"loss": 2.1956796646118164,
"step": 139
},
{
"epoch": 0.1724137931034483,
"grad_norm": 29.225445444647217,
"learning_rate": 5.696721311475411e-06,
"loss": 2.9739363193511963,
"step": 140
},
{
"epoch": 0.17364532019704434,
"grad_norm": 11.15274917433196,
"learning_rate": 5.737704918032787e-06,
"loss": 2.9413986206054688,
"step": 141
},
{
"epoch": 0.1748768472906404,
"grad_norm": 10.26279112360335,
"learning_rate": 5.778688524590165e-06,
"loss": 3.267493724822998,
"step": 142
},
{
"epoch": 0.17610837438423646,
"grad_norm": 10.574770426769376,
"learning_rate": 5.8196721311475415e-06,
"loss": 3.355569362640381,
"step": 143
},
{
"epoch": 0.17733990147783252,
"grad_norm": 30.57215689151005,
"learning_rate": 5.860655737704919e-06,
"loss": 1.9742871522903442,
"step": 144
},
{
"epoch": 0.17857142857142858,
"grad_norm": 12.842491765573998,
"learning_rate": 5.9016393442622956e-06,
"loss": 3.571032762527466,
"step": 145
},
{
"epoch": 0.17980295566502463,
"grad_norm": 12.726974439363154,
"learning_rate": 5.942622950819673e-06,
"loss": 3.3115599155426025,
"step": 146
},
{
"epoch": 0.1810344827586207,
"grad_norm": 17.55458268041124,
"learning_rate": 5.98360655737705e-06,
"loss": 2.781893730163574,
"step": 147
},
{
"epoch": 0.18226600985221675,
"grad_norm": 21.115989900825127,
"learning_rate": 6.024590163934426e-06,
"loss": 3.5053911209106445,
"step": 148
},
{
"epoch": 0.1834975369458128,
"grad_norm": 14.601719954400593,
"learning_rate": 6.065573770491804e-06,
"loss": 2.797297477722168,
"step": 149
},
{
"epoch": 0.18472906403940886,
"grad_norm": 11.706500964440364,
"learning_rate": 6.10655737704918e-06,
"loss": 2.995811939239502,
"step": 150
},
{
"epoch": 0.18596059113300492,
"grad_norm": 15.414506649569596,
"learning_rate": 6.147540983606558e-06,
"loss": 3.028142213821411,
"step": 151
},
{
"epoch": 0.18719211822660098,
"grad_norm": 16.893206406115734,
"learning_rate": 6.1885245901639345e-06,
"loss": 3.092806816101074,
"step": 152
},
{
"epoch": 0.18842364532019704,
"grad_norm": 15.790657692703299,
"learning_rate": 6.229508196721312e-06,
"loss": 3.4657726287841797,
"step": 153
},
{
"epoch": 0.1896551724137931,
"grad_norm": 14.336314687505745,
"learning_rate": 6.270491803278689e-06,
"loss": 2.888990879058838,
"step": 154
},
{
"epoch": 0.19088669950738915,
"grad_norm": 8.384597105554349,
"learning_rate": 6.311475409836066e-06,
"loss": 2.21640682220459,
"step": 155
},
{
"epoch": 0.1921182266009852,
"grad_norm": 15.11144998304732,
"learning_rate": 6.352459016393443e-06,
"loss": 3.1153030395507812,
"step": 156
},
{
"epoch": 0.1933497536945813,
"grad_norm": 10.552333909396582,
"learning_rate": 6.393442622950821e-06,
"loss": 3.5814146995544434,
"step": 157
},
{
"epoch": 0.19458128078817735,
"grad_norm": 16.968338748229492,
"learning_rate": 6.434426229508197e-06,
"loss": 3.3865175247192383,
"step": 158
},
{
"epoch": 0.1958128078817734,
"grad_norm": 18.57431273466726,
"learning_rate": 6.475409836065575e-06,
"loss": 3.2125191688537598,
"step": 159
},
{
"epoch": 0.19704433497536947,
"grad_norm": 6.884951933192958,
"learning_rate": 6.516393442622952e-06,
"loss": 3.137500286102295,
"step": 160
},
{
"epoch": 0.19827586206896552,
"grad_norm": 14.232532156130397,
"learning_rate": 6.5573770491803276e-06,
"loss": 2.63275408744812,
"step": 161
},
{
"epoch": 0.19950738916256158,
"grad_norm": 8.457248873163048,
"learning_rate": 6.598360655737706e-06,
"loss": 3.1714844703674316,
"step": 162
},
{
"epoch": 0.20073891625615764,
"grad_norm": 8.202663921028103,
"learning_rate": 6.6393442622950825e-06,
"loss": 2.2414371967315674,
"step": 163
},
{
"epoch": 0.2019704433497537,
"grad_norm": 21.716160496341246,
"learning_rate": 6.68032786885246e-06,
"loss": 2.4281110763549805,
"step": 164
},
{
"epoch": 0.20320197044334976,
"grad_norm": 14.06837422573523,
"learning_rate": 6.721311475409837e-06,
"loss": 2.6953632831573486,
"step": 165
},
{
"epoch": 0.2044334975369458,
"grad_norm": 12.440616463990054,
"learning_rate": 6.762295081967214e-06,
"loss": 2.7645516395568848,
"step": 166
},
{
"epoch": 0.20566502463054187,
"grad_norm": 9.155924284482328,
"learning_rate": 6.803278688524591e-06,
"loss": 2.676801919937134,
"step": 167
},
{
"epoch": 0.20689655172413793,
"grad_norm": 18.399209140322007,
"learning_rate": 6.844262295081968e-06,
"loss": 3.2417163848876953,
"step": 168
},
{
"epoch": 0.20812807881773399,
"grad_norm": 10.633235724872472,
"learning_rate": 6.885245901639345e-06,
"loss": 3.1967976093292236,
"step": 169
},
{
"epoch": 0.20935960591133004,
"grad_norm": 9.001521768789516,
"learning_rate": 6.926229508196722e-06,
"loss": 3.4212145805358887,
"step": 170
},
{
"epoch": 0.2105911330049261,
"grad_norm": 19.131341549460146,
"learning_rate": 6.967213114754099e-06,
"loss": 3.0731911659240723,
"step": 171
},
{
"epoch": 0.21182266009852216,
"grad_norm": 24.78027708091891,
"learning_rate": 7.0081967213114756e-06,
"loss": 3.8659727573394775,
"step": 172
},
{
"epoch": 0.21305418719211822,
"grad_norm": 7.256951095872975,
"learning_rate": 7.049180327868853e-06,
"loss": 3.036478042602539,
"step": 173
},
{
"epoch": 0.21428571428571427,
"grad_norm": 13.753177425595323,
"learning_rate": 7.09016393442623e-06,
"loss": 2.489211082458496,
"step": 174
},
{
"epoch": 0.21551724137931033,
"grad_norm": 15.568690129763258,
"learning_rate": 7.131147540983607e-06,
"loss": 3.8306775093078613,
"step": 175
},
{
"epoch": 0.21674876847290642,
"grad_norm": 14.053955715138319,
"learning_rate": 7.172131147540984e-06,
"loss": 3.0287742614746094,
"step": 176
},
{
"epoch": 0.21798029556650247,
"grad_norm": 7.402046078874498,
"learning_rate": 7.213114754098361e-06,
"loss": 2.767753839492798,
"step": 177
},
{
"epoch": 0.21921182266009853,
"grad_norm": 7.607064770644376,
"learning_rate": 7.254098360655738e-06,
"loss": 2.8400726318359375,
"step": 178
},
{
"epoch": 0.2204433497536946,
"grad_norm": 9.218463959135196,
"learning_rate": 7.295081967213115e-06,
"loss": 2.9013113975524902,
"step": 179
},
{
"epoch": 0.22167487684729065,
"grad_norm": 14.207394035741054,
"learning_rate": 7.336065573770492e-06,
"loss": 3.1111714839935303,
"step": 180
},
{
"epoch": 0.2229064039408867,
"grad_norm": 22.91981906121516,
"learning_rate": 7.3770491803278695e-06,
"loss": 2.968287229537964,
"step": 181
},
{
"epoch": 0.22413793103448276,
"grad_norm": 25.20920899192849,
"learning_rate": 7.418032786885246e-06,
"loss": 3.2560596466064453,
"step": 182
},
{
"epoch": 0.22536945812807882,
"grad_norm": 11.263908332317076,
"learning_rate": 7.459016393442624e-06,
"loss": 2.6196365356445312,
"step": 183
},
{
"epoch": 0.22660098522167488,
"grad_norm": 9.253114778490854,
"learning_rate": 7.500000000000001e-06,
"loss": 2.48789644241333,
"step": 184
},
{
"epoch": 0.22783251231527094,
"grad_norm": 10.894130133931592,
"learning_rate": 7.540983606557377e-06,
"loss": 3.492011308670044,
"step": 185
},
{
"epoch": 0.229064039408867,
"grad_norm": 10.265317756792616,
"learning_rate": 7.581967213114755e-06,
"loss": 2.643688917160034,
"step": 186
},
{
"epoch": 0.23029556650246305,
"grad_norm": 18.91537781193984,
"learning_rate": 7.622950819672132e-06,
"loss": 3.291731834411621,
"step": 187
},
{
"epoch": 0.2315270935960591,
"grad_norm": 8.094549723224802,
"learning_rate": 7.66393442622951e-06,
"loss": 2.9554359912872314,
"step": 188
},
{
"epoch": 0.23275862068965517,
"grad_norm": 8.032083532292669,
"learning_rate": 7.704918032786886e-06,
"loss": 2.634860038757324,
"step": 189
},
{
"epoch": 0.23399014778325122,
"grad_norm": 12.421064936443088,
"learning_rate": 7.745901639344263e-06,
"loss": 3.505284309387207,
"step": 190
},
{
"epoch": 0.23522167487684728,
"grad_norm": 9.73160074977933,
"learning_rate": 7.786885245901639e-06,
"loss": 2.8865461349487305,
"step": 191
},
{
"epoch": 0.23645320197044334,
"grad_norm": 9.154882618515046,
"learning_rate": 7.827868852459017e-06,
"loss": 2.804072618484497,
"step": 192
},
{
"epoch": 0.2376847290640394,
"grad_norm": 19.13061642741136,
"learning_rate": 7.868852459016394e-06,
"loss": 2.830981969833374,
"step": 193
},
{
"epoch": 0.23891625615763548,
"grad_norm": 15.563283146640595,
"learning_rate": 7.909836065573772e-06,
"loss": 2.2295336723327637,
"step": 194
},
{
"epoch": 0.24014778325123154,
"grad_norm": 12.225259694302743,
"learning_rate": 7.950819672131147e-06,
"loss": 2.338548183441162,
"step": 195
},
{
"epoch": 0.2413793103448276,
"grad_norm": 9.892040827483035,
"learning_rate": 7.991803278688526e-06,
"loss": 3.0856008529663086,
"step": 196
},
{
"epoch": 0.24261083743842365,
"grad_norm": 7.694617498251832,
"learning_rate": 8.032786885245902e-06,
"loss": 2.8032941818237305,
"step": 197
},
{
"epoch": 0.2438423645320197,
"grad_norm": 14.517107480578428,
"learning_rate": 8.073770491803279e-06,
"loss": 2.793623924255371,
"step": 198
},
{
"epoch": 0.24507389162561577,
"grad_norm": 14.257539519236145,
"learning_rate": 8.114754098360657e-06,
"loss": 3.316802740097046,
"step": 199
},
{
"epoch": 0.24630541871921183,
"grad_norm": 9.345732169704513,
"learning_rate": 8.155737704918034e-06,
"loss": 2.7230677604675293,
"step": 200
},
{
"epoch": 0.24753694581280788,
"grad_norm": 15.629904452590212,
"learning_rate": 8.19672131147541e-06,
"loss": 3.3343541622161865,
"step": 201
},
{
"epoch": 0.24876847290640394,
"grad_norm": 15.523761255621764,
"learning_rate": 8.237704918032787e-06,
"loss": 2.6796741485595703,
"step": 202
},
{
"epoch": 0.25,
"grad_norm": 19.56220339462512,
"learning_rate": 8.278688524590165e-06,
"loss": 3.5974526405334473,
"step": 203
},
{
"epoch": 0.2512315270935961,
"grad_norm": 13.897070581153926,
"learning_rate": 8.319672131147542e-06,
"loss": 2.2697930335998535,
"step": 204
},
{
"epoch": 0.2524630541871921,
"grad_norm": 58.73834156491825,
"learning_rate": 8.360655737704919e-06,
"loss": 3.692251682281494,
"step": 205
},
{
"epoch": 0.2536945812807882,
"grad_norm": 7.38409958845656,
"learning_rate": 8.401639344262295e-06,
"loss": 1.9303261041641235,
"step": 206
},
{
"epoch": 0.25492610837438423,
"grad_norm": 9.965151267955871,
"learning_rate": 8.442622950819674e-06,
"loss": 2.538956880569458,
"step": 207
},
{
"epoch": 0.2561576354679803,
"grad_norm": 9.12744959101674,
"learning_rate": 8.48360655737705e-06,
"loss": 2.777608633041382,
"step": 208
},
{
"epoch": 0.25738916256157635,
"grad_norm": 7.651759491423955,
"learning_rate": 8.524590163934427e-06,
"loss": 2.5776896476745605,
"step": 209
},
{
"epoch": 0.25862068965517243,
"grad_norm": 7.384463920815584,
"learning_rate": 8.565573770491804e-06,
"loss": 2.9199795722961426,
"step": 210
},
{
"epoch": 0.25985221674876846,
"grad_norm": 20.103355409171535,
"learning_rate": 8.60655737704918e-06,
"loss": 3.515129566192627,
"step": 211
},
{
"epoch": 0.26108374384236455,
"grad_norm": 11.426838299111452,
"learning_rate": 8.647540983606559e-06,
"loss": 2.5549678802490234,
"step": 212
},
{
"epoch": 0.2623152709359606,
"grad_norm": 9.257633699344172,
"learning_rate": 8.688524590163935e-06,
"loss": 2.769425630569458,
"step": 213
},
{
"epoch": 0.26354679802955666,
"grad_norm": 10.532098802898833,
"learning_rate": 8.729508196721312e-06,
"loss": 3.369231700897217,
"step": 214
},
{
"epoch": 0.2647783251231527,
"grad_norm": 9.351621764685488,
"learning_rate": 8.770491803278688e-06,
"loss": 2.942309856414795,
"step": 215
},
{
"epoch": 0.2660098522167488,
"grad_norm": 13.925057065300786,
"learning_rate": 8.811475409836067e-06,
"loss": 2.7516608238220215,
"step": 216
},
{
"epoch": 0.2672413793103448,
"grad_norm": 36.50661601809998,
"learning_rate": 8.852459016393443e-06,
"loss": 2.8445613384246826,
"step": 217
},
{
"epoch": 0.2684729064039409,
"grad_norm": 22.25960453914331,
"learning_rate": 8.893442622950822e-06,
"loss": 2.987518787384033,
"step": 218
},
{
"epoch": 0.2697044334975369,
"grad_norm": 16.564591915051718,
"learning_rate": 8.934426229508197e-06,
"loss": 3.2499587535858154,
"step": 219
},
{
"epoch": 0.270935960591133,
"grad_norm": 17.28227853231096,
"learning_rate": 8.975409836065575e-06,
"loss": 2.926447868347168,
"step": 220
},
{
"epoch": 0.27216748768472904,
"grad_norm": 11.211927116407436,
"learning_rate": 9.016393442622952e-06,
"loss": 2.8910017013549805,
"step": 221
},
{
"epoch": 0.2733990147783251,
"grad_norm": 8.72596083956733,
"learning_rate": 9.057377049180328e-06,
"loss": 3.0613536834716797,
"step": 222
},
{
"epoch": 0.2746305418719212,
"grad_norm": 9.803135692376356,
"learning_rate": 9.098360655737707e-06,
"loss": 2.829414129257202,
"step": 223
},
{
"epoch": 0.27586206896551724,
"grad_norm": 12.92734853493422,
"learning_rate": 9.139344262295083e-06,
"loss": 2.7085399627685547,
"step": 224
},
{
"epoch": 0.2770935960591133,
"grad_norm": 9.4118708856159,
"learning_rate": 9.18032786885246e-06,
"loss": 2.6637799739837646,
"step": 225
},
{
"epoch": 0.27832512315270935,
"grad_norm": 18.83957093140758,
"learning_rate": 9.221311475409836e-06,
"loss": 2.845503807067871,
"step": 226
},
{
"epoch": 0.27955665024630544,
"grad_norm": 13.475569415500434,
"learning_rate": 9.262295081967215e-06,
"loss": 2.954394817352295,
"step": 227
},
{
"epoch": 0.28078817733990147,
"grad_norm": 8.290170639522628,
"learning_rate": 9.303278688524591e-06,
"loss": 2.640540838241577,
"step": 228
},
{
"epoch": 0.28201970443349755,
"grad_norm": 11.224559700746246,
"learning_rate": 9.344262295081968e-06,
"loss": 2.806300163269043,
"step": 229
},
{
"epoch": 0.2832512315270936,
"grad_norm": 7.885675569548075,
"learning_rate": 9.385245901639345e-06,
"loss": 2.6030101776123047,
"step": 230
},
{
"epoch": 0.28448275862068967,
"grad_norm": 24.236973973758758,
"learning_rate": 9.426229508196723e-06,
"loss": 2.7991466522216797,
"step": 231
},
{
"epoch": 0.2857142857142857,
"grad_norm": 8.845347044883379,
"learning_rate": 9.4672131147541e-06,
"loss": 3.106261968612671,
"step": 232
},
{
"epoch": 0.2869458128078818,
"grad_norm": 51.821805980416265,
"learning_rate": 9.508196721311476e-06,
"loss": 3.2630815505981445,
"step": 233
},
{
"epoch": 0.2881773399014778,
"grad_norm": 16.78742746550897,
"learning_rate": 9.549180327868853e-06,
"loss": 3.1156482696533203,
"step": 234
},
{
"epoch": 0.2894088669950739,
"grad_norm": 13.713777073631656,
"learning_rate": 9.59016393442623e-06,
"loss": 3.1271071434020996,
"step": 235
},
{
"epoch": 0.29064039408866993,
"grad_norm": 13.698738323083157,
"learning_rate": 9.631147540983608e-06,
"loss": 2.536348342895508,
"step": 236
},
{
"epoch": 0.291871921182266,
"grad_norm": 15.926322663194057,
"learning_rate": 9.672131147540984e-06,
"loss": 2.8055825233459473,
"step": 237
},
{
"epoch": 0.29310344827586204,
"grad_norm": 10.519363729962654,
"learning_rate": 9.713114754098361e-06,
"loss": 2.9949395656585693,
"step": 238
},
{
"epoch": 0.29433497536945813,
"grad_norm": 12.579584872972768,
"learning_rate": 9.754098360655738e-06,
"loss": 2.125136137008667,
"step": 239
},
{
"epoch": 0.2955665024630542,
"grad_norm": 11.391036061101172,
"learning_rate": 9.795081967213116e-06,
"loss": 2.830984592437744,
"step": 240
},
{
"epoch": 0.29679802955665024,
"grad_norm": 14.46789942529014,
"learning_rate": 9.836065573770493e-06,
"loss": 3.2255706787109375,
"step": 241
},
{
"epoch": 0.29802955665024633,
"grad_norm": 8.899469108078774,
"learning_rate": 9.87704918032787e-06,
"loss": 2.686436653137207,
"step": 242
},
{
"epoch": 0.29926108374384236,
"grad_norm": 10.094433891654246,
"learning_rate": 9.918032786885246e-06,
"loss": 2.497978687286377,
"step": 243
},
{
"epoch": 0.30049261083743845,
"grad_norm": 8.691385167763809,
"learning_rate": 9.959016393442624e-06,
"loss": 3.308448076248169,
"step": 244
},
{
"epoch": 0.3017241379310345,
"grad_norm": 15.757524580227669,
"learning_rate": 1e-05,
"loss": 3.2378220558166504,
"step": 245
},
{
"epoch": 0.30295566502463056,
"grad_norm": 8.671108255060687,
"learning_rate": 9.999994864785605e-06,
"loss": 2.4129133224487305,
"step": 246
},
{
"epoch": 0.3041871921182266,
"grad_norm": 13.501190126023713,
"learning_rate": 9.99997945915297e-06,
"loss": 2.938180923461914,
"step": 247
},
{
"epoch": 0.3054187192118227,
"grad_norm": 11.217667256673044,
"learning_rate": 9.999953783133733e-06,
"loss": 2.5165305137634277,
"step": 248
},
{
"epoch": 0.3066502463054187,
"grad_norm": 7.520771962392289,
"learning_rate": 9.999917836780642e-06,
"loss": 3.425577163696289,
"step": 249
},
{
"epoch": 0.3078817733990148,
"grad_norm": 13.889092280188136,
"learning_rate": 9.999871620167532e-06,
"loss": 2.876093626022339,
"step": 250
},
{
"epoch": 0.3091133004926108,
"grad_norm": 7.799661481860974,
"learning_rate": 9.999815133389334e-06,
"loss": 2.9071428775787354,
"step": 251
},
{
"epoch": 0.3103448275862069,
"grad_norm": 18.185225557276123,
"learning_rate": 9.999748376562078e-06,
"loss": 2.998086452484131,
"step": 252
},
{
"epoch": 0.31157635467980294,
"grad_norm": 27.086825836566575,
"learning_rate": 9.999671349822887e-06,
"loss": 2.1193456649780273,
"step": 253
},
{
"epoch": 0.312807881773399,
"grad_norm": 13.320934166458603,
"learning_rate": 9.999584053329983e-06,
"loss": 2.753380298614502,
"step": 254
},
{
"epoch": 0.31403940886699505,
"grad_norm": 14.498031739385082,
"learning_rate": 9.999486487262677e-06,
"loss": 2.876704216003418,
"step": 255
},
{
"epoch": 0.31527093596059114,
"grad_norm": 13.532410059083729,
"learning_rate": 9.999378651821381e-06,
"loss": 3.0882208347320557,
"step": 256
},
{
"epoch": 0.31650246305418717,
"grad_norm": 13.700484400761207,
"learning_rate": 9.999260547227599e-06,
"loss": 3.155285120010376,
"step": 257
},
{
"epoch": 0.31773399014778325,
"grad_norm": 12.6000984521867,
"learning_rate": 9.999132173723923e-06,
"loss": 2.7646055221557617,
"step": 258
},
{
"epoch": 0.31896551724137934,
"grad_norm": 15.115470197004113,
"learning_rate": 9.998993531574048e-06,
"loss": 2.7237563133239746,
"step": 259
},
{
"epoch": 0.32019704433497537,
"grad_norm": 20.594748113733633,
"learning_rate": 9.998844621062755e-06,
"loss": 3.3845739364624023,
"step": 260
},
{
"epoch": 0.32142857142857145,
"grad_norm": 10.767576295669059,
"learning_rate": 9.998685442495921e-06,
"loss": 3.8065264225006104,
"step": 261
},
{
"epoch": 0.3226600985221675,
"grad_norm": 20.754860824013544,
"learning_rate": 9.998515996200508e-06,
"loss": 2.8899989128112793,
"step": 262
},
{
"epoch": 0.32389162561576357,
"grad_norm": 15.819137797930164,
"learning_rate": 9.998336282524579e-06,
"loss": 3.253079414367676,
"step": 263
},
{
"epoch": 0.3251231527093596,
"grad_norm": 18.790797790728803,
"learning_rate": 9.998146301837274e-06,
"loss": 3.346510648727417,
"step": 264
},
{
"epoch": 0.3263546798029557,
"grad_norm": 23.146345527241454,
"learning_rate": 9.997946054528837e-06,
"loss": 3.4698657989501953,
"step": 265
},
{
"epoch": 0.3275862068965517,
"grad_norm": 14.512612088330997,
"learning_rate": 9.99773554101059e-06,
"loss": 3.174567699432373,
"step": 266
},
{
"epoch": 0.3288177339901478,
"grad_norm": 12.860516080892424,
"learning_rate": 9.997514761714946e-06,
"loss": 2.5275719165802,
"step": 267
},
{
"epoch": 0.33004926108374383,
"grad_norm": 9.43003857415246,
"learning_rate": 9.997283717095403e-06,
"loss": 2.9102673530578613,
"step": 268
},
{
"epoch": 0.3312807881773399,
"grad_norm": 11.178249951549107,
"learning_rate": 9.99704240762655e-06,
"loss": 2.865558624267578,
"step": 269
},
{
"epoch": 0.33251231527093594,
"grad_norm": 24.802063921828417,
"learning_rate": 9.996790833804053e-06,
"loss": 2.749305248260498,
"step": 270
},
{
"epoch": 0.33374384236453203,
"grad_norm": 24.70724769915988,
"learning_rate": 9.996528996144668e-06,
"loss": 2.0590691566467285,
"step": 271
},
{
"epoch": 0.33497536945812806,
"grad_norm": 14.115920333851845,
"learning_rate": 9.996256895186234e-06,
"loss": 3.0421628952026367,
"step": 272
},
{
"epoch": 0.33620689655172414,
"grad_norm": 12.058059347872495,
"learning_rate": 9.995974531487668e-06,
"loss": 2.8302841186523438,
"step": 273
},
{
"epoch": 0.3374384236453202,
"grad_norm": 12.632643288786921,
"learning_rate": 9.995681905628968e-06,
"loss": 2.7192673683166504,
"step": 274
},
{
"epoch": 0.33866995073891626,
"grad_norm": 15.484122360072316,
"learning_rate": 9.995379018211215e-06,
"loss": 2.3330166339874268,
"step": 275
},
{
"epoch": 0.3399014778325123,
"grad_norm": 13.2967377526589,
"learning_rate": 9.995065869856566e-06,
"loss": 2.5359480381011963,
"step": 276
},
{
"epoch": 0.3411330049261084,
"grad_norm": 15.221286627267526,
"learning_rate": 9.994742461208251e-06,
"loss": 3.049252986907959,
"step": 277
},
{
"epoch": 0.34236453201970446,
"grad_norm": 15.24270242699156,
"learning_rate": 9.994408792930584e-06,
"loss": 3.3440940380096436,
"step": 278
},
{
"epoch": 0.3435960591133005,
"grad_norm": 14.053973379642196,
"learning_rate": 9.994064865708944e-06,
"loss": 3.038376808166504,
"step": 279
},
{
"epoch": 0.3448275862068966,
"grad_norm": 22.631635572415856,
"learning_rate": 9.993710680249788e-06,
"loss": 3.6074423789978027,
"step": 280
},
{
"epoch": 0.3460591133004926,
"grad_norm": 20.559687915989883,
"learning_rate": 9.993346237280646e-06,
"loss": 2.686741352081299,
"step": 281
},
{
"epoch": 0.3472906403940887,
"grad_norm": 12.521946549290966,
"learning_rate": 9.992971537550112e-06,
"loss": 2.4198198318481445,
"step": 282
},
{
"epoch": 0.3485221674876847,
"grad_norm": 6.138840145200369,
"learning_rate": 9.992586581827853e-06,
"loss": 2.8091788291931152,
"step": 283
},
{
"epoch": 0.3497536945812808,
"grad_norm": 9.177811201919399,
"learning_rate": 9.992191370904599e-06,
"loss": 3.0199592113494873,
"step": 284
},
{
"epoch": 0.35098522167487683,
"grad_norm": 11.072879739046153,
"learning_rate": 9.991785905592149e-06,
"loss": 2.6372945308685303,
"step": 285
},
{
"epoch": 0.3522167487684729,
"grad_norm": 12.835701532770578,
"learning_rate": 9.991370186723363e-06,
"loss": 2.9127607345581055,
"step": 286
},
{
"epoch": 0.35344827586206895,
"grad_norm": 16.621843867679726,
"learning_rate": 9.990944215152166e-06,
"loss": 2.464376926422119,
"step": 287
},
{
"epoch": 0.35467980295566504,
"grad_norm": 9.777456171349527,
"learning_rate": 9.990507991753535e-06,
"loss": 2.8306374549865723,
"step": 288
},
{
"epoch": 0.35591133004926107,
"grad_norm": 11.701262899932036,
"learning_rate": 9.990061517423513e-06,
"loss": 2.9181313514709473,
"step": 289
},
{
"epoch": 0.35714285714285715,
"grad_norm": 12.914380903938605,
"learning_rate": 9.989604793079198e-06,
"loss": 3.1937739849090576,
"step": 290
},
{
"epoch": 0.3583743842364532,
"grad_norm": 25.41280169964493,
"learning_rate": 9.989137819658738e-06,
"loss": 4.190927028656006,
"step": 291
},
{
"epoch": 0.35960591133004927,
"grad_norm": 12.268585179317036,
"learning_rate": 9.988660598121337e-06,
"loss": 2.8343558311462402,
"step": 292
},
{
"epoch": 0.3608374384236453,
"grad_norm": 14.508602864953724,
"learning_rate": 9.988173129447251e-06,
"loss": 3.741821050643921,
"step": 293
},
{
"epoch": 0.3620689655172414,
"grad_norm": 8.935077328629724,
"learning_rate": 9.98767541463778e-06,
"loss": 2.484419345855713,
"step": 294
},
{
"epoch": 0.3633004926108374,
"grad_norm": 8.195009351092525,
"learning_rate": 9.987167454715277e-06,
"loss": 2.671337127685547,
"step": 295
},
{
"epoch": 0.3645320197044335,
"grad_norm": 11.197259917333458,
"learning_rate": 9.986649250723129e-06,
"loss": 3.118803024291992,
"step": 296
},
{
"epoch": 0.3657635467980296,
"grad_norm": 15.270785643435941,
"learning_rate": 9.986120803725776e-06,
"loss": 3.10141658782959,
"step": 297
},
{
"epoch": 0.3669950738916256,
"grad_norm": 11.19651727126236,
"learning_rate": 9.985582114808693e-06,
"loss": 2.7978734970092773,
"step": 298
},
{
"epoch": 0.3682266009852217,
"grad_norm": 14.058148431334251,
"learning_rate": 9.985033185078392e-06,
"loss": 2.5770411491394043,
"step": 299
},
{
"epoch": 0.3694581280788177,
"grad_norm": 9.544840021071943,
"learning_rate": 9.984474015662421e-06,
"loss": 3.0273873805999756,
"step": 300
},
{
"epoch": 0.3706896551724138,
"grad_norm": 8.198220678999139,
"learning_rate": 9.983904607709365e-06,
"loss": 2.9202780723571777,
"step": 301
},
{
"epoch": 0.37192118226600984,
"grad_norm": 12.107800006970532,
"learning_rate": 9.983324962388835e-06,
"loss": 2.9816439151763916,
"step": 302
},
{
"epoch": 0.3731527093596059,
"grad_norm": 7.601271321831279,
"learning_rate": 9.982735080891471e-06,
"loss": 2.5605852603912354,
"step": 303
},
{
"epoch": 0.37438423645320196,
"grad_norm": 13.035543237033318,
"learning_rate": 9.982134964428942e-06,
"loss": 2.9378490447998047,
"step": 304
},
{
"epoch": 0.37561576354679804,
"grad_norm": 7.731680542963359,
"learning_rate": 9.981524614233938e-06,
"loss": 2.410521984100342,
"step": 305
},
{
"epoch": 0.3768472906403941,
"grad_norm": 13.52353943681927,
"learning_rate": 9.98090403156017e-06,
"loss": 2.381927013397217,
"step": 306
},
{
"epoch": 0.37807881773399016,
"grad_norm": 17.35628297309107,
"learning_rate": 9.98027321768237e-06,
"loss": 3.1156816482543945,
"step": 307
},
{
"epoch": 0.3793103448275862,
"grad_norm": 8.977028820084396,
"learning_rate": 9.97963217389628e-06,
"loss": 3.2660152912139893,
"step": 308
},
{
"epoch": 0.3805418719211823,
"grad_norm": 14.66965301106164,
"learning_rate": 9.978980901518663e-06,
"loss": 3.1832613945007324,
"step": 309
},
{
"epoch": 0.3817733990147783,
"grad_norm": 27.78972817701185,
"learning_rate": 9.978319401887287e-06,
"loss": 2.719600200653076,
"step": 310
},
{
"epoch": 0.3830049261083744,
"grad_norm": 10.666579101176065,
"learning_rate": 9.977647676360927e-06,
"loss": 2.652092456817627,
"step": 311
},
{
"epoch": 0.3842364532019704,
"grad_norm": 8.005520537074315,
"learning_rate": 9.976965726319369e-06,
"loss": 2.5932788848876953,
"step": 312
},
{
"epoch": 0.3854679802955665,
"grad_norm": 15.690472287679249,
"learning_rate": 9.976273553163393e-06,
"loss": 2.558863401412964,
"step": 313
},
{
"epoch": 0.3866995073891626,
"grad_norm": 11.958180437694066,
"learning_rate": 9.975571158314783e-06,
"loss": 3.1973023414611816,
"step": 314
},
{
"epoch": 0.3879310344827586,
"grad_norm": 12.749275597057334,
"learning_rate": 9.974858543216319e-06,
"loss": 3.286236524581909,
"step": 315
},
{
"epoch": 0.3891625615763547,
"grad_norm": 16.985399241319477,
"learning_rate": 9.974135709331774e-06,
"loss": 3.5159969329833984,
"step": 316
},
{
"epoch": 0.39039408866995073,
"grad_norm": 10.457440991240187,
"learning_rate": 9.973402658145908e-06,
"loss": 2.647761821746826,
"step": 317
},
{
"epoch": 0.3916256157635468,
"grad_norm": 9.450705495020088,
"learning_rate": 9.972659391164473e-06,
"loss": 2.8499808311462402,
"step": 318
},
{
"epoch": 0.39285714285714285,
"grad_norm": 10.546244474419336,
"learning_rate": 9.971905909914206e-06,
"loss": 2.332852840423584,
"step": 319
},
{
"epoch": 0.39408866995073893,
"grad_norm": 10.2366500934473,
"learning_rate": 9.971142215942817e-06,
"loss": 2.627098560333252,
"step": 320
},
{
"epoch": 0.39532019704433496,
"grad_norm": 6.472838949640434,
"learning_rate": 9.970368310819e-06,
"loss": 2.302323341369629,
"step": 321
},
{
"epoch": 0.39655172413793105,
"grad_norm": 6.421471401290025,
"learning_rate": 9.969584196132427e-06,
"loss": 2.6783509254455566,
"step": 322
},
{
"epoch": 0.3977832512315271,
"grad_norm": 12.353934861805914,
"learning_rate": 9.96878987349373e-06,
"loss": 2.9487061500549316,
"step": 323
},
{
"epoch": 0.39901477832512317,
"grad_norm": 13.993445702154649,
"learning_rate": 9.967985344534521e-06,
"loss": 2.5883233547210693,
"step": 324
},
{
"epoch": 0.4002463054187192,
"grad_norm": 20.380213804590188,
"learning_rate": 9.96717061090737e-06,
"loss": 3.125821590423584,
"step": 325
},
{
"epoch": 0.4014778325123153,
"grad_norm": 6.812077926758059,
"learning_rate": 9.966345674285808e-06,
"loss": 2.829881191253662,
"step": 326
},
{
"epoch": 0.4027093596059113,
"grad_norm": 16.808551579421827,
"learning_rate": 9.965510536364329e-06,
"loss": 2.5988128185272217,
"step": 327
},
{
"epoch": 0.4039408866995074,
"grad_norm": 7.777965739175337,
"learning_rate": 9.964665198858375e-06,
"loss": 2.158940315246582,
"step": 328
},
{
"epoch": 0.4051724137931034,
"grad_norm": 10.632017505369658,
"learning_rate": 9.96380966350434e-06,
"loss": 2.716994285583496,
"step": 329
},
{
"epoch": 0.4064039408866995,
"grad_norm": 12.778378390552197,
"learning_rate": 9.962943932059573e-06,
"loss": 3.1283516883850098,
"step": 330
},
{
"epoch": 0.40763546798029554,
"grad_norm": 12.686658918372668,
"learning_rate": 9.962068006302357e-06,
"loss": 3.0957908630371094,
"step": 331
},
{
"epoch": 0.4088669950738916,
"grad_norm": 24.890731349370103,
"learning_rate": 9.961181888031917e-06,
"loss": 2.3027350902557373,
"step": 332
},
{
"epoch": 0.4100985221674877,
"grad_norm": 10.45514873243925,
"learning_rate": 9.960285579068419e-06,
"loss": 2.956791877746582,
"step": 333
},
{
"epoch": 0.41133004926108374,
"grad_norm": 28.23036034704062,
"learning_rate": 9.959379081252958e-06,
"loss": 2.5689826011657715,
"step": 334
},
{
"epoch": 0.4125615763546798,
"grad_norm": 8.031700376672275,
"learning_rate": 9.958462396447556e-06,
"loss": 3.1086199283599854,
"step": 335
},
{
"epoch": 0.41379310344827586,
"grad_norm": 15.790958589129726,
"learning_rate": 9.957535526535165e-06,
"loss": 3.134901285171509,
"step": 336
},
{
"epoch": 0.41502463054187194,
"grad_norm": 12.433447054233632,
"learning_rate": 9.956598473419652e-06,
"loss": 2.642225742340088,
"step": 337
},
{
"epoch": 0.41625615763546797,
"grad_norm": 9.36121478561991,
"learning_rate": 9.95565123902581e-06,
"loss": 2.828200340270996,
"step": 338
},
{
"epoch": 0.41748768472906406,
"grad_norm": 14.194698913635616,
"learning_rate": 9.954693825299333e-06,
"loss": 2.751354217529297,
"step": 339
},
{
"epoch": 0.4187192118226601,
"grad_norm": 13.475276856352862,
"learning_rate": 9.953726234206835e-06,
"loss": 2.818434715270996,
"step": 340
},
{
"epoch": 0.41995073891625617,
"grad_norm": 14.017642174434487,
"learning_rate": 9.95274846773583e-06,
"loss": 2.8631365299224854,
"step": 341
},
{
"epoch": 0.4211822660098522,
"grad_norm": 37.92442284518435,
"learning_rate": 9.951760527894733e-06,
"loss": 2.387998580932617,
"step": 342
},
{
"epoch": 0.4224137931034483,
"grad_norm": 8.636388354492292,
"learning_rate": 9.950762416712862e-06,
"loss": 2.366614580154419,
"step": 343
},
{
"epoch": 0.4236453201970443,
"grad_norm": 10.06521281831273,
"learning_rate": 9.949754136240416e-06,
"loss": 2.4502060413360596,
"step": 344
},
{
"epoch": 0.4248768472906404,
"grad_norm": 12.481723752818217,
"learning_rate": 9.948735688548496e-06,
"loss": 2.47091007232666,
"step": 345
},
{
"epoch": 0.42610837438423643,
"grad_norm": 8.973793469902368,
"learning_rate": 9.947707075729076e-06,
"loss": 3.0400021076202393,
"step": 346
},
{
"epoch": 0.4273399014778325,
"grad_norm": 10.331950331735893,
"learning_rate": 9.946668299895017e-06,
"loss": 2.622288227081299,
"step": 347
},
{
"epoch": 0.42857142857142855,
"grad_norm": 22.195871941281137,
"learning_rate": 9.945619363180054e-06,
"loss": 3.3773419857025146,
"step": 348
},
{
"epoch": 0.42980295566502463,
"grad_norm": 19.575310687428036,
"learning_rate": 9.944560267738792e-06,
"loss": 3.279005527496338,
"step": 349
},
{
"epoch": 0.43103448275862066,
"grad_norm": 11.204766296525598,
"learning_rate": 9.943491015746704e-06,
"loss": 2.8206255435943604,
"step": 350
},
{
"epoch": 0.43226600985221675,
"grad_norm": 19.31443626404287,
"learning_rate": 9.942411609400127e-06,
"loss": 3.312700033187866,
"step": 351
},
{
"epoch": 0.43349753694581283,
"grad_norm": 12.40959825169754,
"learning_rate": 9.941322050916251e-06,
"loss": 2.580315113067627,
"step": 352
},
{
"epoch": 0.43472906403940886,
"grad_norm": 18.26867922192619,
"learning_rate": 9.940222342533126e-06,
"loss": 2.8339614868164062,
"step": 353
},
{
"epoch": 0.43596059113300495,
"grad_norm": 15.240586085653998,
"learning_rate": 9.939112486509644e-06,
"loss": 2.582752227783203,
"step": 354
},
{
"epoch": 0.437192118226601,
"grad_norm": 14.054810279727889,
"learning_rate": 9.937992485125547e-06,
"loss": 2.9355309009552,
"step": 355
},
{
"epoch": 0.43842364532019706,
"grad_norm": 7.204056413186231,
"learning_rate": 9.936862340681412e-06,
"loss": 2.796612024307251,
"step": 356
},
{
"epoch": 0.4396551724137931,
"grad_norm": 5.797127744814052,
"learning_rate": 9.935722055498655e-06,
"loss": 2.6307716369628906,
"step": 357
},
{
"epoch": 0.4408866995073892,
"grad_norm": 8.742348132173227,
"learning_rate": 9.934571631919518e-06,
"loss": 2.8603620529174805,
"step": 358
},
{
"epoch": 0.4421182266009852,
"grad_norm": 12.186262361276388,
"learning_rate": 9.933411072307071e-06,
"loss": 3.1397266387939453,
"step": 359
},
{
"epoch": 0.4433497536945813,
"grad_norm": 8.973047578523662,
"learning_rate": 9.9322403790452e-06,
"loss": 2.5362772941589355,
"step": 360
},
{
"epoch": 0.4445812807881773,
"grad_norm": 17.982816499460725,
"learning_rate": 9.931059554538613e-06,
"loss": 2.7547712326049805,
"step": 361
},
{
"epoch": 0.4458128078817734,
"grad_norm": 15.389405107024809,
"learning_rate": 9.929868601212822e-06,
"loss": 3.144801139831543,
"step": 362
},
{
"epoch": 0.44704433497536944,
"grad_norm": 16.343273720769005,
"learning_rate": 9.928667521514149e-06,
"loss": 2.600550889968872,
"step": 363
},
{
"epoch": 0.4482758620689655,
"grad_norm": 11.532249256759682,
"learning_rate": 9.927456317909711e-06,
"loss": 2.176116704940796,
"step": 364
},
{
"epoch": 0.44950738916256155,
"grad_norm": 25.088404612293182,
"learning_rate": 9.92623499288743e-06,
"loss": 3.1918365955352783,
"step": 365
},
{
"epoch": 0.45073891625615764,
"grad_norm": 12.864077493891681,
"learning_rate": 9.92500354895601e-06,
"loss": 2.6937577724456787,
"step": 366
},
{
"epoch": 0.45197044334975367,
"grad_norm": 29.27990733585633,
"learning_rate": 9.92376198864494e-06,
"loss": 3.6490774154663086,
"step": 367
},
{
"epoch": 0.45320197044334976,
"grad_norm": 7.620954232577737,
"learning_rate": 9.922510314504493e-06,
"loss": 3.0342392921447754,
"step": 368
},
{
"epoch": 0.4544334975369458,
"grad_norm": 14.562498240608573,
"learning_rate": 9.921248529105716e-06,
"loss": 3.175008773803711,
"step": 369
},
{
"epoch": 0.45566502463054187,
"grad_norm": 9.096092875139751,
"learning_rate": 9.919976635040425e-06,
"loss": 1.9000710248947144,
"step": 370
},
{
"epoch": 0.45689655172413796,
"grad_norm": 19.30965262540543,
"learning_rate": 9.918694634921195e-06,
"loss": 3.5248589515686035,
"step": 371
},
{
"epoch": 0.458128078817734,
"grad_norm": 10.529945298812061,
"learning_rate": 9.91740253138137e-06,
"loss": 2.869842529296875,
"step": 372
},
{
"epoch": 0.45935960591133007,
"grad_norm": 10.698638706211932,
"learning_rate": 9.916100327075038e-06,
"loss": 1.9380724430084229,
"step": 373
},
{
"epoch": 0.4605911330049261,
"grad_norm": 17.707591147238283,
"learning_rate": 9.914788024677039e-06,
"loss": 2.2112460136413574,
"step": 374
},
{
"epoch": 0.4618226600985222,
"grad_norm": 10.065846050311237,
"learning_rate": 9.913465626882954e-06,
"loss": 3.1283068656921387,
"step": 375
},
{
"epoch": 0.4630541871921182,
"grad_norm": 25.33369677490011,
"learning_rate": 9.912133136409103e-06,
"loss": 2.692117929458618,
"step": 376
},
{
"epoch": 0.4642857142857143,
"grad_norm": 57.3231139544447,
"learning_rate": 9.910790555992536e-06,
"loss": 3.047241687774658,
"step": 377
},
{
"epoch": 0.46551724137931033,
"grad_norm": 11.840834448379393,
"learning_rate": 9.909437888391025e-06,
"loss": 3.0103232860565186,
"step": 378
},
{
"epoch": 0.4667487684729064,
"grad_norm": 15.056907160003684,
"learning_rate": 9.908075136383068e-06,
"loss": 2.8296966552734375,
"step": 379
},
{
"epoch": 0.46798029556650245,
"grad_norm": 8.534626696858023,
"learning_rate": 9.906702302767876e-06,
"loss": 2.818819999694824,
"step": 380
},
{
"epoch": 0.46921182266009853,
"grad_norm": 29.849300222390532,
"learning_rate": 9.905319390365364e-06,
"loss": 3.6281867027282715,
"step": 381
},
{
"epoch": 0.47044334975369456,
"grad_norm": 17.161390821083423,
"learning_rate": 9.903926402016153e-06,
"loss": 2.7123236656188965,
"step": 382
},
{
"epoch": 0.47167487684729065,
"grad_norm": 13.097065098778378,
"learning_rate": 9.902523340581562e-06,
"loss": 2.69736909866333,
"step": 383
},
{
"epoch": 0.4729064039408867,
"grad_norm": 11.269340257234004,
"learning_rate": 9.901110208943599e-06,
"loss": 3.088184118270874,
"step": 384
},
{
"epoch": 0.47413793103448276,
"grad_norm": 6.6950707947616745,
"learning_rate": 9.899687010004956e-06,
"loss": 2.606736183166504,
"step": 385
},
{
"epoch": 0.4753694581280788,
"grad_norm": 10.297903581299613,
"learning_rate": 9.898253746689007e-06,
"loss": 2.684105157852173,
"step": 386
},
{
"epoch": 0.4766009852216749,
"grad_norm": 15.82478266058562,
"learning_rate": 9.896810421939797e-06,
"loss": 2.8739280700683594,
"step": 387
},
{
"epoch": 0.47783251231527096,
"grad_norm": 8.284309924074774,
"learning_rate": 9.895357038722043e-06,
"loss": 2.835542917251587,
"step": 388
},
{
"epoch": 0.479064039408867,
"grad_norm": 15.854123121769446,
"learning_rate": 9.893893600021112e-06,
"loss": 2.855287551879883,
"step": 389
},
{
"epoch": 0.4802955665024631,
"grad_norm": 7.88725535997062,
"learning_rate": 9.892420108843038e-06,
"loss": 2.8026838302612305,
"step": 390
},
{
"epoch": 0.4815270935960591,
"grad_norm": 11.000709518913423,
"learning_rate": 9.890936568214493e-06,
"loss": 3.1150124073028564,
"step": 391
},
{
"epoch": 0.4827586206896552,
"grad_norm": 13.588584372243895,
"learning_rate": 9.889442981182802e-06,
"loss": 2.578108072280884,
"step": 392
},
{
"epoch": 0.4839901477832512,
"grad_norm": 16.34748858179715,
"learning_rate": 9.88793935081592e-06,
"loss": 2.7470006942749023,
"step": 393
},
{
"epoch": 0.4852216748768473,
"grad_norm": 10.809579161505546,
"learning_rate": 9.88642568020243e-06,
"loss": 2.9015283584594727,
"step": 394
},
{
"epoch": 0.48645320197044334,
"grad_norm": 13.55439142286002,
"learning_rate": 9.884901972451542e-06,
"loss": 3.79250431060791,
"step": 395
},
{
"epoch": 0.4876847290640394,
"grad_norm": 8.909988613184693,
"learning_rate": 9.883368230693082e-06,
"loss": 3.0748767852783203,
"step": 396
},
{
"epoch": 0.48891625615763545,
"grad_norm": 13.412610776910293,
"learning_rate": 9.881824458077491e-06,
"loss": 2.822726011276245,
"step": 397
},
{
"epoch": 0.49014778325123154,
"grad_norm": 11.426335338698937,
"learning_rate": 9.880270657775806e-06,
"loss": 2.7966151237487793,
"step": 398
},
{
"epoch": 0.49137931034482757,
"grad_norm": 10.55324948832395,
"learning_rate": 9.878706832979668e-06,
"loss": 2.8517651557922363,
"step": 399
},
{
"epoch": 0.49261083743842365,
"grad_norm": 11.070058186972197,
"learning_rate": 9.877132986901306e-06,
"loss": 2.7754080295562744,
"step": 400
},
{
"epoch": 0.4938423645320197,
"grad_norm": 8.886322673700336,
"learning_rate": 9.875549122773536e-06,
"loss": 2.9478702545166016,
"step": 401
},
{
"epoch": 0.49507389162561577,
"grad_norm": 9.759021404672636,
"learning_rate": 9.87395524384975e-06,
"loss": 2.9535412788391113,
"step": 402
},
{
"epoch": 0.4963054187192118,
"grad_norm": 22.265516010081125,
"learning_rate": 9.872351353403912e-06,
"loss": 3.415161609649658,
"step": 403
},
{
"epoch": 0.4975369458128079,
"grad_norm": 10.3371436402533,
"learning_rate": 9.870737454730552e-06,
"loss": 2.573082447052002,
"step": 404
},
{
"epoch": 0.4987684729064039,
"grad_norm": 14.615736501967937,
"learning_rate": 9.869113551144754e-06,
"loss": 2.4743850231170654,
"step": 405
},
{
"epoch": 0.5,
"grad_norm": 10.275697391044838,
"learning_rate": 9.867479645982158e-06,
"loss": 2.6644279956817627,
"step": 406
},
{
"epoch": 0.5012315270935961,
"grad_norm": 7.731558128938727,
"learning_rate": 9.865835742598942e-06,
"loss": 2.7798032760620117,
"step": 407
},
{
"epoch": 0.5024630541871922,
"grad_norm": 28.59542346400597,
"learning_rate": 9.864181844371828e-06,
"loss": 3.939884662628174,
"step": 408
},
{
"epoch": 0.5036945812807881,
"grad_norm": 21.07739414791098,
"learning_rate": 9.86251795469806e-06,
"loss": 2.8093104362487793,
"step": 409
},
{
"epoch": 0.5049261083743842,
"grad_norm": 8.961555424981583,
"learning_rate": 9.860844076995416e-06,
"loss": 2.1494715213775635,
"step": 410
},
{
"epoch": 0.5061576354679803,
"grad_norm": 21.200756727942377,
"learning_rate": 9.85916021470218e-06,
"loss": 2.964136838912964,
"step": 411
},
{
"epoch": 0.5073891625615764,
"grad_norm": 11.020672835034468,
"learning_rate": 9.857466371277152e-06,
"loss": 2.641287088394165,
"step": 412
},
{
"epoch": 0.5086206896551724,
"grad_norm": 9.8391871787113,
"learning_rate": 9.85576255019963e-06,
"loss": 2.454512357711792,
"step": 413
},
{
"epoch": 0.5098522167487685,
"grad_norm": 9.302782088404763,
"learning_rate": 9.85404875496941e-06,
"loss": 2.4566071033477783,
"step": 414
},
{
"epoch": 0.5110837438423645,
"grad_norm": 12.209048739605382,
"learning_rate": 9.852324989106772e-06,
"loss": 2.7254204750061035,
"step": 415
},
{
"epoch": 0.5123152709359606,
"grad_norm": 17.193015982984093,
"learning_rate": 9.850591256152483e-06,
"loss": 2.743382215499878,
"step": 416
},
{
"epoch": 0.5135467980295566,
"grad_norm": 31.54989094640885,
"learning_rate": 9.848847559667774e-06,
"loss": 3.376046657562256,
"step": 417
},
{
"epoch": 0.5147783251231527,
"grad_norm": 11.734812553622533,
"learning_rate": 9.847093903234351e-06,
"loss": 2.73980975151062,
"step": 418
},
{
"epoch": 0.5160098522167488,
"grad_norm": 8.164256099521083,
"learning_rate": 9.845330290454373e-06,
"loss": 2.7565903663635254,
"step": 419
},
{
"epoch": 0.5172413793103449,
"grad_norm": 9.178438912949575,
"learning_rate": 9.843556724950454e-06,
"loss": 2.9061315059661865,
"step": 420
},
{
"epoch": 0.5184729064039408,
"grad_norm": 18.23493245534027,
"learning_rate": 9.841773210365646e-06,
"loss": 3.1584839820861816,
"step": 421
},
{
"epoch": 0.5197044334975369,
"grad_norm": 13.406138718704618,
"learning_rate": 9.839979750363443e-06,
"loss": 3.300762176513672,
"step": 422
},
{
"epoch": 0.520935960591133,
"grad_norm": 16.907140017416133,
"learning_rate": 9.838176348627768e-06,
"loss": 2.5202269554138184,
"step": 423
},
{
"epoch": 0.5221674876847291,
"grad_norm": 14.800436222535966,
"learning_rate": 9.83636300886296e-06,
"loss": 3.9240634441375732,
"step": 424
},
{
"epoch": 0.5233990147783252,
"grad_norm": 13.058319822050642,
"learning_rate": 9.834539734793774e-06,
"loss": 3.1783556938171387,
"step": 425
},
{
"epoch": 0.5246305418719212,
"grad_norm": 9.577210971277129,
"learning_rate": 9.832706530165372e-06,
"loss": 2.787106513977051,
"step": 426
},
{
"epoch": 0.5258620689655172,
"grad_norm": 17.432663310497652,
"learning_rate": 9.830863398743313e-06,
"loss": 3.270280599594116,
"step": 427
},
{
"epoch": 0.5270935960591133,
"grad_norm": 13.065514198679326,
"learning_rate": 9.829010344313548e-06,
"loss": 3.0135059356689453,
"step": 428
},
{
"epoch": 0.5283251231527094,
"grad_norm": 12.9248393025633,
"learning_rate": 9.82714737068241e-06,
"loss": 2.989795207977295,
"step": 429
},
{
"epoch": 0.5295566502463054,
"grad_norm": 15.64315185844485,
"learning_rate": 9.825274481676605e-06,
"loss": 2.5208187103271484,
"step": 430
},
{
"epoch": 0.5307881773399015,
"grad_norm": 11.452591471364267,
"learning_rate": 9.82339168114321e-06,
"loss": 3.1890928745269775,
"step": 431
},
{
"epoch": 0.5320197044334976,
"grad_norm": 11.650610381993676,
"learning_rate": 9.821498972949657e-06,
"loss": 3.0655789375305176,
"step": 432
},
{
"epoch": 0.5332512315270936,
"grad_norm": 7.7840344730355335,
"learning_rate": 9.81959636098373e-06,
"loss": 2.611284017562866,
"step": 433
},
{
"epoch": 0.5344827586206896,
"grad_norm": 8.93478095027874,
"learning_rate": 9.817683849153561e-06,
"loss": 2.863576889038086,
"step": 434
},
{
"epoch": 0.5357142857142857,
"grad_norm": 10.52062689285789,
"learning_rate": 9.815761441387609e-06,
"loss": 2.6186623573303223,
"step": 435
},
{
"epoch": 0.5369458128078818,
"grad_norm": 6.68274047677578,
"learning_rate": 9.813829141634666e-06,
"loss": 1.3848458528518677,
"step": 436
},
{
"epoch": 0.5381773399014779,
"grad_norm": 9.593848866659638,
"learning_rate": 9.811886953863841e-06,
"loss": 3.00791597366333,
"step": 437
},
{
"epoch": 0.5394088669950738,
"grad_norm": 7.8032629730941565,
"learning_rate": 9.809934882064555e-06,
"loss": 2.8431854248046875,
"step": 438
},
{
"epoch": 0.5406403940886699,
"grad_norm": 10.324361743530943,
"learning_rate": 9.807972930246531e-06,
"loss": 2.3595449924468994,
"step": 439
},
{
"epoch": 0.541871921182266,
"grad_norm": 15.306323140698186,
"learning_rate": 9.806001102439789e-06,
"loss": 2.55434250831604,
"step": 440
},
{
"epoch": 0.5431034482758621,
"grad_norm": 23.37582741202724,
"learning_rate": 9.804019402694627e-06,
"loss": 2.4509990215301514,
"step": 441
},
{
"epoch": 0.5443349753694581,
"grad_norm": 9.38267743442567,
"learning_rate": 9.802027835081628e-06,
"loss": 2.825401782989502,
"step": 442
},
{
"epoch": 0.5455665024630542,
"grad_norm": 10.449224530160473,
"learning_rate": 9.800026403691643e-06,
"loss": 2.7315573692321777,
"step": 443
},
{
"epoch": 0.5467980295566502,
"grad_norm": 22.900410887080454,
"learning_rate": 9.798015112635786e-06,
"loss": 3.1359333992004395,
"step": 444
},
{
"epoch": 0.5480295566502463,
"grad_norm": 9.839888483337905,
"learning_rate": 9.795993966045418e-06,
"loss": 3.2884740829467773,
"step": 445
},
{
"epoch": 0.5492610837438424,
"grad_norm": 9.35231433219537,
"learning_rate": 9.793962968072149e-06,
"loss": 2.8281359672546387,
"step": 446
},
{
"epoch": 0.5504926108374384,
"grad_norm": 6.698793862232108,
"learning_rate": 9.791922122887823e-06,
"loss": 2.633974313735962,
"step": 447
},
{
"epoch": 0.5517241379310345,
"grad_norm": 8.317360049933578,
"learning_rate": 9.78987143468451e-06,
"loss": 2.1651690006256104,
"step": 448
},
{
"epoch": 0.5529556650246306,
"grad_norm": 11.511312923842238,
"learning_rate": 9.7878109076745e-06,
"loss": 3.011908531188965,
"step": 449
},
{
"epoch": 0.5541871921182266,
"grad_norm": 15.627130212627556,
"learning_rate": 9.785740546090293e-06,
"loss": 3.121683359146118,
"step": 450
},
{
"epoch": 0.5554187192118226,
"grad_norm": 14.263261857694998,
"learning_rate": 9.783660354184589e-06,
"loss": 2.9901375770568848,
"step": 451
},
{
"epoch": 0.5566502463054187,
"grad_norm": 15.230602091833177,
"learning_rate": 9.78157033623028e-06,
"loss": 3.1121528148651123,
"step": 452
},
{
"epoch": 0.5578817733990148,
"grad_norm": 22.32110731618789,
"learning_rate": 9.779470496520442e-06,
"loss": 2.9811508655548096,
"step": 453
},
{
"epoch": 0.5591133004926109,
"grad_norm": 11.801131103021726,
"learning_rate": 9.777360839368327e-06,
"loss": 2.8219947814941406,
"step": 454
},
{
"epoch": 0.5603448275862069,
"grad_norm": 10.166506753796495,
"learning_rate": 9.77524136910735e-06,
"loss": 2.870987892150879,
"step": 455
},
{
"epoch": 0.5615763546798029,
"grad_norm": 9.413959781223877,
"learning_rate": 9.773112090091084e-06,
"loss": 3.1902365684509277,
"step": 456
},
{
"epoch": 0.562807881773399,
"grad_norm": 12.723571043561764,
"learning_rate": 9.770973006693256e-06,
"loss": 3.3052220344543457,
"step": 457
},
{
"epoch": 0.5640394088669951,
"grad_norm": 14.337077670753716,
"learning_rate": 9.76882412330772e-06,
"loss": 2.3376049995422363,
"step": 458
},
{
"epoch": 0.5652709359605911,
"grad_norm": 10.245935627064924,
"learning_rate": 9.766665444348472e-06,
"loss": 2.8364970684051514,
"step": 459
},
{
"epoch": 0.5665024630541872,
"grad_norm": 18.308636912090915,
"learning_rate": 9.76449697424962e-06,
"loss": 2.582505702972412,
"step": 460
},
{
"epoch": 0.5677339901477833,
"grad_norm": 8.927255205757533,
"learning_rate": 9.76231871746539e-06,
"loss": 2.485147476196289,
"step": 461
},
{
"epoch": 0.5689655172413793,
"grad_norm": 11.356171958036413,
"learning_rate": 9.760130678470106e-06,
"loss": 3.0910027027130127,
"step": 462
},
{
"epoch": 0.5701970443349754,
"grad_norm": 10.937354765360512,
"learning_rate": 9.757932861758188e-06,
"loss": 3.3621506690979004,
"step": 463
},
{
"epoch": 0.5714285714285714,
"grad_norm": 11.222097055926637,
"learning_rate": 9.755725271844142e-06,
"loss": 2.8310019969940186,
"step": 464
},
{
"epoch": 0.5726600985221675,
"grad_norm": 30.527175863167063,
"learning_rate": 9.753507913262548e-06,
"loss": 2.797703742980957,
"step": 465
},
{
"epoch": 0.5738916256157636,
"grad_norm": 15.045285480872131,
"learning_rate": 9.751280790568047e-06,
"loss": 2.6609878540039062,
"step": 466
},
{
"epoch": 0.5751231527093597,
"grad_norm": 13.871081363987201,
"learning_rate": 9.749043908335343e-06,
"loss": 2.778043508529663,
"step": 467
},
{
"epoch": 0.5763546798029556,
"grad_norm": 13.771545893500338,
"learning_rate": 9.74679727115918e-06,
"loss": 2.8315014839172363,
"step": 468
},
{
"epoch": 0.5775862068965517,
"grad_norm": 19.916341772532764,
"learning_rate": 9.744540883654348e-06,
"loss": 3.3902840614318848,
"step": 469
},
{
"epoch": 0.5788177339901478,
"grad_norm": 22.648986055714484,
"learning_rate": 9.742274750455659e-06,
"loss": 3.53080153465271,
"step": 470
},
{
"epoch": 0.5800492610837439,
"grad_norm": 23.493391135041467,
"learning_rate": 9.739998876217943e-06,
"loss": 2.270110845565796,
"step": 471
},
{
"epoch": 0.5812807881773399,
"grad_norm": 12.049204240060057,
"learning_rate": 9.737713265616043e-06,
"loss": 2.7059872150421143,
"step": 472
},
{
"epoch": 0.5825123152709359,
"grad_norm": 20.2953123538445,
"learning_rate": 9.735417923344798e-06,
"loss": 4.328514575958252,
"step": 473
},
{
"epoch": 0.583743842364532,
"grad_norm": 14.790979425207205,
"learning_rate": 9.73311285411904e-06,
"loss": 3.2155938148498535,
"step": 474
},
{
"epoch": 0.5849753694581281,
"grad_norm": 35.79655633932577,
"learning_rate": 9.730798062673575e-06,
"loss": 2.277022361755371,
"step": 475
},
{
"epoch": 0.5862068965517241,
"grad_norm": 10.760493401180613,
"learning_rate": 9.728473553763186e-06,
"loss": 2.794111490249634,
"step": 476
},
{
"epoch": 0.5874384236453202,
"grad_norm": 7.877057642797786,
"learning_rate": 9.726139332162613e-06,
"loss": 3.00388765335083,
"step": 477
},
{
"epoch": 0.5886699507389163,
"grad_norm": 10.974644270731439,
"learning_rate": 9.723795402666549e-06,
"loss": 2.5355563163757324,
"step": 478
},
{
"epoch": 0.5899014778325123,
"grad_norm": 22.285874447386394,
"learning_rate": 9.721441770089621e-06,
"loss": 3.2441415786743164,
"step": 479
},
{
"epoch": 0.5911330049261084,
"grad_norm": 13.333764613863938,
"learning_rate": 9.719078439266399e-06,
"loss": 2.826803207397461,
"step": 480
},
{
"epoch": 0.5923645320197044,
"grad_norm": 6.843940415955184,
"learning_rate": 9.716705415051362e-06,
"loss": 2.5396804809570312,
"step": 481
},
{
"epoch": 0.5935960591133005,
"grad_norm": 23.860174795633608,
"learning_rate": 9.714322702318908e-06,
"loss": 2.85546875,
"step": 482
},
{
"epoch": 0.5948275862068966,
"grad_norm": 12.255473790019064,
"learning_rate": 9.711930305963333e-06,
"loss": 3.217014789581299,
"step": 483
},
{
"epoch": 0.5960591133004927,
"grad_norm": 8.15967079186392,
"learning_rate": 9.70952823089882e-06,
"loss": 2.781094551086426,
"step": 484
},
{
"epoch": 0.5972906403940886,
"grad_norm": 11.942750739396006,
"learning_rate": 9.707116482059447e-06,
"loss": 2.617154121398926,
"step": 485
},
{
"epoch": 0.5985221674876847,
"grad_norm": 15.243819163950327,
"learning_rate": 9.704695064399143e-06,
"loss": 2.601886510848999,
"step": 486
},
{
"epoch": 0.5997536945812808,
"grad_norm": 27.321867153996244,
"learning_rate": 9.702263982891712e-06,
"loss": 2.9616146087646484,
"step": 487
},
{
"epoch": 0.6009852216748769,
"grad_norm": 9.511966390540264,
"learning_rate": 9.699823242530803e-06,
"loss": 2.8881943225860596,
"step": 488
},
{
"epoch": 0.6022167487684729,
"grad_norm": 9.673073669047454,
"learning_rate": 9.697372848329905e-06,
"loss": 2.6718311309814453,
"step": 489
},
{
"epoch": 0.603448275862069,
"grad_norm": 12.946431548834504,
"learning_rate": 9.69491280532234e-06,
"loss": 2.959104537963867,
"step": 490
},
{
"epoch": 0.604679802955665,
"grad_norm": 13.919071872066077,
"learning_rate": 9.692443118561248e-06,
"loss": 2.085991621017456,
"step": 491
},
{
"epoch": 0.6059113300492611,
"grad_norm": 168.7126461149896,
"learning_rate": 9.689963793119574e-06,
"loss": 4.498569488525391,
"step": 492
},
{
"epoch": 0.6071428571428571,
"grad_norm": 12.118400731206464,
"learning_rate": 9.68747483409007e-06,
"loss": 2.7837424278259277,
"step": 493
},
{
"epoch": 0.6083743842364532,
"grad_norm": 14.436749099341482,
"learning_rate": 9.684976246585264e-06,
"loss": 2.637524366378784,
"step": 494
},
{
"epoch": 0.6096059113300493,
"grad_norm": 12.923969042105849,
"learning_rate": 9.682468035737475e-06,
"loss": 2.765727996826172,
"step": 495
},
{
"epoch": 0.6108374384236454,
"grad_norm": 12.957696638033102,
"learning_rate": 9.679950206698782e-06,
"loss": 2.825129270553589,
"step": 496
},
{
"epoch": 0.6120689655172413,
"grad_norm": 12.328586386653942,
"learning_rate": 9.677422764641021e-06,
"loss": 2.733224630355835,
"step": 497
},
{
"epoch": 0.6133004926108374,
"grad_norm": 10.367355913707218,
"learning_rate": 9.674885714755773e-06,
"loss": 3.6287670135498047,
"step": 498
},
{
"epoch": 0.6145320197044335,
"grad_norm": 8.212604152981882,
"learning_rate": 9.672339062254359e-06,
"loss": 2.38788104057312,
"step": 499
},
{
"epoch": 0.6157635467980296,
"grad_norm": 13.545719741820621,
"learning_rate": 9.66978281236782e-06,
"loss": 2.942269802093506,
"step": 500
},
{
"epoch": 0.6169950738916257,
"grad_norm": 12.748449735511594,
"learning_rate": 9.667216970346916e-06,
"loss": 2.4100990295410156,
"step": 501
},
{
"epoch": 0.6182266009852216,
"grad_norm": 15.669540249604715,
"learning_rate": 9.6646415414621e-06,
"loss": 2.3959155082702637,
"step": 502
},
{
"epoch": 0.6194581280788177,
"grad_norm": 7.949797631449559,
"learning_rate": 9.662056531003528e-06,
"loss": 2.93027925491333,
"step": 503
},
{
"epoch": 0.6206896551724138,
"grad_norm": 10.116460165226645,
"learning_rate": 9.659461944281035e-06,
"loss": 3.164715528488159,
"step": 504
},
{
"epoch": 0.6219211822660099,
"grad_norm": 16.218136964088803,
"learning_rate": 9.656857786624119e-06,
"loss": 2.634587287902832,
"step": 505
},
{
"epoch": 0.6231527093596059,
"grad_norm": 10.922060482445831,
"learning_rate": 9.654244063381948e-06,
"loss": 3.5667788982391357,
"step": 506
},
{
"epoch": 0.624384236453202,
"grad_norm": 8.542161812174806,
"learning_rate": 9.651620779923332e-06,
"loss": 2.9383740425109863,
"step": 507
},
{
"epoch": 0.625615763546798,
"grad_norm": 10.347829866523263,
"learning_rate": 9.648987941636719e-06,
"loss": 2.7658987045288086,
"step": 508
},
{
"epoch": 0.6268472906403941,
"grad_norm": 8.548905747003822,
"learning_rate": 9.646345553930187e-06,
"loss": 3.3089890480041504,
"step": 509
},
{
"epoch": 0.6280788177339901,
"grad_norm": 6.487031716645425,
"learning_rate": 9.643693622231426e-06,
"loss": 2.6208066940307617,
"step": 510
},
{
"epoch": 0.6293103448275862,
"grad_norm": 8.110412464341984,
"learning_rate": 9.64103215198773e-06,
"loss": 2.7099995613098145,
"step": 511
},
{
"epoch": 0.6305418719211823,
"grad_norm": 14.245396567085763,
"learning_rate": 9.638361148665989e-06,
"loss": 2.894531488418579,
"step": 512
},
{
"epoch": 0.6317733990147784,
"grad_norm": 11.657856176430656,
"learning_rate": 9.63568061775267e-06,
"loss": 3.1289191246032715,
"step": 513
},
{
"epoch": 0.6330049261083743,
"grad_norm": 14.82098703249081,
"learning_rate": 9.632990564753817e-06,
"loss": 2.954707145690918,
"step": 514
},
{
"epoch": 0.6342364532019704,
"grad_norm": 6.808305322372754,
"learning_rate": 9.630290995195028e-06,
"loss": 2.93411922454834,
"step": 515
},
{
"epoch": 0.6354679802955665,
"grad_norm": 7.276364027378903,
"learning_rate": 9.62758191462145e-06,
"loss": 2.637021541595459,
"step": 516
},
{
"epoch": 0.6366995073891626,
"grad_norm": 13.898029887698447,
"learning_rate": 9.624863328597767e-06,
"loss": 3.020066261291504,
"step": 517
},
{
"epoch": 0.6379310344827587,
"grad_norm": 24.08793299798331,
"learning_rate": 9.622135242708188e-06,
"loss": 2.5983335971832275,
"step": 518
},
{
"epoch": 0.6391625615763546,
"grad_norm": 13.609628946959008,
"learning_rate": 9.619397662556434e-06,
"loss": 2.714207410812378,
"step": 519
},
{
"epoch": 0.6403940886699507,
"grad_norm": 8.67874834351866,
"learning_rate": 9.616650593765733e-06,
"loss": 2.8505520820617676,
"step": 520
},
{
"epoch": 0.6416256157635468,
"grad_norm": 8.300798802306481,
"learning_rate": 9.613894041978795e-06,
"loss": 2.8081271648406982,
"step": 521
},
{
"epoch": 0.6428571428571429,
"grad_norm": 10.020203888067801,
"learning_rate": 9.611128012857818e-06,
"loss": 3.106411933898926,
"step": 522
},
{
"epoch": 0.6440886699507389,
"grad_norm": 9.32846194404547,
"learning_rate": 9.60835251208446e-06,
"loss": 3.087594985961914,
"step": 523
},
{
"epoch": 0.645320197044335,
"grad_norm": 15.30312860694116,
"learning_rate": 9.60556754535984e-06,
"loss": 2.7104361057281494,
"step": 524
},
{
"epoch": 0.646551724137931,
"grad_norm": 14.847900307580543,
"learning_rate": 9.602773118404518e-06,
"loss": 2.8562324047088623,
"step": 525
},
{
"epoch": 0.6477832512315271,
"grad_norm": 8.874728218475076,
"learning_rate": 9.599969236958485e-06,
"loss": 3.282554864883423,
"step": 526
},
{
"epoch": 0.6490147783251231,
"grad_norm": 8.797844640723032,
"learning_rate": 9.597155906781154e-06,
"loss": 2.623101234436035,
"step": 527
},
{
"epoch": 0.6502463054187192,
"grad_norm": 9.863712955626877,
"learning_rate": 9.59433313365135e-06,
"loss": 2.889674663543701,
"step": 528
},
{
"epoch": 0.6514778325123153,
"grad_norm": 10.895399946836921,
"learning_rate": 9.591500923367287e-06,
"loss": 2.787289619445801,
"step": 529
},
{
"epoch": 0.6527093596059114,
"grad_norm": 10.227588231836696,
"learning_rate": 9.58865928174657e-06,
"loss": 2.879824161529541,
"step": 530
},
{
"epoch": 0.6539408866995073,
"grad_norm": 8.869590002729453,
"learning_rate": 9.585808214626173e-06,
"loss": 2.967193126678467,
"step": 531
},
{
"epoch": 0.6551724137931034,
"grad_norm": 8.822784237769133,
"learning_rate": 9.582947727862433e-06,
"loss": 3.1004772186279297,
"step": 532
},
{
"epoch": 0.6564039408866995,
"grad_norm": 13.346747444504954,
"learning_rate": 9.580077827331038e-06,
"loss": 2.69935941696167,
"step": 533
},
{
"epoch": 0.6576354679802956,
"grad_norm": 13.781647523739567,
"learning_rate": 9.577198518927005e-06,
"loss": 3.2806637287139893,
"step": 534
},
{
"epoch": 0.6588669950738916,
"grad_norm": 17.336818625260154,
"learning_rate": 9.574309808564682e-06,
"loss": 3.050356149673462,
"step": 535
},
{
"epoch": 0.6600985221674877,
"grad_norm": 9.311777076008125,
"learning_rate": 9.57141170217773e-06,
"loss": 2.8415322303771973,
"step": 536
},
{
"epoch": 0.6613300492610837,
"grad_norm": 12.410317292425518,
"learning_rate": 9.568504205719106e-06,
"loss": 2.5309085845947266,
"step": 537
},
{
"epoch": 0.6625615763546798,
"grad_norm": 15.225443304522335,
"learning_rate": 9.565587325161056e-06,
"loss": 3.5695877075195312,
"step": 538
},
{
"epoch": 0.6637931034482759,
"grad_norm": 9.562550097283651,
"learning_rate": 9.562661066495108e-06,
"loss": 2.7938594818115234,
"step": 539
},
{
"epoch": 0.6650246305418719,
"grad_norm": 8.825138850911314,
"learning_rate": 9.559725435732042e-06,
"loss": 2.8548948764801025,
"step": 540
},
{
"epoch": 0.666256157635468,
"grad_norm": 10.262300101456184,
"learning_rate": 9.556780438901899e-06,
"loss": 3.054051399230957,
"step": 541
},
{
"epoch": 0.6674876847290641,
"grad_norm": 26.545357662435233,
"learning_rate": 9.553826082053951e-06,
"loss": 3.566359281539917,
"step": 542
},
{
"epoch": 0.6687192118226601,
"grad_norm": 12.751257760928588,
"learning_rate": 9.550862371256705e-06,
"loss": 2.8619909286499023,
"step": 543
},
{
"epoch": 0.6699507389162561,
"grad_norm": 14.522375958962538,
"learning_rate": 9.547889312597877e-06,
"loss": 3.0177836418151855,
"step": 544
},
{
"epoch": 0.6711822660098522,
"grad_norm": 21.356139863129055,
"learning_rate": 9.544906912184383e-06,
"loss": 1.9943304061889648,
"step": 545
},
{
"epoch": 0.6724137931034483,
"grad_norm": 5.562548029921876,
"learning_rate": 9.541915176142326e-06,
"loss": 2.650038957595825,
"step": 546
},
{
"epoch": 0.6736453201970444,
"grad_norm": 12.716408540810125,
"learning_rate": 9.538914110616995e-06,
"loss": 2.826953411102295,
"step": 547
},
{
"epoch": 0.6748768472906403,
"grad_norm": 9.963475586190201,
"learning_rate": 9.53590372177283e-06,
"loss": 2.770202159881592,
"step": 548
},
{
"epoch": 0.6761083743842364,
"grad_norm": 32.875675817649174,
"learning_rate": 9.532884015793432e-06,
"loss": 2.0859670639038086,
"step": 549
},
{
"epoch": 0.6773399014778325,
"grad_norm": 11.983581363761447,
"learning_rate": 9.529854998881534e-06,
"loss": 2.7557499408721924,
"step": 550
},
{
"epoch": 0.6785714285714286,
"grad_norm": 13.15410482971192,
"learning_rate": 9.526816677258995e-06,
"loss": 2.710692882537842,
"step": 551
},
{
"epoch": 0.6798029556650246,
"grad_norm": 9.416519545873685,
"learning_rate": 9.523769057166791e-06,
"loss": 3.055102825164795,
"step": 552
},
{
"epoch": 0.6810344827586207,
"grad_norm": 11.60625904359093,
"learning_rate": 9.520712144864997e-06,
"loss": 2.606031894683838,
"step": 553
},
{
"epoch": 0.6822660098522167,
"grad_norm": 12.067258837088112,
"learning_rate": 9.517645946632766e-06,
"loss": 2.9099555015563965,
"step": 554
},
{
"epoch": 0.6834975369458128,
"grad_norm": 10.888483887311708,
"learning_rate": 9.514570468768338e-06,
"loss": 2.7148189544677734,
"step": 555
},
{
"epoch": 0.6847290640394089,
"grad_norm": 15.652077873544759,
"learning_rate": 9.511485717589006e-06,
"loss": 2.528857707977295,
"step": 556
},
{
"epoch": 0.6859605911330049,
"grad_norm": 12.750166049911234,
"learning_rate": 9.508391699431114e-06,
"loss": 2.814006805419922,
"step": 557
},
{
"epoch": 0.687192118226601,
"grad_norm": 12.187355034460829,
"learning_rate": 9.50528842065004e-06,
"loss": 3.3046352863311768,
"step": 558
},
{
"epoch": 0.6884236453201971,
"grad_norm": 12.182964964248615,
"learning_rate": 9.502175887620188e-06,
"loss": 3.1519320011138916,
"step": 559
},
{
"epoch": 0.6896551724137931,
"grad_norm": 26.00958255437091,
"learning_rate": 9.499054106734963e-06,
"loss": 2.2819509506225586,
"step": 560
},
{
"epoch": 0.6908866995073891,
"grad_norm": 10.437408285902773,
"learning_rate": 9.495923084406773e-06,
"loss": 2.7894287109375,
"step": 561
},
{
"epoch": 0.6921182266009852,
"grad_norm": 27.469926449959043,
"learning_rate": 9.492782827067006e-06,
"loss": 3.233968734741211,
"step": 562
},
{
"epoch": 0.6933497536945813,
"grad_norm": 19.246363086379436,
"learning_rate": 9.48963334116602e-06,
"loss": 2.594421863555908,
"step": 563
},
{
"epoch": 0.6945812807881774,
"grad_norm": 11.788384104886402,
"learning_rate": 9.486474633173129e-06,
"loss": 3.181318759918213,
"step": 564
},
{
"epoch": 0.6958128078817734,
"grad_norm": 10.754721829366346,
"learning_rate": 9.48330670957659e-06,
"loss": 3.2115392684936523,
"step": 565
},
{
"epoch": 0.6970443349753694,
"grad_norm": 12.089226690676854,
"learning_rate": 9.480129576883592e-06,
"loss": 2.408634901046753,
"step": 566
},
{
"epoch": 0.6982758620689655,
"grad_norm": 13.370163003636199,
"learning_rate": 9.476943241620233e-06,
"loss": 2.9304041862487793,
"step": 567
},
{
"epoch": 0.6995073891625616,
"grad_norm": 23.52604617683973,
"learning_rate": 9.473747710331524e-06,
"loss": 2.75127911567688,
"step": 568
},
{
"epoch": 0.7007389162561576,
"grad_norm": 33.407245089515435,
"learning_rate": 9.470542989581357e-06,
"loss": 3.3793530464172363,
"step": 569
},
{
"epoch": 0.7019704433497537,
"grad_norm": 8.494714152681327,
"learning_rate": 9.467329085952505e-06,
"loss": 3.001579999923706,
"step": 570
},
{
"epoch": 0.7032019704433498,
"grad_norm": 12.457476112208125,
"learning_rate": 9.464106006046602e-06,
"loss": 2.063443422317505,
"step": 571
},
{
"epoch": 0.7044334975369458,
"grad_norm": 11.893453239405563,
"learning_rate": 9.460873756484128e-06,
"loss": 3.079399585723877,
"step": 572
},
{
"epoch": 0.7056650246305419,
"grad_norm": 17.600286095390665,
"learning_rate": 9.457632343904404e-06,
"loss": 2.6499621868133545,
"step": 573
},
{
"epoch": 0.7068965517241379,
"grad_norm": 11.052824766544509,
"learning_rate": 9.454381774965567e-06,
"loss": 2.848517656326294,
"step": 574
},
{
"epoch": 0.708128078817734,
"grad_norm": 11.779141171142625,
"learning_rate": 9.451122056344564e-06,
"loss": 2.936286687850952,
"step": 575
},
{
"epoch": 0.7093596059113301,
"grad_norm": 12.447965784800195,
"learning_rate": 9.44785319473714e-06,
"loss": 2.315443515777588,
"step": 576
},
{
"epoch": 0.7105911330049262,
"grad_norm": 13.488894073216153,
"learning_rate": 9.444575196857814e-06,
"loss": 3.121138334274292,
"step": 577
},
{
"epoch": 0.7118226600985221,
"grad_norm": 15.155327825693226,
"learning_rate": 9.441288069439876e-06,
"loss": 3.326282501220703,
"step": 578
},
{
"epoch": 0.7130541871921182,
"grad_norm": 12.463167654535278,
"learning_rate": 9.437991819235366e-06,
"loss": 2.8816466331481934,
"step": 579
},
{
"epoch": 0.7142857142857143,
"grad_norm": 14.769356931380226,
"learning_rate": 9.434686453015067e-06,
"loss": 3.6819610595703125,
"step": 580
},
{
"epoch": 0.7155172413793104,
"grad_norm": 33.4724384154282,
"learning_rate": 9.431371977568483e-06,
"loss": 2.904045343399048,
"step": 581
},
{
"epoch": 0.7167487684729064,
"grad_norm": 8.623967512206425,
"learning_rate": 9.428048399703831e-06,
"loss": 3.5356435775756836,
"step": 582
},
{
"epoch": 0.7179802955665024,
"grad_norm": 11.543651581364673,
"learning_rate": 9.424715726248027e-06,
"loss": 2.4456870555877686,
"step": 583
},
{
"epoch": 0.7192118226600985,
"grad_norm": 6.392692599853808,
"learning_rate": 9.421373964046665e-06,
"loss": 2.5000674724578857,
"step": 584
},
{
"epoch": 0.7204433497536946,
"grad_norm": 14.327212598984625,
"learning_rate": 9.418023119964012e-06,
"loss": 2.856738567352295,
"step": 585
},
{
"epoch": 0.7216748768472906,
"grad_norm": 6.593431351524387,
"learning_rate": 9.414663200882991e-06,
"loss": 2.623438835144043,
"step": 586
},
{
"epoch": 0.7229064039408867,
"grad_norm": 21.188129548487396,
"learning_rate": 9.411294213705162e-06,
"loss": 2.987426996231079,
"step": 587
},
{
"epoch": 0.7241379310344828,
"grad_norm": 16.308054128010806,
"learning_rate": 9.407916165350713e-06,
"loss": 2.8868589401245117,
"step": 588
},
{
"epoch": 0.7253694581280788,
"grad_norm": 5.6345787753710965,
"learning_rate": 9.404529062758447e-06,
"loss": 2.878659725189209,
"step": 589
},
{
"epoch": 0.7266009852216748,
"grad_norm": 21.624096395043555,
"learning_rate": 9.401132912885764e-06,
"loss": 3.197636127471924,
"step": 590
},
{
"epoch": 0.7278325123152709,
"grad_norm": 28.674970274616843,
"learning_rate": 9.397727722708643e-06,
"loss": 2.8974030017852783,
"step": 591
},
{
"epoch": 0.729064039408867,
"grad_norm": 14.603582651571138,
"learning_rate": 9.39431349922164e-06,
"loss": 2.558945894241333,
"step": 592
},
{
"epoch": 0.7302955665024631,
"grad_norm": 6.004290408591086,
"learning_rate": 9.390890249437863e-06,
"loss": 1.0518803596496582,
"step": 593
},
{
"epoch": 0.7315270935960592,
"grad_norm": 16.62422153547852,
"learning_rate": 9.38745798038896e-06,
"loss": 3.5599231719970703,
"step": 594
},
{
"epoch": 0.7327586206896551,
"grad_norm": 9.731487783525235,
"learning_rate": 9.384016699125102e-06,
"loss": 3.1517539024353027,
"step": 595
},
{
"epoch": 0.7339901477832512,
"grad_norm": 10.319265754066222,
"learning_rate": 9.380566412714982e-06,
"loss": 2.809019088745117,
"step": 596
},
{
"epoch": 0.7352216748768473,
"grad_norm": 14.675772943073882,
"learning_rate": 9.377107128245782e-06,
"loss": 3.2317776679992676,
"step": 597
},
{
"epoch": 0.7364532019704434,
"grad_norm": 15.494293767128655,
"learning_rate": 9.373638852823166e-06,
"loss": 2.7792513370513916,
"step": 598
},
{
"epoch": 0.7376847290640394,
"grad_norm": 17.02704136876628,
"learning_rate": 9.370161593571274e-06,
"loss": 2.75253963470459,
"step": 599
},
{
"epoch": 0.7389162561576355,
"grad_norm": 14.987899586174,
"learning_rate": 9.36667535763269e-06,
"loss": 3.381519317626953,
"step": 600
},
{
"epoch": 0.7401477832512315,
"grad_norm": 19.24830788986111,
"learning_rate": 9.363180152168448e-06,
"loss": 2.62427020072937,
"step": 601
},
{
"epoch": 0.7413793103448276,
"grad_norm": 29.185871046378647,
"learning_rate": 9.359675984357992e-06,
"loss": 2.4824719429016113,
"step": 602
},
{
"epoch": 0.7426108374384236,
"grad_norm": 8.673285241589555,
"learning_rate": 9.356162861399188e-06,
"loss": 2.8167097568511963,
"step": 603
},
{
"epoch": 0.7438423645320197,
"grad_norm": 15.318689439779794,
"learning_rate": 9.352640790508291e-06,
"loss": 2.9545063972473145,
"step": 604
},
{
"epoch": 0.7450738916256158,
"grad_norm": 16.1719679891284,
"learning_rate": 9.349109778919938e-06,
"loss": 2.833635091781616,
"step": 605
},
{
"epoch": 0.7463054187192119,
"grad_norm": 9.791828516981264,
"learning_rate": 9.345569833887124e-06,
"loss": 2.775730609893799,
"step": 606
},
{
"epoch": 0.7475369458128078,
"grad_norm": 28.327643593931583,
"learning_rate": 9.342020962681206e-06,
"loss": 2.652602195739746,
"step": 607
},
{
"epoch": 0.7487684729064039,
"grad_norm": 10.194351110042778,
"learning_rate": 9.338463172591868e-06,
"loss": 2.7008144855499268,
"step": 608
},
{
"epoch": 0.75,
"grad_norm": 9.445868833849106,
"learning_rate": 9.334896470927115e-06,
"loss": 2.7525248527526855,
"step": 609
},
{
"epoch": 0.7512315270935961,
"grad_norm": 26.640278263158898,
"learning_rate": 9.331320865013257e-06,
"loss": 3.446526527404785,
"step": 610
},
{
"epoch": 0.7524630541871922,
"grad_norm": 14.322498892724218,
"learning_rate": 9.327736362194899e-06,
"loss": 3.0489022731781006,
"step": 611
},
{
"epoch": 0.7536945812807881,
"grad_norm": 9.879694468014232,
"learning_rate": 9.324142969834916e-06,
"loss": 2.840083360671997,
"step": 612
},
{
"epoch": 0.7549261083743842,
"grad_norm": 8.637072486896487,
"learning_rate": 9.32054069531444e-06,
"loss": 2.878903388977051,
"step": 613
},
{
"epoch": 0.7561576354679803,
"grad_norm": 10.815449949874669,
"learning_rate": 9.316929546032855e-06,
"loss": 2.568045139312744,
"step": 614
},
{
"epoch": 0.7573891625615764,
"grad_norm": 18.206411357576574,
"learning_rate": 9.313309529407773e-06,
"loss": 2.8981618881225586,
"step": 615
},
{
"epoch": 0.7586206896551724,
"grad_norm": 14.515670827099761,
"learning_rate": 9.309680652875015e-06,
"loss": 3.3486928939819336,
"step": 616
},
{
"epoch": 0.7598522167487685,
"grad_norm": 10.208627841304171,
"learning_rate": 9.306042923888607e-06,
"loss": 3.1101677417755127,
"step": 617
},
{
"epoch": 0.7610837438423645,
"grad_norm": 9.545526159427496,
"learning_rate": 9.302396349920756e-06,
"loss": 2.5806779861450195,
"step": 618
},
{
"epoch": 0.7623152709359606,
"grad_norm": 14.260459979245976,
"learning_rate": 9.298740938461835e-06,
"loss": 2.678412437438965,
"step": 619
},
{
"epoch": 0.7635467980295566,
"grad_norm": 10.808443055524243,
"learning_rate": 9.295076697020378e-06,
"loss": 2.62287974357605,
"step": 620
},
{
"epoch": 0.7647783251231527,
"grad_norm": 7.635004154714619,
"learning_rate": 9.291403633123046e-06,
"loss": 3.0267720222473145,
"step": 621
},
{
"epoch": 0.7660098522167488,
"grad_norm": 15.707612902426492,
"learning_rate": 9.287721754314629e-06,
"loss": 3.147644281387329,
"step": 622
},
{
"epoch": 0.7672413793103449,
"grad_norm": 14.526297785533162,
"learning_rate": 9.284031068158023e-06,
"loss": 3.159574031829834,
"step": 623
},
{
"epoch": 0.7684729064039408,
"grad_norm": 13.384426615670701,
"learning_rate": 9.280331582234212e-06,
"loss": 2.6432247161865234,
"step": 624
},
{
"epoch": 0.7697044334975369,
"grad_norm": 14.835270706650137,
"learning_rate": 9.27662330414226e-06,
"loss": 3.2058279514312744,
"step": 625
},
{
"epoch": 0.770935960591133,
"grad_norm": 10.18160016154191,
"learning_rate": 9.272906241499285e-06,
"loss": 2.787260055541992,
"step": 626
},
{
"epoch": 0.7721674876847291,
"grad_norm": 13.10691777443293,
"learning_rate": 9.269180401940455e-06,
"loss": 2.5751729011535645,
"step": 627
},
{
"epoch": 0.7733990147783252,
"grad_norm": 31.695378978025254,
"learning_rate": 9.265445793118962e-06,
"loss": 2.7433929443359375,
"step": 628
},
{
"epoch": 0.7746305418719212,
"grad_norm": 14.739647225699887,
"learning_rate": 9.261702422706014e-06,
"loss": 2.771510124206543,
"step": 629
},
{
"epoch": 0.7758620689655172,
"grad_norm": 10.064291707891675,
"learning_rate": 9.257950298390815e-06,
"loss": 2.873830795288086,
"step": 630
},
{
"epoch": 0.7770935960591133,
"grad_norm": 11.389694880244464,
"learning_rate": 9.254189427880548e-06,
"loss": 2.7849340438842773,
"step": 631
},
{
"epoch": 0.7783251231527094,
"grad_norm": 9.049096315314397,
"learning_rate": 9.250419818900366e-06,
"loss": 3.1721668243408203,
"step": 632
},
{
"epoch": 0.7795566502463054,
"grad_norm": 10.167539529464127,
"learning_rate": 9.24664147919337e-06,
"loss": 2.7493605613708496,
"step": 633
},
{
"epoch": 0.7807881773399015,
"grad_norm": 16.15312048584227,
"learning_rate": 9.242854416520591e-06,
"loss": 2.470233917236328,
"step": 634
},
{
"epoch": 0.7820197044334976,
"grad_norm": 11.446898989077285,
"learning_rate": 9.239058638660983e-06,
"loss": 2.7109014987945557,
"step": 635
},
{
"epoch": 0.7832512315270936,
"grad_norm": 15.265461277758774,
"learning_rate": 9.235254153411394e-06,
"loss": 3.0344791412353516,
"step": 636
},
{
"epoch": 0.7844827586206896,
"grad_norm": 12.820354961892846,
"learning_rate": 9.231440968586572e-06,
"loss": 2.381561279296875,
"step": 637
},
{
"epoch": 0.7857142857142857,
"grad_norm": 11.033746075983524,
"learning_rate": 9.227619092019116e-06,
"loss": 1.716524362564087,
"step": 638
},
{
"epoch": 0.7869458128078818,
"grad_norm": 36.36927433118522,
"learning_rate": 9.223788531559495e-06,
"loss": 2.591820240020752,
"step": 639
},
{
"epoch": 0.7881773399014779,
"grad_norm": 22.998289773218893,
"learning_rate": 9.219949295076006e-06,
"loss": 3.0194711685180664,
"step": 640
},
{
"epoch": 0.7894088669950738,
"grad_norm": 9.82623401522864,
"learning_rate": 9.216101390454771e-06,
"loss": 2.852489471435547,
"step": 641
},
{
"epoch": 0.7906403940886699,
"grad_norm": 16.052245879830704,
"learning_rate": 9.212244825599714e-06,
"loss": 3.1419005393981934,
"step": 642
},
{
"epoch": 0.791871921182266,
"grad_norm": 7.825862600095094,
"learning_rate": 9.208379608432552e-06,
"loss": 2.8307576179504395,
"step": 643
},
{
"epoch": 0.7931034482758621,
"grad_norm": 8.143984458879574,
"learning_rate": 9.204505746892772e-06,
"loss": 2.581083297729492,
"step": 644
},
{
"epoch": 0.7943349753694581,
"grad_norm": 18.48744043986469,
"learning_rate": 9.200623248937619e-06,
"loss": 2.868973731994629,
"step": 645
},
{
"epoch": 0.7955665024630542,
"grad_norm": 8.257209013058233,
"learning_rate": 9.196732122542073e-06,
"loss": 2.8063859939575195,
"step": 646
},
{
"epoch": 0.7967980295566502,
"grad_norm": 12.8457758247775,
"learning_rate": 9.192832375698845e-06,
"loss": 2.990504264831543,
"step": 647
},
{
"epoch": 0.7980295566502463,
"grad_norm": 15.29216631759892,
"learning_rate": 9.18892401641835e-06,
"loss": 2.390320301055908,
"step": 648
},
{
"epoch": 0.7992610837438424,
"grad_norm": 10.724837816433517,
"learning_rate": 9.185007052728689e-06,
"loss": 2.671368360519409,
"step": 649
},
{
"epoch": 0.8004926108374384,
"grad_norm": 34.65249876179552,
"learning_rate": 9.181081492675645e-06,
"loss": 3.259225845336914,
"step": 650
},
{
"epoch": 0.8017241379310345,
"grad_norm": 15.454469742488547,
"learning_rate": 9.177147344322651e-06,
"loss": 2.6810710430145264,
"step": 651
},
{
"epoch": 0.8029556650246306,
"grad_norm": 11.530365704888945,
"learning_rate": 9.173204615750792e-06,
"loss": 2.833371162414551,
"step": 652
},
{
"epoch": 0.8041871921182266,
"grad_norm": 16.732932575361076,
"learning_rate": 9.169253315058764e-06,
"loss": 2.3488945960998535,
"step": 653
},
{
"epoch": 0.8054187192118226,
"grad_norm": 9.726564803680413,
"learning_rate": 9.165293450362882e-06,
"loss": 2.609282970428467,
"step": 654
},
{
"epoch": 0.8066502463054187,
"grad_norm": 7.091881545178562,
"learning_rate": 9.161325029797044e-06,
"loss": 2.536142587661743,
"step": 655
},
{
"epoch": 0.8078817733990148,
"grad_norm": 9.986592341017682,
"learning_rate": 9.157348061512728e-06,
"loss": 2.7175073623657227,
"step": 656
},
{
"epoch": 0.8091133004926109,
"grad_norm": 8.682128121343633,
"learning_rate": 9.153362553678967e-06,
"loss": 2.99211049079895,
"step": 657
},
{
"epoch": 0.8103448275862069,
"grad_norm": 9.322932294885456,
"learning_rate": 9.149368514482337e-06,
"loss": 2.9390807151794434,
"step": 658
},
{
"epoch": 0.8115763546798029,
"grad_norm": 18.322306761451276,
"learning_rate": 9.145365952126937e-06,
"loss": 3.0422894954681396,
"step": 659
},
{
"epoch": 0.812807881773399,
"grad_norm": 13.085537087984829,
"learning_rate": 9.141354874834372e-06,
"loss": 3.0573301315307617,
"step": 660
},
{
"epoch": 0.8140394088669951,
"grad_norm": 11.125925990068074,
"learning_rate": 9.13733529084374e-06,
"loss": 2.5086781978607178,
"step": 661
},
{
"epoch": 0.8152709359605911,
"grad_norm": 12.865460326379043,
"learning_rate": 9.13330720841161e-06,
"loss": 2.858813762664795,
"step": 662
},
{
"epoch": 0.8165024630541872,
"grad_norm": 16.68197454357427,
"learning_rate": 9.129270635812013e-06,
"loss": 2.6715052127838135,
"step": 663
},
{
"epoch": 0.8177339901477833,
"grad_norm": 8.328828299636488,
"learning_rate": 9.125225581336408e-06,
"loss": 3.18508243560791,
"step": 664
},
{
"epoch": 0.8189655172413793,
"grad_norm": 12.129831350250795,
"learning_rate": 9.12117205329369e-06,
"loss": 3.0426509380340576,
"step": 665
},
{
"epoch": 0.8201970443349754,
"grad_norm": 10.31532455027376,
"learning_rate": 9.11711006001015e-06,
"loss": 2.8654000759124756,
"step": 666
},
{
"epoch": 0.8214285714285714,
"grad_norm": 22.312769944556898,
"learning_rate": 9.113039609829472e-06,
"loss": 3.141207695007324,
"step": 667
},
{
"epoch": 0.8226600985221675,
"grad_norm": 9.864189257198062,
"learning_rate": 9.108960711112709e-06,
"loss": 2.3188462257385254,
"step": 668
},
{
"epoch": 0.8238916256157636,
"grad_norm": 7.227847497482275,
"learning_rate": 9.104873372238269e-06,
"loss": 2.785968542098999,
"step": 669
},
{
"epoch": 0.8251231527093597,
"grad_norm": 11.651688072805056,
"learning_rate": 9.100777601601896e-06,
"loss": 3.0693092346191406,
"step": 670
},
{
"epoch": 0.8263546798029556,
"grad_norm": 14.359029220301974,
"learning_rate": 9.096673407616656e-06,
"loss": 3.038943290710449,
"step": 671
},
{
"epoch": 0.8275862068965517,
"grad_norm": 11.367718044029667,
"learning_rate": 9.092560798712913e-06,
"loss": 3.259847640991211,
"step": 672
},
{
"epoch": 0.8288177339901478,
"grad_norm": 7.44988788267686,
"learning_rate": 9.08843978333832e-06,
"loss": 2.8227295875549316,
"step": 673
},
{
"epoch": 0.8300492610837439,
"grad_norm": 11.316814915640423,
"learning_rate": 9.084310369957795e-06,
"loss": 3.373309850692749,
"step": 674
},
{
"epoch": 0.8312807881773399,
"grad_norm": 8.828902957926932,
"learning_rate": 9.08017256705351e-06,
"loss": 3.2833662033081055,
"step": 675
},
{
"epoch": 0.8325123152709359,
"grad_norm": 26.42438693311499,
"learning_rate": 9.076026383124863e-06,
"loss": 2.7175965309143066,
"step": 676
},
{
"epoch": 0.833743842364532,
"grad_norm": 15.34429558424053,
"learning_rate": 9.071871826688472e-06,
"loss": 2.594611167907715,
"step": 677
},
{
"epoch": 0.8349753694581281,
"grad_norm": 23.79233069504134,
"learning_rate": 9.067708906278155e-06,
"loss": 2.8605175018310547,
"step": 678
},
{
"epoch": 0.8362068965517241,
"grad_norm": 16.81935056764866,
"learning_rate": 9.063537630444903e-06,
"loss": 2.1438748836517334,
"step": 679
},
{
"epoch": 0.8374384236453202,
"grad_norm": 10.888612008792562,
"learning_rate": 9.05935800775688e-06,
"loss": 2.8170299530029297,
"step": 680
},
{
"epoch": 0.8386699507389163,
"grad_norm": 14.167748893628115,
"learning_rate": 9.055170046799386e-06,
"loss": 1.7328954935073853,
"step": 681
},
{
"epoch": 0.8399014778325123,
"grad_norm": 9.011227940975711,
"learning_rate": 9.050973756174852e-06,
"loss": 2.8324766159057617,
"step": 682
},
{
"epoch": 0.8411330049261084,
"grad_norm": 10.1469630150836,
"learning_rate": 9.046769144502818e-06,
"loss": 2.805690288543701,
"step": 683
},
{
"epoch": 0.8423645320197044,
"grad_norm": 18.955236663194235,
"learning_rate": 9.04255622041992e-06,
"loss": 2.1270194053649902,
"step": 684
},
{
"epoch": 0.8435960591133005,
"grad_norm": 15.32094380068091,
"learning_rate": 9.038334992579863e-06,
"loss": 2.8757829666137695,
"step": 685
},
{
"epoch": 0.8448275862068966,
"grad_norm": 11.38695715200097,
"learning_rate": 9.034105469653412e-06,
"loss": 2.84549617767334,
"step": 686
},
{
"epoch": 0.8460591133004927,
"grad_norm": 9.897557814234148,
"learning_rate": 9.029867660328369e-06,
"loss": 2.4058642387390137,
"step": 687
},
{
"epoch": 0.8472906403940886,
"grad_norm": 11.793589267069729,
"learning_rate": 9.025621573309559e-06,
"loss": 3.2583184242248535,
"step": 688
},
{
"epoch": 0.8485221674876847,
"grad_norm": 16.425935376287054,
"learning_rate": 9.021367217318808e-06,
"loss": 2.951143264770508,
"step": 689
},
{
"epoch": 0.8497536945812808,
"grad_norm": 23.876213749579968,
"learning_rate": 9.017104601094927e-06,
"loss": 3.0142836570739746,
"step": 690
},
{
"epoch": 0.8509852216748769,
"grad_norm": 6.8041557155789345,
"learning_rate": 9.012833733393697e-06,
"loss": 2.7629013061523438,
"step": 691
},
{
"epoch": 0.8522167487684729,
"grad_norm": 12.775266706976657,
"learning_rate": 9.008554622987845e-06,
"loss": 2.6153712272644043,
"step": 692
},
{
"epoch": 0.853448275862069,
"grad_norm": 10.104362674966435,
"learning_rate": 9.004267278667032e-06,
"loss": 2.7227087020874023,
"step": 693
},
{
"epoch": 0.854679802955665,
"grad_norm": 10.955806195385584,
"learning_rate": 8.999971709237832e-06,
"loss": 2.7320899963378906,
"step": 694
},
{
"epoch": 0.8559113300492611,
"grad_norm": 9.04416662510961,
"learning_rate": 8.99566792352371e-06,
"loss": 2.4416356086730957,
"step": 695
},
{
"epoch": 0.8571428571428571,
"grad_norm": 23.838296750423428,
"learning_rate": 8.991355930365013e-06,
"loss": 3.251642942428589,
"step": 696
},
{
"epoch": 0.8583743842364532,
"grad_norm": 46.67562045008053,
"learning_rate": 8.987035738618943e-06,
"loss": 2.9292666912078857,
"step": 697
},
{
"epoch": 0.8596059113300493,
"grad_norm": 16.120654552226135,
"learning_rate": 8.982707357159549e-06,
"loss": 2.804452896118164,
"step": 698
},
{
"epoch": 0.8608374384236454,
"grad_norm": 9.903594099304835,
"learning_rate": 8.978370794877691e-06,
"loss": 2.4997687339782715,
"step": 699
},
{
"epoch": 0.8620689655172413,
"grad_norm": 43.24532276513338,
"learning_rate": 8.974026060681044e-06,
"loss": 2.459716558456421,
"step": 700
},
{
"epoch": 0.8633004926108374,
"grad_norm": 6.407514764745252,
"learning_rate": 8.969673163494063e-06,
"loss": 2.57291316986084,
"step": 701
},
{
"epoch": 0.8645320197044335,
"grad_norm": 9.925965111489338,
"learning_rate": 8.965312112257973e-06,
"loss": 2.6452269554138184,
"step": 702
},
{
"epoch": 0.8657635467980296,
"grad_norm": 15.666974346483006,
"learning_rate": 8.960942915930749e-06,
"loss": 2.4361040592193604,
"step": 703
},
{
"epoch": 0.8669950738916257,
"grad_norm": 12.205200732214369,
"learning_rate": 8.956565583487092e-06,
"loss": 2.819046974182129,
"step": 704
},
{
"epoch": 0.8682266009852216,
"grad_norm": 23.813445037945687,
"learning_rate": 8.952180123918419e-06,
"loss": 3.536510944366455,
"step": 705
},
{
"epoch": 0.8694581280788177,
"grad_norm": 19.455220333084014,
"learning_rate": 8.94778654623284e-06,
"loss": 3.340855121612549,
"step": 706
},
{
"epoch": 0.8706896551724138,
"grad_norm": 15.988003472296347,
"learning_rate": 8.94338485945514e-06,
"loss": 2.7881288528442383,
"step": 707
},
{
"epoch": 0.8719211822660099,
"grad_norm": 18.44911045759373,
"learning_rate": 8.938975072626762e-06,
"loss": 3.119422197341919,
"step": 708
},
{
"epoch": 0.8731527093596059,
"grad_norm": 18.233236078041163,
"learning_rate": 8.934557194805787e-06,
"loss": 2.694553852081299,
"step": 709
},
{
"epoch": 0.874384236453202,
"grad_norm": 13.897466836595251,
"learning_rate": 8.930131235066914e-06,
"loss": 2.7162301540374756,
"step": 710
},
{
"epoch": 0.875615763546798,
"grad_norm": 9.86969530883223,
"learning_rate": 8.925697202501442e-06,
"loss": 2.4017574787139893,
"step": 711
},
{
"epoch": 0.8768472906403941,
"grad_norm": 22.07024366462836,
"learning_rate": 8.92125510621726e-06,
"loss": 2.491663932800293,
"step": 712
},
{
"epoch": 0.8780788177339901,
"grad_norm": 9.704458797982127,
"learning_rate": 8.916804955338807e-06,
"loss": 3.09323787689209,
"step": 713
},
{
"epoch": 0.8793103448275862,
"grad_norm": 14.245234888372442,
"learning_rate": 8.91234675900708e-06,
"loss": 3.0273964405059814,
"step": 714
},
{
"epoch": 0.8805418719211823,
"grad_norm": 10.033605733175728,
"learning_rate": 8.907880526379594e-06,
"loss": 2.5009701251983643,
"step": 715
},
{
"epoch": 0.8817733990147784,
"grad_norm": 14.04261929200788,
"learning_rate": 8.903406266630374e-06,
"loss": 2.7629752159118652,
"step": 716
},
{
"epoch": 0.8830049261083743,
"grad_norm": 19.00265649950274,
"learning_rate": 8.898923988949936e-06,
"loss": 2.5285563468933105,
"step": 717
},
{
"epoch": 0.8842364532019704,
"grad_norm": 11.293266358312355,
"learning_rate": 8.89443370254526e-06,
"loss": 2.6903738975524902,
"step": 718
},
{
"epoch": 0.8854679802955665,
"grad_norm": 4.918527502448237,
"learning_rate": 8.88993541663978e-06,
"loss": 2.8083925247192383,
"step": 719
},
{
"epoch": 0.8866995073891626,
"grad_norm": 14.900444889845339,
"learning_rate": 8.885429140473361e-06,
"loss": 3.0920486450195312,
"step": 720
},
{
"epoch": 0.8879310344827587,
"grad_norm": 15.55585461742265,
"learning_rate": 8.880914883302278e-06,
"loss": 2.7464776039123535,
"step": 721
},
{
"epoch": 0.8891625615763546,
"grad_norm": 28.218307852720514,
"learning_rate": 8.876392654399208e-06,
"loss": 2.7022242546081543,
"step": 722
},
{
"epoch": 0.8903940886699507,
"grad_norm": 7.9907639594026385,
"learning_rate": 8.871862463053193e-06,
"loss": 3.202090263366699,
"step": 723
},
{
"epoch": 0.8916256157635468,
"grad_norm": 12.370662746549176,
"learning_rate": 8.867324318569637e-06,
"loss": 2.792590856552124,
"step": 724
},
{
"epoch": 0.8928571428571429,
"grad_norm": 12.485149742498526,
"learning_rate": 8.862778230270276e-06,
"loss": 2.8918404579162598,
"step": 725
},
{
"epoch": 0.8940886699507389,
"grad_norm": 17.523163987955954,
"learning_rate": 8.858224207493165e-06,
"loss": 2.881380081176758,
"step": 726
},
{
"epoch": 0.895320197044335,
"grad_norm": 10.929446497515306,
"learning_rate": 8.85366225959266e-06,
"loss": 2.7197518348693848,
"step": 727
},
{
"epoch": 0.896551724137931,
"grad_norm": 14.58273441890301,
"learning_rate": 8.849092395939388e-06,
"loss": 2.8458380699157715,
"step": 728
},
{
"epoch": 0.8977832512315271,
"grad_norm": 9.240130544994555,
"learning_rate": 8.844514625920246e-06,
"loss": 2.5815629959106445,
"step": 729
},
{
"epoch": 0.8990147783251231,
"grad_norm": 12.536324929930204,
"learning_rate": 8.839928958938364e-06,
"loss": 2.388244867324829,
"step": 730
},
{
"epoch": 0.9002463054187192,
"grad_norm": 9.268565736662921,
"learning_rate": 8.835335404413096e-06,
"loss": 2.678809404373169,
"step": 731
},
{
"epoch": 0.9014778325123153,
"grad_norm": 13.664345931125762,
"learning_rate": 8.830733971779996e-06,
"loss": 3.4926984310150146,
"step": 732
},
{
"epoch": 0.9027093596059114,
"grad_norm": 8.38741339708261,
"learning_rate": 8.826124670490804e-06,
"loss": 3.143955707550049,
"step": 733
},
{
"epoch": 0.9039408866995073,
"grad_norm": 8.285169477267281,
"learning_rate": 8.821507510013416e-06,
"loss": 2.30763840675354,
"step": 734
},
{
"epoch": 0.9051724137931034,
"grad_norm": 11.658087999854533,
"learning_rate": 8.816882499831877e-06,
"loss": 3.2019965648651123,
"step": 735
},
{
"epoch": 0.9064039408866995,
"grad_norm": 11.03286006250671,
"learning_rate": 8.812249649446357e-06,
"loss": 2.5554118156433105,
"step": 736
},
{
"epoch": 0.9076354679802956,
"grad_norm": 10.468019775536181,
"learning_rate": 8.807608968373123e-06,
"loss": 2.6560721397399902,
"step": 737
},
{
"epoch": 0.9088669950738916,
"grad_norm": 21.753543318554573,
"learning_rate": 8.802960466144537e-06,
"loss": 3.2792091369628906,
"step": 738
},
{
"epoch": 0.9100985221674877,
"grad_norm": 8.801113008077715,
"learning_rate": 8.798304152309019e-06,
"loss": 2.4306914806365967,
"step": 739
},
{
"epoch": 0.9113300492610837,
"grad_norm": 11.427047186823343,
"learning_rate": 8.793640036431036e-06,
"loss": 2.791334867477417,
"step": 740
},
{
"epoch": 0.9125615763546798,
"grad_norm": 11.78168946860072,
"learning_rate": 8.788968128091084e-06,
"loss": 2.8516879081726074,
"step": 741
},
{
"epoch": 0.9137931034482759,
"grad_norm": 18.40294226204317,
"learning_rate": 8.784288436885663e-06,
"loss": 2.783674716949463,
"step": 742
},
{
"epoch": 0.9150246305418719,
"grad_norm": 9.042045966372719,
"learning_rate": 8.779600972427257e-06,
"loss": 2.538564443588257,
"step": 743
},
{
"epoch": 0.916256157635468,
"grad_norm": 21.11608056647587,
"learning_rate": 8.774905744344326e-06,
"loss": 2.603914260864258,
"step": 744
},
{
"epoch": 0.9174876847290641,
"grad_norm": 18.991966127623154,
"learning_rate": 8.770202762281267e-06,
"loss": 2.6232197284698486,
"step": 745
},
{
"epoch": 0.9187192118226601,
"grad_norm": 9.533961363388334,
"learning_rate": 8.765492035898406e-06,
"loss": 2.586906671524048,
"step": 746
},
{
"epoch": 0.9199507389162561,
"grad_norm": 11.702571386481814,
"learning_rate": 8.760773574871985e-06,
"loss": 3.019075870513916,
"step": 747
},
{
"epoch": 0.9211822660098522,
"grad_norm": 13.549959986762131,
"learning_rate": 8.756047388894123e-06,
"loss": 2.6554617881774902,
"step": 748
},
{
"epoch": 0.9224137931034483,
"grad_norm": 10.617389263376301,
"learning_rate": 8.751313487672815e-06,
"loss": 3.3622567653656006,
"step": 749
},
{
"epoch": 0.9236453201970444,
"grad_norm": 15.62971817318244,
"learning_rate": 8.746571880931896e-06,
"loss": 2.748253345489502,
"step": 750
},
{
"epoch": 0.9248768472906403,
"grad_norm": 10.680533586135248,
"learning_rate": 8.741822578411036e-06,
"loss": 3.358571767807007,
"step": 751
},
{
"epoch": 0.9261083743842364,
"grad_norm": 8.513871800316197,
"learning_rate": 8.737065589865709e-06,
"loss": 2.707146167755127,
"step": 752
},
{
"epoch": 0.9273399014778325,
"grad_norm": 15.06206429941032,
"learning_rate": 8.732300925067177e-06,
"loss": 2.782027006149292,
"step": 753
},
{
"epoch": 0.9285714285714286,
"grad_norm": 13.377969237833796,
"learning_rate": 8.727528593802469e-06,
"loss": 2.758582830429077,
"step": 754
},
{
"epoch": 0.9298029556650246,
"grad_norm": 12.5189792863405,
"learning_rate": 8.722748605874365e-06,
"loss": 2.798398971557617,
"step": 755
},
{
"epoch": 0.9310344827586207,
"grad_norm": 7.0237993457565056,
"learning_rate": 8.717960971101367e-06,
"loss": 2.8893141746520996,
"step": 756
},
{
"epoch": 0.9322660098522167,
"grad_norm": 13.108491345078546,
"learning_rate": 8.71316569931769e-06,
"loss": 2.8260703086853027,
"step": 757
},
{
"epoch": 0.9334975369458128,
"grad_norm": 13.669452983841648,
"learning_rate": 8.708362800373235e-06,
"loss": 2.8373727798461914,
"step": 758
},
{
"epoch": 0.9347290640394089,
"grad_norm": 9.979755254671996,
"learning_rate": 8.703552284133565e-06,
"loss": 2.7638840675354004,
"step": 759
},
{
"epoch": 0.9359605911330049,
"grad_norm": 12.948663627163679,
"learning_rate": 8.698734160479892e-06,
"loss": 3.436288833618164,
"step": 760
},
{
"epoch": 0.937192118226601,
"grad_norm": 11.570964225425659,
"learning_rate": 8.69390843930906e-06,
"loss": 2.9463398456573486,
"step": 761
},
{
"epoch": 0.9384236453201971,
"grad_norm": 7.2963116550893945,
"learning_rate": 8.68907513053351e-06,
"loss": 2.8301844596862793,
"step": 762
},
{
"epoch": 0.9396551724137931,
"grad_norm": 22.281531901716622,
"learning_rate": 8.684234244081274e-06,
"loss": 2.329922676086426,
"step": 763
},
{
"epoch": 0.9408866995073891,
"grad_norm": 7.190935942786577,
"learning_rate": 8.67938578989595e-06,
"loss": 2.2752580642700195,
"step": 764
},
{
"epoch": 0.9421182266009852,
"grad_norm": 15.09705330042877,
"learning_rate": 8.674529777936674e-06,
"loss": 2.549682378768921,
"step": 765
},
{
"epoch": 0.9433497536945813,
"grad_norm": 12.2992067648861,
"learning_rate": 8.669666218178114e-06,
"loss": 2.177875518798828,
"step": 766
},
{
"epoch": 0.9445812807881774,
"grad_norm": 17.93631082058447,
"learning_rate": 8.66479512061044e-06,
"loss": 3.4030704498291016,
"step": 767
},
{
"epoch": 0.9458128078817734,
"grad_norm": 12.986753736790972,
"learning_rate": 8.659916495239302e-06,
"loss": 2.8890881538391113,
"step": 768
},
{
"epoch": 0.9470443349753694,
"grad_norm": 7.80817017570662,
"learning_rate": 8.655030352085816e-06,
"loss": 2.6665287017822266,
"step": 769
},
{
"epoch": 0.9482758620689655,
"grad_norm": 8.892699708308717,
"learning_rate": 8.650136701186537e-06,
"loss": 2.8044798374176025,
"step": 770
},
{
"epoch": 0.9495073891625616,
"grad_norm": 12.053681412169821,
"learning_rate": 8.645235552593447e-06,
"loss": 2.809295654296875,
"step": 771
},
{
"epoch": 0.9507389162561576,
"grad_norm": 9.563242350440067,
"learning_rate": 8.640326916373923e-06,
"loss": 2.66239070892334,
"step": 772
},
{
"epoch": 0.9519704433497537,
"grad_norm": 11.397593157331492,
"learning_rate": 8.635410802610724e-06,
"loss": 3.0714645385742188,
"step": 773
},
{
"epoch": 0.9532019704433498,
"grad_norm": 11.141014900339497,
"learning_rate": 8.630487221401974e-06,
"loss": 2.5254178047180176,
"step": 774
},
{
"epoch": 0.9544334975369458,
"grad_norm": 61.411465635020065,
"learning_rate": 8.625556182861126e-06,
"loss": 2.4160585403442383,
"step": 775
},
{
"epoch": 0.9556650246305419,
"grad_norm": 15.426050261321397,
"learning_rate": 8.620617697116957e-06,
"loss": 2.972367763519287,
"step": 776
},
{
"epoch": 0.9568965517241379,
"grad_norm": 11.628713988566439,
"learning_rate": 8.615671774313543e-06,
"loss": 2.9206340312957764,
"step": 777
},
{
"epoch": 0.958128078817734,
"grad_norm": 9.967877704713992,
"learning_rate": 8.61071842461023e-06,
"loss": 3.192002296447754,
"step": 778
},
{
"epoch": 0.9593596059113301,
"grad_norm": 8.547648553030225,
"learning_rate": 8.605757658181626e-06,
"loss": 3.0840883255004883,
"step": 779
},
{
"epoch": 0.9605911330049262,
"grad_norm": 16.72939304902535,
"learning_rate": 8.60078948521757e-06,
"loss": 3.344426155090332,
"step": 780
},
{
"epoch": 0.9618226600985221,
"grad_norm": 14.860196885671575,
"learning_rate": 8.595813915923113e-06,
"loss": 2.887132406234741,
"step": 781
},
{
"epoch": 0.9630541871921182,
"grad_norm": 16.504287008501006,
"learning_rate": 8.590830960518502e-06,
"loss": 2.354299306869507,
"step": 782
},
{
"epoch": 0.9642857142857143,
"grad_norm": 14.601237072457945,
"learning_rate": 8.585840629239158e-06,
"loss": 2.574817657470703,
"step": 783
},
{
"epoch": 0.9655172413793104,
"grad_norm": 13.581762855163804,
"learning_rate": 8.580842932335644e-06,
"loss": 2.3363120555877686,
"step": 784
},
{
"epoch": 0.9667487684729064,
"grad_norm": 8.025263413179824,
"learning_rate": 8.575837880073663e-06,
"loss": 2.452828884124756,
"step": 785
},
{
"epoch": 0.9679802955665024,
"grad_norm": 13.65572211743131,
"learning_rate": 8.57082548273402e-06,
"loss": 2.8182177543640137,
"step": 786
},
{
"epoch": 0.9692118226600985,
"grad_norm": 22.799475456448384,
"learning_rate": 8.565805750612607e-06,
"loss": 3.2871310710906982,
"step": 787
},
{
"epoch": 0.9704433497536946,
"grad_norm": 18.807286124868686,
"learning_rate": 8.560778694020387e-06,
"loss": 2.959153175354004,
"step": 788
},
{
"epoch": 0.9716748768472906,
"grad_norm": 10.644957881123116,
"learning_rate": 8.555744323283364e-06,
"loss": 2.859107732772827,
"step": 789
},
{
"epoch": 0.9729064039408867,
"grad_norm": 9.606245608690044,
"learning_rate": 8.550702648742566e-06,
"loss": 2.8537421226501465,
"step": 790
},
{
"epoch": 0.9741379310344828,
"grad_norm": 11.364684038946328,
"learning_rate": 8.545653680754029e-06,
"loss": 2.77693772315979,
"step": 791
},
{
"epoch": 0.9753694581280788,
"grad_norm": 14.67534992412754,
"learning_rate": 8.540597429688761e-06,
"loss": 2.6960999965667725,
"step": 792
},
{
"epoch": 0.9766009852216748,
"grad_norm": 14.854511519014162,
"learning_rate": 8.535533905932739e-06,
"loss": 3.3942298889160156,
"step": 793
},
{
"epoch": 0.9778325123152709,
"grad_norm": 14.090660071520212,
"learning_rate": 8.530463119886871e-06,
"loss": 2.8664398193359375,
"step": 794
},
{
"epoch": 0.979064039408867,
"grad_norm": 15.427403822127253,
"learning_rate": 8.525385081966992e-06,
"loss": 3.023148536682129,
"step": 795
},
{
"epoch": 0.9802955665024631,
"grad_norm": 27.257958140053717,
"learning_rate": 8.520299802603826e-06,
"loss": 2.7858657836914062,
"step": 796
},
{
"epoch": 0.9815270935960592,
"grad_norm": 9.983005237782791,
"learning_rate": 8.515207292242969e-06,
"loss": 2.4665451049804688,
"step": 797
},
{
"epoch": 0.9827586206896551,
"grad_norm": 11.230050254551738,
"learning_rate": 8.510107561344876e-06,
"loss": 2.412269115447998,
"step": 798
},
{
"epoch": 0.9839901477832512,
"grad_norm": 18.314579409480903,
"learning_rate": 8.505000620384834e-06,
"loss": 3.08200740814209,
"step": 799
},
{
"epoch": 0.9852216748768473,
"grad_norm": 12.337382000838234,
"learning_rate": 8.499886479852935e-06,
"loss": 2.851126194000244,
"step": 800
},
{
"epoch": 0.9864532019704434,
"grad_norm": 16.588814488060716,
"learning_rate": 8.494765150254063e-06,
"loss": 2.7692008018493652,
"step": 801
},
{
"epoch": 0.9876847290640394,
"grad_norm": 10.778667289136193,
"learning_rate": 8.489636642107867e-06,
"loss": 2.045649290084839,
"step": 802
},
{
"epoch": 0.9889162561576355,
"grad_norm": 16.235817598925898,
"learning_rate": 8.484500965948746e-06,
"loss": 3.0901870727539062,
"step": 803
},
{
"epoch": 0.9901477832512315,
"grad_norm": 12.772148604340376,
"learning_rate": 8.479358132325815e-06,
"loss": 4.652253150939941,
"step": 804
},
{
"epoch": 0.9913793103448276,
"grad_norm": 30.743685192648066,
"learning_rate": 8.474208151802898e-06,
"loss": 3.992189884185791,
"step": 805
},
{
"epoch": 0.9926108374384236,
"grad_norm": 8.73281768145785,
"learning_rate": 8.469051034958496e-06,
"loss": 2.7150464057922363,
"step": 806
},
{
"epoch": 0.9938423645320197,
"grad_norm": 9.053303002827397,
"learning_rate": 8.46388679238577e-06,
"loss": 2.807770013809204,
"step": 807
},
{
"epoch": 0.9950738916256158,
"grad_norm": 10.322870900342917,
"learning_rate": 8.458715434692515e-06,
"loss": 2.386625289916992,
"step": 808
},
{
"epoch": 0.9963054187192119,
"grad_norm": 11.08968761753187,
"learning_rate": 8.453536972501146e-06,
"loss": 2.585855484008789,
"step": 809
},
{
"epoch": 0.9975369458128078,
"grad_norm": 17.867602225530977,
"learning_rate": 8.448351416448664e-06,
"loss": 1.9756630659103394,
"step": 810
},
{
"epoch": 0.9987684729064039,
"grad_norm": 10.119397987976452,
"learning_rate": 8.443158777186652e-06,
"loss": 2.844794511795044,
"step": 811
},
{
"epoch": 1.0,
"grad_norm": 7.980679156666685,
"learning_rate": 8.437959065381232e-06,
"loss": 2.8835721015930176,
"step": 812
},
{
"epoch": 1.001231527093596,
"grad_norm": 7.910274895398585,
"learning_rate": 8.432752291713058e-06,
"loss": 1.4173179864883423,
"step": 813
},
{
"epoch": 1.0024630541871922,
"grad_norm": 11.748384071481883,
"learning_rate": 8.427538466877294e-06,
"loss": 1.3743655681610107,
"step": 814
},
{
"epoch": 1.0036945812807883,
"grad_norm": 15.520903995356328,
"learning_rate": 8.422317601583576e-06,
"loss": 1.448968768119812,
"step": 815
},
{
"epoch": 1.0049261083743843,
"grad_norm": 10.900297712673185,
"learning_rate": 8.417089706556015e-06,
"loss": 1.4555410146713257,
"step": 816
},
{
"epoch": 1.0061576354679802,
"grad_norm": 14.944365989075473,
"learning_rate": 8.411854792533154e-06,
"loss": 1.3096075057983398,
"step": 817
},
{
"epoch": 1.0073891625615763,
"grad_norm": 28.47454569698464,
"learning_rate": 8.406612870267957e-06,
"loss": 1.8452348709106445,
"step": 818
},
{
"epoch": 1.0086206896551724,
"grad_norm": 15.756002610301957,
"learning_rate": 8.401363950527777e-06,
"loss": 1.6339285373687744,
"step": 819
},
{
"epoch": 1.0098522167487685,
"grad_norm": 6.289340790151406,
"learning_rate": 8.39610804409435e-06,
"loss": 1.714133381843567,
"step": 820
},
{
"epoch": 1.0110837438423645,
"grad_norm": 11.713574774158978,
"learning_rate": 8.390845161763756e-06,
"loss": 1.7810550928115845,
"step": 821
},
{
"epoch": 1.0123152709359606,
"grad_norm": 13.688437053039554,
"learning_rate": 8.385575314346408e-06,
"loss": 1.2523250579833984,
"step": 822
},
{
"epoch": 1.0135467980295567,
"grad_norm": 9.835238587520983,
"learning_rate": 8.380298512667023e-06,
"loss": 1.4618515968322754,
"step": 823
},
{
"epoch": 1.0147783251231528,
"grad_norm": 12.580368500055666,
"learning_rate": 8.375014767564606e-06,
"loss": 1.5188508033752441,
"step": 824
},
{
"epoch": 1.0160098522167487,
"grad_norm": 13.76649655840591,
"learning_rate": 8.369724089892423e-06,
"loss": 1.3847301006317139,
"step": 825
},
{
"epoch": 1.0172413793103448,
"grad_norm": 10.435853268719002,
"learning_rate": 8.364426490517978e-06,
"loss": 1.2926149368286133,
"step": 826
},
{
"epoch": 1.0184729064039408,
"grad_norm": 16.445003227804108,
"learning_rate": 8.359121980322992e-06,
"loss": 2.3063907623291016,
"step": 827
},
{
"epoch": 1.019704433497537,
"grad_norm": 11.557235656795728,
"learning_rate": 8.353810570203392e-06,
"loss": 1.8268505334854126,
"step": 828
},
{
"epoch": 1.020935960591133,
"grad_norm": 14.632274264873946,
"learning_rate": 8.34849227106926e-06,
"loss": 1.7018903493881226,
"step": 829
},
{
"epoch": 1.022167487684729,
"grad_norm": 11.600489411721503,
"learning_rate": 8.343167093844847e-06,
"loss": 1.228044867515564,
"step": 830
},
{
"epoch": 1.0233990147783252,
"grad_norm": 16.088239405853525,
"learning_rate": 8.337835049468517e-06,
"loss": 1.8953372240066528,
"step": 831
},
{
"epoch": 1.0246305418719213,
"grad_norm": 18.96191614490354,
"learning_rate": 8.332496148892748e-06,
"loss": 2.2595765590667725,
"step": 832
},
{
"epoch": 1.0258620689655173,
"grad_norm": 15.40920733163635,
"learning_rate": 8.327150403084105e-06,
"loss": 1.9772108793258667,
"step": 833
},
{
"epoch": 1.0270935960591132,
"grad_norm": 13.682030994380478,
"learning_rate": 8.321797823023201e-06,
"loss": 1.6397690773010254,
"step": 834
},
{
"epoch": 1.0283251231527093,
"grad_norm": 15.155038881668695,
"learning_rate": 8.3164384197047e-06,
"loss": 1.8092628717422485,
"step": 835
},
{
"epoch": 1.0295566502463054,
"grad_norm": 11.138568264810678,
"learning_rate": 8.311072204137272e-06,
"loss": 1.4974594116210938,
"step": 836
},
{
"epoch": 1.0307881773399015,
"grad_norm": 12.21109867389211,
"learning_rate": 8.305699187343586e-06,
"loss": 1.6198664903640747,
"step": 837
},
{
"epoch": 1.0320197044334976,
"grad_norm": 15.324750685835358,
"learning_rate": 8.300319380360278e-06,
"loss": 1.3746960163116455,
"step": 838
},
{
"epoch": 1.0332512315270936,
"grad_norm": 7.824249576144248,
"learning_rate": 8.294932794237936e-06,
"loss": 1.6171293258666992,
"step": 839
},
{
"epoch": 1.0344827586206897,
"grad_norm": 8.892333167572344,
"learning_rate": 8.289539440041066e-06,
"loss": 1.569738507270813,
"step": 840
},
{
"epoch": 1.0357142857142858,
"grad_norm": 11.852198048161208,
"learning_rate": 8.284139328848083e-06,
"loss": 1.2823517322540283,
"step": 841
},
{
"epoch": 1.0369458128078817,
"grad_norm": 8.261136034676777,
"learning_rate": 8.278732471751275e-06,
"loss": 1.646303415298462,
"step": 842
},
{
"epoch": 1.0381773399014778,
"grad_norm": 10.756475200770923,
"learning_rate": 8.273318879856794e-06,
"loss": 1.1557375192642212,
"step": 843
},
{
"epoch": 1.0394088669950738,
"grad_norm": 11.706598803766697,
"learning_rate": 8.26789856428462e-06,
"loss": 1.8793773651123047,
"step": 844
},
{
"epoch": 1.04064039408867,
"grad_norm": 12.96726521358098,
"learning_rate": 8.262471536168547e-06,
"loss": 1.8577170372009277,
"step": 845
},
{
"epoch": 1.041871921182266,
"grad_norm": 9.437922676603566,
"learning_rate": 8.257037806656156e-06,
"loss": 1.6104650497436523,
"step": 846
},
{
"epoch": 1.043103448275862,
"grad_norm": 9.578661144979,
"learning_rate": 8.251597386908791e-06,
"loss": 1.5425922870635986,
"step": 847
},
{
"epoch": 1.0443349753694582,
"grad_norm": 20.263987667471525,
"learning_rate": 8.246150288101544e-06,
"loss": 1.681383728981018,
"step": 848
},
{
"epoch": 1.0455665024630543,
"grad_norm": 13.601576634163374,
"learning_rate": 8.240696521423221e-06,
"loss": 1.7646219730377197,
"step": 849
},
{
"epoch": 1.0467980295566504,
"grad_norm": 7.679649660703675,
"learning_rate": 8.23523609807633e-06,
"loss": 1.445223331451416,
"step": 850
},
{
"epoch": 1.0480295566502462,
"grad_norm": 14.66829985016366,
"learning_rate": 8.229769029277044e-06,
"loss": 0.9492518901824951,
"step": 851
},
{
"epoch": 1.0492610837438423,
"grad_norm": 10.487758371701569,
"learning_rate": 8.224295326255194e-06,
"loss": 1.33433997631073,
"step": 852
},
{
"epoch": 1.0504926108374384,
"grad_norm": 10.533804685248148,
"learning_rate": 8.218815000254233e-06,
"loss": 1.712221384048462,
"step": 853
},
{
"epoch": 1.0517241379310345,
"grad_norm": 9.208819021387981,
"learning_rate": 8.213328062531223e-06,
"loss": 2.256254196166992,
"step": 854
},
{
"epoch": 1.0529556650246306,
"grad_norm": 20.4330836347585,
"learning_rate": 8.207834524356804e-06,
"loss": 1.1827871799468994,
"step": 855
},
{
"epoch": 1.0541871921182266,
"grad_norm": 16.459676535775454,
"learning_rate": 8.202334397015173e-06,
"loss": 1.831944465637207,
"step": 856
},
{
"epoch": 1.0554187192118227,
"grad_norm": 9.540607740889314,
"learning_rate": 8.196827691804066e-06,
"loss": 1.4239716529846191,
"step": 857
},
{
"epoch": 1.0566502463054188,
"grad_norm": 8.826612392912715,
"learning_rate": 8.191314420034728e-06,
"loss": 1.4468379020690918,
"step": 858
},
{
"epoch": 1.0578817733990147,
"grad_norm": 11.710928299860754,
"learning_rate": 8.185794593031889e-06,
"loss": 1.5082018375396729,
"step": 859
},
{
"epoch": 1.0591133004926108,
"grad_norm": 11.098469341339896,
"learning_rate": 8.180268222133748e-06,
"loss": 1.7838118076324463,
"step": 860
},
{
"epoch": 1.0603448275862069,
"grad_norm": 14.517325254327519,
"learning_rate": 8.174735318691946e-06,
"loss": 2.0072226524353027,
"step": 861
},
{
"epoch": 1.061576354679803,
"grad_norm": 15.816554295123568,
"learning_rate": 8.16919589407154e-06,
"loss": 1.521295189857483,
"step": 862
},
{
"epoch": 1.062807881773399,
"grad_norm": 10.07588615463877,
"learning_rate": 8.163649959650983e-06,
"loss": 1.790357232093811,
"step": 863
},
{
"epoch": 1.064039408866995,
"grad_norm": 12.92318973646725,
"learning_rate": 8.1580975268221e-06,
"loss": 1.602294683456421,
"step": 864
},
{
"epoch": 1.0652709359605912,
"grad_norm": 16.86268483373184,
"learning_rate": 8.152538606990065e-06,
"loss": 1.4220796823501587,
"step": 865
},
{
"epoch": 1.0665024630541873,
"grad_norm": 8.194415784575718,
"learning_rate": 8.146973211573378e-06,
"loss": 1.5728261470794678,
"step": 866
},
{
"epoch": 1.0677339901477834,
"grad_norm": 9.338981810977407,
"learning_rate": 8.141401352003834e-06,
"loss": 1.4759845733642578,
"step": 867
},
{
"epoch": 1.0689655172413792,
"grad_norm": 13.09579029321424,
"learning_rate": 8.135823039726513e-06,
"loss": 1.0524405241012573,
"step": 868
},
{
"epoch": 1.0701970443349753,
"grad_norm": 11.844876838448121,
"learning_rate": 8.130238286199747e-06,
"loss": 1.538460373878479,
"step": 869
},
{
"epoch": 1.0714285714285714,
"grad_norm": 14.772231246122598,
"learning_rate": 8.124647102895098e-06,
"loss": 1.1455146074295044,
"step": 870
},
{
"epoch": 1.0726600985221675,
"grad_norm": 6.428068633502984,
"learning_rate": 8.119049501297336e-06,
"loss": 1.5209722518920898,
"step": 871
},
{
"epoch": 1.0738916256157636,
"grad_norm": 8.28556104097166,
"learning_rate": 8.113445492904416e-06,
"loss": 1.359959602355957,
"step": 872
},
{
"epoch": 1.0751231527093597,
"grad_norm": 17.73488508571987,
"learning_rate": 8.107835089227446e-06,
"loss": 0.7508935928344727,
"step": 873
},
{
"epoch": 1.0763546798029557,
"grad_norm": 11.851747710913228,
"learning_rate": 8.102218301790686e-06,
"loss": 1.1200660467147827,
"step": 874
},
{
"epoch": 1.0775862068965518,
"grad_norm": 19.474238137735632,
"learning_rate": 8.096595142131491e-06,
"loss": 1.4502555131912231,
"step": 875
},
{
"epoch": 1.0788177339901477,
"grad_norm": 15.231876740076657,
"learning_rate": 8.090965621800317e-06,
"loss": 1.4533472061157227,
"step": 876
},
{
"epoch": 1.0800492610837438,
"grad_norm": 11.532100577512736,
"learning_rate": 8.085329752360683e-06,
"loss": 1.3467981815338135,
"step": 877
},
{
"epoch": 1.0812807881773399,
"grad_norm": 13.292362259628844,
"learning_rate": 8.079687545389144e-06,
"loss": 1.5720915794372559,
"step": 878
},
{
"epoch": 1.082512315270936,
"grad_norm": 9.912980730028881,
"learning_rate": 8.074039012475277e-06,
"loss": 0.9794504642486572,
"step": 879
},
{
"epoch": 1.083743842364532,
"grad_norm": 13.363222552608596,
"learning_rate": 8.068384165221657e-06,
"loss": 1.8581080436706543,
"step": 880
},
{
"epoch": 1.0849753694581281,
"grad_norm": 11.004102766432679,
"learning_rate": 8.062723015243821e-06,
"loss": 1.5307658910751343,
"step": 881
},
{
"epoch": 1.0862068965517242,
"grad_norm": 18.014628524050508,
"learning_rate": 8.05705557417026e-06,
"loss": 2.7890782356262207,
"step": 882
},
{
"epoch": 1.0874384236453203,
"grad_norm": 14.288061386453462,
"learning_rate": 8.051381853642385e-06,
"loss": 1.7938904762268066,
"step": 883
},
{
"epoch": 1.0886699507389164,
"grad_norm": 10.969422494881371,
"learning_rate": 8.0457018653145e-06,
"loss": 1.7228388786315918,
"step": 884
},
{
"epoch": 1.0899014778325122,
"grad_norm": 12.323796763628843,
"learning_rate": 8.04001562085379e-06,
"loss": 1.2761911153793335,
"step": 885
},
{
"epoch": 1.0911330049261083,
"grad_norm": 14.027385869484647,
"learning_rate": 8.034323131940288e-06,
"loss": 1.2001762390136719,
"step": 886
},
{
"epoch": 1.0923645320197044,
"grad_norm": 14.618738176876956,
"learning_rate": 8.028624410266856e-06,
"loss": 1.0602792501449585,
"step": 887
},
{
"epoch": 1.0935960591133005,
"grad_norm": 11.93157233511751,
"learning_rate": 8.022919467539157e-06,
"loss": 1.6093053817749023,
"step": 888
},
{
"epoch": 1.0948275862068966,
"grad_norm": 10.808992515441345,
"learning_rate": 8.017208315475633e-06,
"loss": 1.3845837116241455,
"step": 889
},
{
"epoch": 1.0960591133004927,
"grad_norm": 12.467752533525676,
"learning_rate": 8.011490965807479e-06,
"loss": 1.170523762702942,
"step": 890
},
{
"epoch": 1.0972906403940887,
"grad_norm": 17.336013797078692,
"learning_rate": 8.005767430278619e-06,
"loss": 2.2524640560150146,
"step": 891
},
{
"epoch": 1.0985221674876848,
"grad_norm": 15.86628802074285,
"learning_rate": 8.00003772064569e-06,
"loss": 1.900492787361145,
"step": 892
},
{
"epoch": 1.0997536945812807,
"grad_norm": 19.413325130840665,
"learning_rate": 7.994301848678006e-06,
"loss": 1.9371180534362793,
"step": 893
},
{
"epoch": 1.1009852216748768,
"grad_norm": 4.577148785717797,
"learning_rate": 7.98855982615754e-06,
"loss": 0.5737314224243164,
"step": 894
},
{
"epoch": 1.1022167487684729,
"grad_norm": 10.864604119199031,
"learning_rate": 7.982811664878897e-06,
"loss": 1.9806501865386963,
"step": 895
},
{
"epoch": 1.103448275862069,
"grad_norm": 8.224536911257772,
"learning_rate": 7.977057376649295e-06,
"loss": 1.0362755060195923,
"step": 896
},
{
"epoch": 1.104679802955665,
"grad_norm": 13.847190655637428,
"learning_rate": 7.971296973288534e-06,
"loss": 1.70633864402771,
"step": 897
},
{
"epoch": 1.1059113300492611,
"grad_norm": 11.90483842365472,
"learning_rate": 7.965530466628977e-06,
"loss": 1.787100911140442,
"step": 898
},
{
"epoch": 1.1071428571428572,
"grad_norm": 7.493522717607931,
"learning_rate": 7.959757868515526e-06,
"loss": 1.725630283355713,
"step": 899
},
{
"epoch": 1.1083743842364533,
"grad_norm": 12.386314393672189,
"learning_rate": 7.953979190805587e-06,
"loss": 1.216347575187683,
"step": 900
},
{
"epoch": 1.1096059113300494,
"grad_norm": 13.629660364524488,
"learning_rate": 7.948194445369065e-06,
"loss": 1.4683033227920532,
"step": 901
},
{
"epoch": 1.1108374384236452,
"grad_norm": 9.487923792239608,
"learning_rate": 7.942403644088319e-06,
"loss": 1.1516010761260986,
"step": 902
},
{
"epoch": 1.1120689655172413,
"grad_norm": 10.340810165841779,
"learning_rate": 7.936606798858154e-06,
"loss": 1.9040346145629883,
"step": 903
},
{
"epoch": 1.1133004926108374,
"grad_norm": 10.742162155829218,
"learning_rate": 7.930803921585787e-06,
"loss": 1.3092480897903442,
"step": 904
},
{
"epoch": 1.1145320197044335,
"grad_norm": 16.471340717748625,
"learning_rate": 7.924995024190825e-06,
"loss": 1.5384130477905273,
"step": 905
},
{
"epoch": 1.1157635467980296,
"grad_norm": 11.414793353837775,
"learning_rate": 7.91918011860524e-06,
"loss": 1.537634015083313,
"step": 906
},
{
"epoch": 1.1169950738916257,
"grad_norm": 12.176064899819426,
"learning_rate": 7.91335921677335e-06,
"loss": 1.7487473487854004,
"step": 907
},
{
"epoch": 1.1182266009852218,
"grad_norm": 12.781345279460623,
"learning_rate": 7.907532330651784e-06,
"loss": 2.079786539077759,
"step": 908
},
{
"epoch": 1.1194581280788178,
"grad_norm": 10.30058954805613,
"learning_rate": 7.901699472209467e-06,
"loss": 1.8143104314804077,
"step": 909
},
{
"epoch": 1.1206896551724137,
"grad_norm": 15.820572235657158,
"learning_rate": 7.89586065342759e-06,
"loss": 1.532914161682129,
"step": 910
},
{
"epoch": 1.1219211822660098,
"grad_norm": 26.078680608781927,
"learning_rate": 7.890015886299587e-06,
"loss": 1.2643623352050781,
"step": 911
},
{
"epoch": 1.1231527093596059,
"grad_norm": 15.92927259283418,
"learning_rate": 7.884165182831112e-06,
"loss": 1.9245643615722656,
"step": 912
},
{
"epoch": 1.124384236453202,
"grad_norm": 8.730585299979154,
"learning_rate": 7.878308555040012e-06,
"loss": 1.7177766561508179,
"step": 913
},
{
"epoch": 1.125615763546798,
"grad_norm": 13.722962990198047,
"learning_rate": 7.872446014956302e-06,
"loss": 1.8152745962142944,
"step": 914
},
{
"epoch": 1.1268472906403941,
"grad_norm": 12.040054937289696,
"learning_rate": 7.86657757462214e-06,
"loss": 1.1599400043487549,
"step": 915
},
{
"epoch": 1.1280788177339902,
"grad_norm": 17.03991328119548,
"learning_rate": 7.860703246091808e-06,
"loss": 2.191415786743164,
"step": 916
},
{
"epoch": 1.1293103448275863,
"grad_norm": 8.884816055359531,
"learning_rate": 7.85482304143168e-06,
"loss": 1.395401120185852,
"step": 917
},
{
"epoch": 1.1305418719211824,
"grad_norm": 10.016142876641439,
"learning_rate": 7.848936972720203e-06,
"loss": 1.3161064386367798,
"step": 918
},
{
"epoch": 1.1317733990147782,
"grad_norm": 10.950651931490869,
"learning_rate": 7.843045052047863e-06,
"loss": 1.1442368030548096,
"step": 919
},
{
"epoch": 1.1330049261083743,
"grad_norm": 11.684566217639523,
"learning_rate": 7.837147291517172e-06,
"loss": 1.7718126773834229,
"step": 920
},
{
"epoch": 1.1342364532019704,
"grad_norm": 38.19632435773612,
"learning_rate": 7.831243703242636e-06,
"loss": 0.8722761869430542,
"step": 921
},
{
"epoch": 1.1354679802955665,
"grad_norm": 13.481663274756508,
"learning_rate": 7.825334299350733e-06,
"loss": 1.5427806377410889,
"step": 922
},
{
"epoch": 1.1366995073891626,
"grad_norm": 12.916623808621747,
"learning_rate": 7.819419091979884e-06,
"loss": 1.1668936014175415,
"step": 923
},
{
"epoch": 1.1379310344827587,
"grad_norm": 33.988394562573184,
"learning_rate": 7.813498093280432e-06,
"loss": 1.1266424655914307,
"step": 924
},
{
"epoch": 1.1391625615763548,
"grad_norm": 12.20456485780647,
"learning_rate": 7.807571315414616e-06,
"loss": 1.493699550628662,
"step": 925
},
{
"epoch": 1.1403940886699506,
"grad_norm": 11.501099824006364,
"learning_rate": 7.801638770556547e-06,
"loss": 1.6297705173492432,
"step": 926
},
{
"epoch": 1.1416256157635467,
"grad_norm": 15.624448888450939,
"learning_rate": 7.795700470892177e-06,
"loss": 2.0215024948120117,
"step": 927
},
{
"epoch": 1.1428571428571428,
"grad_norm": 16.250949070025708,
"learning_rate": 7.78975642861929e-06,
"loss": 1.6887433528900146,
"step": 928
},
{
"epoch": 1.1440886699507389,
"grad_norm": 11.317008900299918,
"learning_rate": 7.783806655947454e-06,
"loss": 1.3021103143692017,
"step": 929
},
{
"epoch": 1.145320197044335,
"grad_norm": 18.00432398689311,
"learning_rate": 7.777851165098012e-06,
"loss": 1.2565847635269165,
"step": 930
},
{
"epoch": 1.146551724137931,
"grad_norm": 12.425268826770786,
"learning_rate": 7.771889968304054e-06,
"loss": 2.616732358932495,
"step": 931
},
{
"epoch": 1.1477832512315271,
"grad_norm": 8.224670550968264,
"learning_rate": 7.765923077810389e-06,
"loss": 1.4130675792694092,
"step": 932
},
{
"epoch": 1.1490147783251232,
"grad_norm": 10.969684493935905,
"learning_rate": 7.759950505873523e-06,
"loss": 1.4476386308670044,
"step": 933
},
{
"epoch": 1.1502463054187193,
"grad_norm": 11.651048950094761,
"learning_rate": 7.753972264761629e-06,
"loss": 2.25156307220459,
"step": 934
},
{
"epoch": 1.1514778325123154,
"grad_norm": 8.613574530576384,
"learning_rate": 7.747988366754529e-06,
"loss": 1.5051602125167847,
"step": 935
},
{
"epoch": 1.1527093596059113,
"grad_norm": 7.732488282674765,
"learning_rate": 7.74199882414366e-06,
"loss": 1.6275739669799805,
"step": 936
},
{
"epoch": 1.1539408866995073,
"grad_norm": 8.808852629450387,
"learning_rate": 7.736003649232058e-06,
"loss": 1.595947504043579,
"step": 937
},
{
"epoch": 1.1551724137931034,
"grad_norm": 9.458208308368622,
"learning_rate": 7.730002854334328e-06,
"loss": 1.4467124938964844,
"step": 938
},
{
"epoch": 1.1564039408866995,
"grad_norm": 9.214195809195965,
"learning_rate": 7.723996451776615e-06,
"loss": 1.2888911962509155,
"step": 939
},
{
"epoch": 1.1576354679802956,
"grad_norm": 9.788392349003187,
"learning_rate": 7.717984453896585e-06,
"loss": 1.2005081176757812,
"step": 940
},
{
"epoch": 1.1588669950738917,
"grad_norm": 13.47176609715776,
"learning_rate": 7.711966873043396e-06,
"loss": 1.5737872123718262,
"step": 941
},
{
"epoch": 1.1600985221674878,
"grad_norm": 14.995704151739991,
"learning_rate": 7.705943721577679e-06,
"loss": 1.929309368133545,
"step": 942
},
{
"epoch": 1.1613300492610836,
"grad_norm": 17.48600802078703,
"learning_rate": 7.699915011871502e-06,
"loss": 1.2395710945129395,
"step": 943
},
{
"epoch": 1.1625615763546797,
"grad_norm": 17.02963003158409,
"learning_rate": 7.693880756308349e-06,
"loss": 1.5058845281600952,
"step": 944
},
{
"epoch": 1.1637931034482758,
"grad_norm": 9.980347268918823,
"learning_rate": 7.687840967283102e-06,
"loss": 1.1811325550079346,
"step": 945
},
{
"epoch": 1.1650246305418719,
"grad_norm": 10.638678008803145,
"learning_rate": 7.681795657202004e-06,
"loss": 1.0631262063980103,
"step": 946
},
{
"epoch": 1.166256157635468,
"grad_norm": 13.280226823401785,
"learning_rate": 7.675744838482641e-06,
"loss": 1.8445112705230713,
"step": 947
},
{
"epoch": 1.167487684729064,
"grad_norm": 14.581956189852988,
"learning_rate": 7.669688523553913e-06,
"loss": 0.4735199511051178,
"step": 948
},
{
"epoch": 1.1687192118226601,
"grad_norm": 17.412681962110952,
"learning_rate": 7.66362672485601e-06,
"loss": 2.7862026691436768,
"step": 949
},
{
"epoch": 1.1699507389162562,
"grad_norm": 15.996981867868751,
"learning_rate": 7.657559454840386e-06,
"loss": 2.1690142154693604,
"step": 950
},
{
"epoch": 1.1711822660098523,
"grad_norm": 13.46492564795987,
"learning_rate": 7.651486725969736e-06,
"loss": 1.7143161296844482,
"step": 951
},
{
"epoch": 1.1724137931034484,
"grad_norm": 60.546763405202356,
"learning_rate": 7.645408550717966e-06,
"loss": 1.5288606882095337,
"step": 952
},
{
"epoch": 1.1736453201970443,
"grad_norm": 20.830833617022666,
"learning_rate": 7.639324941570165e-06,
"loss": 1.8929002285003662,
"step": 953
},
{
"epoch": 1.1748768472906403,
"grad_norm": 11.758979912185547,
"learning_rate": 7.633235911022592e-06,
"loss": 1.5853391885757446,
"step": 954
},
{
"epoch": 1.1761083743842364,
"grad_norm": 9.321138258104417,
"learning_rate": 7.627141471582635e-06,
"loss": 1.1136324405670166,
"step": 955
},
{
"epoch": 1.1773399014778325,
"grad_norm": 12.598497007373025,
"learning_rate": 7.6210416357687975e-06,
"loss": 1.868667721748352,
"step": 956
},
{
"epoch": 1.1785714285714286,
"grad_norm": 18.119098704002848,
"learning_rate": 7.614936416110668e-06,
"loss": 1.5594688653945923,
"step": 957
},
{
"epoch": 1.1798029556650247,
"grad_norm": 12.510268205050629,
"learning_rate": 7.6088258251488845e-06,
"loss": 2.3145830631256104,
"step": 958
},
{
"epoch": 1.1810344827586208,
"grad_norm": 21.45877658729593,
"learning_rate": 7.6027098754351306e-06,
"loss": 1.1473604440689087,
"step": 959
},
{
"epoch": 1.1822660098522166,
"grad_norm": 14.411977842812997,
"learning_rate": 7.596588579532087e-06,
"loss": 2.2835638523101807,
"step": 960
},
{
"epoch": 1.1834975369458127,
"grad_norm": 10.612962818159787,
"learning_rate": 7.590461950013424e-06,
"loss": 1.8787577152252197,
"step": 961
},
{
"epoch": 1.1847290640394088,
"grad_norm": 14.448843378652771,
"learning_rate": 7.584329999463763e-06,
"loss": 2.114804983139038,
"step": 962
},
{
"epoch": 1.185960591133005,
"grad_norm": 18.66312529631292,
"learning_rate": 7.578192740478656e-06,
"loss": 1.288927435874939,
"step": 963
},
{
"epoch": 1.187192118226601,
"grad_norm": 13.413800953526167,
"learning_rate": 7.572050185664558e-06,
"loss": 1.929607629776001,
"step": 964
},
{
"epoch": 1.188423645320197,
"grad_norm": 33.30553598268168,
"learning_rate": 7.565902347638806e-06,
"loss": 0.5397343039512634,
"step": 965
},
{
"epoch": 1.1896551724137931,
"grad_norm": 22.357001178408265,
"learning_rate": 7.559749239029584e-06,
"loss": 1.1908174753189087,
"step": 966
},
{
"epoch": 1.1908866995073892,
"grad_norm": 12.645033432851402,
"learning_rate": 7.553590872475909e-06,
"loss": 1.624518632888794,
"step": 967
},
{
"epoch": 1.1921182266009853,
"grad_norm": 7.88579724345472,
"learning_rate": 7.547427260627586e-06,
"loss": 1.3011376857757568,
"step": 968
},
{
"epoch": 1.1933497536945814,
"grad_norm": 12.668296763355277,
"learning_rate": 7.541258416145212e-06,
"loss": 1.2930490970611572,
"step": 969
},
{
"epoch": 1.1945812807881773,
"grad_norm": 13.656364437533624,
"learning_rate": 7.535084351700117e-06,
"loss": 1.34272038936615,
"step": 970
},
{
"epoch": 1.1958128078817734,
"grad_norm": 7.953764967047039,
"learning_rate": 7.528905079974358e-06,
"loss": 1.2804269790649414,
"step": 971
},
{
"epoch": 1.1970443349753694,
"grad_norm": 30.30009152991955,
"learning_rate": 7.522720613660691e-06,
"loss": 1.7138396501541138,
"step": 972
},
{
"epoch": 1.1982758620689655,
"grad_norm": 11.304720421109014,
"learning_rate": 7.5165309654625405e-06,
"loss": 1.7358574867248535,
"step": 973
},
{
"epoch": 1.1995073891625616,
"grad_norm": 12.764936977199811,
"learning_rate": 7.510336148093975e-06,
"loss": 1.0514552593231201,
"step": 974
},
{
"epoch": 1.2007389162561577,
"grad_norm": 13.712017805285841,
"learning_rate": 7.504136174279679e-06,
"loss": 1.7314313650131226,
"step": 975
},
{
"epoch": 1.2019704433497538,
"grad_norm": 10.549295388514395,
"learning_rate": 7.4979310567549315e-06,
"loss": 1.0069202184677124,
"step": 976
},
{
"epoch": 1.2032019704433496,
"grad_norm": 11.995004609846932,
"learning_rate": 7.491720808265576e-06,
"loss": 1.1851680278778076,
"step": 977
},
{
"epoch": 1.2044334975369457,
"grad_norm": 9.145447142909285,
"learning_rate": 7.485505441567995e-06,
"loss": 1.355776309967041,
"step": 978
},
{
"epoch": 1.2056650246305418,
"grad_norm": 12.426586307445273,
"learning_rate": 7.4792849694290846e-06,
"loss": 1.5034677982330322,
"step": 979
},
{
"epoch": 1.206896551724138,
"grad_norm": 10.349726791509415,
"learning_rate": 7.473059404626229e-06,
"loss": 1.9321900606155396,
"step": 980
},
{
"epoch": 1.208128078817734,
"grad_norm": 15.998756607416226,
"learning_rate": 7.466828759947271e-06,
"loss": 1.4899095296859741,
"step": 981
},
{
"epoch": 1.20935960591133,
"grad_norm": 9.148483453369403,
"learning_rate": 7.46059304819049e-06,
"loss": 1.9984737634658813,
"step": 982
},
{
"epoch": 1.2105911330049262,
"grad_norm": 14.110455851158502,
"learning_rate": 7.454352282164572e-06,
"loss": 1.7756625413894653,
"step": 983
},
{
"epoch": 1.2118226600985222,
"grad_norm": 14.856359846911952,
"learning_rate": 7.448106474688588e-06,
"loss": 1.47117018699646,
"step": 984
},
{
"epoch": 1.2130541871921183,
"grad_norm": 11.010014718420686,
"learning_rate": 7.441855638591958e-06,
"loss": 1.3485603332519531,
"step": 985
},
{
"epoch": 1.2142857142857142,
"grad_norm": 9.111669104291623,
"learning_rate": 7.435599786714438e-06,
"loss": 1.3982055187225342,
"step": 986
},
{
"epoch": 1.2155172413793103,
"grad_norm": 8.494506145789243,
"learning_rate": 7.429338931906085e-06,
"loss": 1.4942795038223267,
"step": 987
},
{
"epoch": 1.2167487684729064,
"grad_norm": 10.475857134873458,
"learning_rate": 7.423073087027228e-06,
"loss": 2.227587938308716,
"step": 988
},
{
"epoch": 1.2179802955665024,
"grad_norm": 14.131512244457296,
"learning_rate": 7.416802264948455e-06,
"loss": 1.523234486579895,
"step": 989
},
{
"epoch": 1.2192118226600985,
"grad_norm": 26.011485441346537,
"learning_rate": 7.410526478550568e-06,
"loss": 3.9873814582824707,
"step": 990
},
{
"epoch": 1.2204433497536946,
"grad_norm": 8.306933788704631,
"learning_rate": 7.404245740724573e-06,
"loss": 1.279615044593811,
"step": 991
},
{
"epoch": 1.2216748768472907,
"grad_norm": 9.109406755351628,
"learning_rate": 7.3979600643716435e-06,
"loss": 0.9347010850906372,
"step": 992
},
{
"epoch": 1.2229064039408868,
"grad_norm": 8.57513677802596,
"learning_rate": 7.391669462403096e-06,
"loss": 1.9017002582550049,
"step": 993
},
{
"epoch": 1.2241379310344827,
"grad_norm": 10.325069084719962,
"learning_rate": 7.385373947740369e-06,
"loss": 1.7247897386550903,
"step": 994
},
{
"epoch": 1.2253694581280787,
"grad_norm": 13.648497855444653,
"learning_rate": 7.379073533314988e-06,
"loss": 0.7111251950263977,
"step": 995
},
{
"epoch": 1.2266009852216748,
"grad_norm": 10.812707758109589,
"learning_rate": 7.372768232068544e-06,
"loss": 0.9086591601371765,
"step": 996
},
{
"epoch": 1.227832512315271,
"grad_norm": 11.1413160950967,
"learning_rate": 7.366458056952668e-06,
"loss": 1.6426423788070679,
"step": 997
},
{
"epoch": 1.229064039408867,
"grad_norm": 19.358982299314505,
"learning_rate": 7.360143020929e-06,
"loss": 1.2501566410064697,
"step": 998
},
{
"epoch": 1.230295566502463,
"grad_norm": 15.35154457763416,
"learning_rate": 7.353823136969167e-06,
"loss": 2.263824939727783,
"step": 999
},
{
"epoch": 1.2315270935960592,
"grad_norm": 15.502037939673096,
"learning_rate": 7.34749841805475e-06,
"loss": 1.3503868579864502,
"step": 1000
},
{
"epoch": 1.2327586206896552,
"grad_norm": 12.387685564521446,
"learning_rate": 7.341168877177267e-06,
"loss": 1.2844277620315552,
"step": 1001
},
{
"epoch": 1.2339901477832513,
"grad_norm": 21.028406448646585,
"learning_rate": 7.3348345273381365e-06,
"loss": 1.823725700378418,
"step": 1002
},
{
"epoch": 1.2352216748768472,
"grad_norm": 12.53431965462443,
"learning_rate": 7.328495381548655e-06,
"loss": 1.8349339962005615,
"step": 1003
},
{
"epoch": 1.2364532019704433,
"grad_norm": 11.75012181314542,
"learning_rate": 7.322151452829972e-06,
"loss": 1.431024432182312,
"step": 1004
},
{
"epoch": 1.2376847290640394,
"grad_norm": 7.268447687614364,
"learning_rate": 7.315802754213062e-06,
"loss": 0.8406596183776855,
"step": 1005
},
{
"epoch": 1.2389162561576355,
"grad_norm": 16.476664169610704,
"learning_rate": 7.309449298738696e-06,
"loss": 1.7037804126739502,
"step": 1006
},
{
"epoch": 1.2401477832512315,
"grad_norm": 10.719400575974607,
"learning_rate": 7.303091099457418e-06,
"loss": 1.4264461994171143,
"step": 1007
},
{
"epoch": 1.2413793103448276,
"grad_norm": 11.634717084876037,
"learning_rate": 7.296728169429511e-06,
"loss": 2.502678632736206,
"step": 1008
},
{
"epoch": 1.2426108374384237,
"grad_norm": 9.436373278027489,
"learning_rate": 7.290360521724984e-06,
"loss": 1.5582114458084106,
"step": 1009
},
{
"epoch": 1.2438423645320198,
"grad_norm": 10.373164591549747,
"learning_rate": 7.283988169423526e-06,
"loss": 1.494875192642212,
"step": 1010
},
{
"epoch": 1.2450738916256157,
"grad_norm": 13.031187040858585,
"learning_rate": 7.277611125614499e-06,
"loss": 1.886913776397705,
"step": 1011
},
{
"epoch": 1.2463054187192117,
"grad_norm": 19.92471933345498,
"learning_rate": 7.271229403396896e-06,
"loss": 1.8913657665252686,
"step": 1012
},
{
"epoch": 1.2475369458128078,
"grad_norm": 21.8856932814209,
"learning_rate": 7.264843015879321e-06,
"loss": 1.1614234447479248,
"step": 1013
},
{
"epoch": 1.248768472906404,
"grad_norm": 11.581317439717322,
"learning_rate": 7.258451976179967e-06,
"loss": 1.6838147640228271,
"step": 1014
},
{
"epoch": 1.25,
"grad_norm": 14.274704649607155,
"learning_rate": 7.25205629742657e-06,
"loss": 1.1039239168167114,
"step": 1015
},
{
"epoch": 1.251231527093596,
"grad_norm": 10.222730157124893,
"learning_rate": 7.245655992756406e-06,
"loss": 1.519346833229065,
"step": 1016
},
{
"epoch": 1.2524630541871922,
"grad_norm": 8.325249693832719,
"learning_rate": 7.2392510753162516e-06,
"loss": 1.0175197124481201,
"step": 1017
},
{
"epoch": 1.2536945812807883,
"grad_norm": 12.766382857494223,
"learning_rate": 7.232841558262354e-06,
"loss": 0.9778202772140503,
"step": 1018
},
{
"epoch": 1.2549261083743843,
"grad_norm": 17.499343558391605,
"learning_rate": 7.226427454760412e-06,
"loss": 1.8379024267196655,
"step": 1019
},
{
"epoch": 1.2561576354679804,
"grad_norm": 11.150234617545141,
"learning_rate": 7.2200087779855435e-06,
"loss": 1.8412721157073975,
"step": 1020
},
{
"epoch": 1.2573891625615763,
"grad_norm": 8.992400726896724,
"learning_rate": 7.213585541122261e-06,
"loss": 1.8508501052856445,
"step": 1021
},
{
"epoch": 1.2586206896551724,
"grad_norm": 12.44309006439825,
"learning_rate": 7.207157757364445e-06,
"loss": 1.3070871829986572,
"step": 1022
},
{
"epoch": 1.2598522167487685,
"grad_norm": 12.840031276685824,
"learning_rate": 7.200725439915314e-06,
"loss": 2.1278223991394043,
"step": 1023
},
{
"epoch": 1.2610837438423645,
"grad_norm": 8.633495704921142,
"learning_rate": 7.194288601987398e-06,
"loss": 1.0636892318725586,
"step": 1024
},
{
"epoch": 1.2623152709359606,
"grad_norm": 10.874767223460788,
"learning_rate": 7.187847256802518e-06,
"loss": 1.7365200519561768,
"step": 1025
},
{
"epoch": 1.2635467980295567,
"grad_norm": 12.21472476387578,
"learning_rate": 7.181401417591746e-06,
"loss": 1.792116403579712,
"step": 1026
},
{
"epoch": 1.2647783251231526,
"grad_norm": 8.787411821208611,
"learning_rate": 7.174951097595389e-06,
"loss": 1.3348667621612549,
"step": 1027
},
{
"epoch": 1.2660098522167487,
"grad_norm": 17.72872801553084,
"learning_rate": 7.168496310062959e-06,
"loss": 1.677919626235962,
"step": 1028
},
{
"epoch": 1.2672413793103448,
"grad_norm": 13.283913596324016,
"learning_rate": 7.162037068253141e-06,
"loss": 1.1518199443817139,
"step": 1029
},
{
"epoch": 1.2684729064039408,
"grad_norm": 7.98681967422814,
"learning_rate": 7.155573385433772e-06,
"loss": 2.1126716136932373,
"step": 1030
},
{
"epoch": 1.269704433497537,
"grad_norm": 11.20695829302969,
"learning_rate": 7.149105274881815e-06,
"loss": 1.3222094774246216,
"step": 1031
},
{
"epoch": 1.270935960591133,
"grad_norm": 9.408024877970139,
"learning_rate": 7.1426327498833174e-06,
"loss": 0.8843763470649719,
"step": 1032
},
{
"epoch": 1.272167487684729,
"grad_norm": 18.111033872908873,
"learning_rate": 7.136155823733405e-06,
"loss": 1.3091545104980469,
"step": 1033
},
{
"epoch": 1.2733990147783252,
"grad_norm": 11.598349915801498,
"learning_rate": 7.129674509736237e-06,
"loss": 1.4408364295959473,
"step": 1034
},
{
"epoch": 1.2746305418719213,
"grad_norm": 17.074081488403696,
"learning_rate": 7.12318882120499e-06,
"loss": 1.330906867980957,
"step": 1035
},
{
"epoch": 1.2758620689655173,
"grad_norm": 11.931439673872655,
"learning_rate": 7.116698771461825e-06,
"loss": 1.9561724662780762,
"step": 1036
},
{
"epoch": 1.2770935960591134,
"grad_norm": 14.506364150634404,
"learning_rate": 7.110204373837857e-06,
"loss": 2.185842275619507,
"step": 1037
},
{
"epoch": 1.2783251231527093,
"grad_norm": 8.783423067272876,
"learning_rate": 7.1037056416731395e-06,
"loss": 1.724360466003418,
"step": 1038
},
{
"epoch": 1.2795566502463054,
"grad_norm": 10.548795738669158,
"learning_rate": 7.097202588316625e-06,
"loss": 1.179841160774231,
"step": 1039
},
{
"epoch": 1.2807881773399015,
"grad_norm": 14.968187776502731,
"learning_rate": 7.090695227126141e-06,
"loss": 1.6783604621887207,
"step": 1040
},
{
"epoch": 1.2820197044334976,
"grad_norm": 10.70366989067169,
"learning_rate": 7.084183571468368e-06,
"loss": 1.761925220489502,
"step": 1041
},
{
"epoch": 1.2832512315270936,
"grad_norm": 12.9020971876039,
"learning_rate": 7.077667634718801e-06,
"loss": 0.9297729134559631,
"step": 1042
},
{
"epoch": 1.2844827586206897,
"grad_norm": 12.446847341840494,
"learning_rate": 7.071147430261738e-06,
"loss": 1.6091060638427734,
"step": 1043
},
{
"epoch": 1.2857142857142856,
"grad_norm": 8.238449521430923,
"learning_rate": 7.064622971490234e-06,
"loss": 1.280853509902954,
"step": 1044
},
{
"epoch": 1.2869458128078817,
"grad_norm": 10.190528956891907,
"learning_rate": 7.058094271806091e-06,
"loss": 2.4095635414123535,
"step": 1045
},
{
"epoch": 1.2881773399014778,
"grad_norm": 12.210698142217534,
"learning_rate": 7.051561344619814e-06,
"loss": 1.7969441413879395,
"step": 1046
},
{
"epoch": 1.2894088669950738,
"grad_norm": 8.968258930303262,
"learning_rate": 7.045024203350598e-06,
"loss": 2.4331698417663574,
"step": 1047
},
{
"epoch": 1.29064039408867,
"grad_norm": 9.034111830970843,
"learning_rate": 7.0384828614262905e-06,
"loss": 1.336733341217041,
"step": 1048
},
{
"epoch": 1.291871921182266,
"grad_norm": 9.358643506315515,
"learning_rate": 7.031937332283367e-06,
"loss": 1.2959213256835938,
"step": 1049
},
{
"epoch": 1.293103448275862,
"grad_norm": 15.177096960870495,
"learning_rate": 7.025387629366912e-06,
"loss": 1.0095289945602417,
"step": 1050
},
{
"epoch": 1.2943349753694582,
"grad_norm": 8.708668143059782,
"learning_rate": 7.018833766130571e-06,
"loss": 1.8314733505249023,
"step": 1051
},
{
"epoch": 1.2955665024630543,
"grad_norm": 12.10925693324793,
"learning_rate": 7.012275756036544e-06,
"loss": 1.121436595916748,
"step": 1052
},
{
"epoch": 1.2967980295566504,
"grad_norm": 20.569530418297486,
"learning_rate": 7.0057136125555456e-06,
"loss": 1.5652289390563965,
"step": 1053
},
{
"epoch": 1.2980295566502464,
"grad_norm": 14.018717429311812,
"learning_rate": 6.999147349166779e-06,
"loss": 1.1146215200424194,
"step": 1054
},
{
"epoch": 1.2992610837438423,
"grad_norm": 17.232932273490494,
"learning_rate": 6.9925769793579165e-06,
"loss": 2.400024175643921,
"step": 1055
},
{
"epoch": 1.3004926108374384,
"grad_norm": 11.12761938883381,
"learning_rate": 6.986002516625058e-06,
"loss": 1.7114648818969727,
"step": 1056
},
{
"epoch": 1.3017241379310345,
"grad_norm": 10.072038004871871,
"learning_rate": 6.979423974472714e-06,
"loss": 1.5338797569274902,
"step": 1057
},
{
"epoch": 1.3029556650246306,
"grad_norm": 8.812025010262357,
"learning_rate": 6.972841366413777e-06,
"loss": 1.078460931777954,
"step": 1058
},
{
"epoch": 1.3041871921182266,
"grad_norm": 11.356722343645167,
"learning_rate": 6.966254705969484e-06,
"loss": 1.5467915534973145,
"step": 1059
},
{
"epoch": 1.3054187192118227,
"grad_norm": 14.67705148794403,
"learning_rate": 6.959664006669404e-06,
"loss": 1.2715568542480469,
"step": 1060
},
{
"epoch": 1.3066502463054186,
"grad_norm": 8.890913561904203,
"learning_rate": 6.953069282051397e-06,
"loss": 1.887066125869751,
"step": 1061
},
{
"epoch": 1.3078817733990147,
"grad_norm": 10.182269397064065,
"learning_rate": 6.946470545661593e-06,
"loss": 1.419116497039795,
"step": 1062
},
{
"epoch": 1.3091133004926108,
"grad_norm": 8.361662711059678,
"learning_rate": 6.939867811054365e-06,
"loss": 1.3843079805374146,
"step": 1063
},
{
"epoch": 1.3103448275862069,
"grad_norm": 27.704350160970165,
"learning_rate": 6.9332610917922915e-06,
"loss": 2.5894885063171387,
"step": 1064
},
{
"epoch": 1.311576354679803,
"grad_norm": 16.17688431061018,
"learning_rate": 6.9266504014461425e-06,
"loss": 1.6600944995880127,
"step": 1065
},
{
"epoch": 1.312807881773399,
"grad_norm": 18.474330510936614,
"learning_rate": 6.920035753594845e-06,
"loss": 1.7698057889938354,
"step": 1066
},
{
"epoch": 1.314039408866995,
"grad_norm": 9.914676123570585,
"learning_rate": 6.913417161825449e-06,
"loss": 1.5610848665237427,
"step": 1067
},
{
"epoch": 1.3152709359605912,
"grad_norm": 8.489359998020161,
"learning_rate": 6.906794639733114e-06,
"loss": 1.6380643844604492,
"step": 1068
},
{
"epoch": 1.3165024630541873,
"grad_norm": 8.9532327938231,
"learning_rate": 6.900168200921065e-06,
"loss": 1.390014410018921,
"step": 1069
},
{
"epoch": 1.3177339901477834,
"grad_norm": 10.45013795003969,
"learning_rate": 6.893537859000576e-06,
"loss": 1.6589158773422241,
"step": 1070
},
{
"epoch": 1.3189655172413794,
"grad_norm": 12.436644147912617,
"learning_rate": 6.886903627590938e-06,
"loss": 1.5524673461914062,
"step": 1071
},
{
"epoch": 1.3201970443349753,
"grad_norm": 12.240484798983633,
"learning_rate": 6.880265520319434e-06,
"loss": 2.0204474925994873,
"step": 1072
},
{
"epoch": 1.3214285714285714,
"grad_norm": 10.928634620934101,
"learning_rate": 6.8736235508213024e-06,
"loss": 1.7947957515716553,
"step": 1073
},
{
"epoch": 1.3226600985221675,
"grad_norm": 12.192004015491179,
"learning_rate": 6.866977732739719e-06,
"loss": 1.6154756546020508,
"step": 1074
},
{
"epoch": 1.3238916256157636,
"grad_norm": 10.239608872921218,
"learning_rate": 6.860328079725764e-06,
"loss": 1.419677734375,
"step": 1075
},
{
"epoch": 1.3251231527093597,
"grad_norm": 11.490298083513249,
"learning_rate": 6.853674605438395e-06,
"loss": 2.2221052646636963,
"step": 1076
},
{
"epoch": 1.3263546798029557,
"grad_norm": 10.796599749157496,
"learning_rate": 6.84701732354442e-06,
"loss": 1.6474840641021729,
"step": 1077
},
{
"epoch": 1.3275862068965516,
"grad_norm": 16.05723789346112,
"learning_rate": 6.840356247718466e-06,
"loss": 2.035231828689575,
"step": 1078
},
{
"epoch": 1.3288177339901477,
"grad_norm": 12.127949373836048,
"learning_rate": 6.8336913916429515e-06,
"loss": 1.5675947666168213,
"step": 1079
},
{
"epoch": 1.3300492610837438,
"grad_norm": 12.561351822867852,
"learning_rate": 6.827022769008068e-06,
"loss": 1.2241394519805908,
"step": 1080
},
{
"epoch": 1.3312807881773399,
"grad_norm": 10.606640209072971,
"learning_rate": 6.820350393511732e-06,
"loss": 1.3507403135299683,
"step": 1081
},
{
"epoch": 1.332512315270936,
"grad_norm": 23.44696719245062,
"learning_rate": 6.81367427885958e-06,
"loss": 2.256551504135132,
"step": 1082
},
{
"epoch": 1.333743842364532,
"grad_norm": 17.90054749002111,
"learning_rate": 6.806994438764922e-06,
"loss": 1.6412163972854614,
"step": 1083
},
{
"epoch": 1.3349753694581281,
"grad_norm": 10.747816339677435,
"learning_rate": 6.8003108869487225e-06,
"loss": 1.500988483428955,
"step": 1084
},
{
"epoch": 1.3362068965517242,
"grad_norm": 8.86240548184895,
"learning_rate": 6.79362363713957e-06,
"loss": 1.4661070108413696,
"step": 1085
},
{
"epoch": 1.3374384236453203,
"grad_norm": 9.325455271074935,
"learning_rate": 6.786932703073648e-06,
"loss": 1.42755126953125,
"step": 1086
},
{
"epoch": 1.3386699507389164,
"grad_norm": 14.863538954404982,
"learning_rate": 6.780238098494711e-06,
"loss": 1.165806531906128,
"step": 1087
},
{
"epoch": 1.3399014778325122,
"grad_norm": 21.9332846077213,
"learning_rate": 6.773539837154051e-06,
"loss": 1.3795387744903564,
"step": 1088
},
{
"epoch": 1.3411330049261083,
"grad_norm": 15.064922882268542,
"learning_rate": 6.766837932810468e-06,
"loss": 1.3203850984573364,
"step": 1089
},
{
"epoch": 1.3423645320197044,
"grad_norm": 12.791071147567429,
"learning_rate": 6.7601323992302525e-06,
"loss": 1.645883321762085,
"step": 1090
},
{
"epoch": 1.3435960591133005,
"grad_norm": 8.072143933965927,
"learning_rate": 6.7534232501871425e-06,
"loss": 1.6904821395874023,
"step": 1091
},
{
"epoch": 1.3448275862068966,
"grad_norm": 8.711589751937055,
"learning_rate": 6.7467104994623066e-06,
"loss": 1.332162618637085,
"step": 1092
},
{
"epoch": 1.3460591133004927,
"grad_norm": 9.451447429997234,
"learning_rate": 6.7399941608443096e-06,
"loss": 1.4389145374298096,
"step": 1093
},
{
"epoch": 1.3472906403940887,
"grad_norm": 7.323937666452591,
"learning_rate": 6.733274248129089e-06,
"loss": 1.6597908735275269,
"step": 1094
},
{
"epoch": 1.3485221674876846,
"grad_norm": 17.883843051775344,
"learning_rate": 6.72655077511992e-06,
"loss": 0.9520257711410522,
"step": 1095
},
{
"epoch": 1.3497536945812807,
"grad_norm": 11.223594087909252,
"learning_rate": 6.719823755627393e-06,
"loss": 1.4488117694854736,
"step": 1096
},
{
"epoch": 1.3509852216748768,
"grad_norm": 7.977177991617555,
"learning_rate": 6.713093203469384e-06,
"loss": 1.5133984088897705,
"step": 1097
},
{
"epoch": 1.3522167487684729,
"grad_norm": 8.682066451366055,
"learning_rate": 6.7063591324710234e-06,
"loss": 1.846522569656372,
"step": 1098
},
{
"epoch": 1.353448275862069,
"grad_norm": 12.792486675857687,
"learning_rate": 6.6996215564646705e-06,
"loss": 0.9724826812744141,
"step": 1099
},
{
"epoch": 1.354679802955665,
"grad_norm": 11.989074062954435,
"learning_rate": 6.692880489289885e-06,
"loss": 1.24728262424469,
"step": 1100
},
{
"epoch": 1.3559113300492611,
"grad_norm": 22.734635359059652,
"learning_rate": 6.686135944793395e-06,
"loss": 1.5332872867584229,
"step": 1101
},
{
"epoch": 1.3571428571428572,
"grad_norm": 11.645074036110657,
"learning_rate": 6.679387936829076e-06,
"loss": 1.5978163480758667,
"step": 1102
},
{
"epoch": 1.3583743842364533,
"grad_norm": 9.223736434919791,
"learning_rate": 6.672636479257912e-06,
"loss": 2.05710506439209,
"step": 1103
},
{
"epoch": 1.3596059113300494,
"grad_norm": 11.48041589458668,
"learning_rate": 6.665881585947981e-06,
"loss": 1.667812466621399,
"step": 1104
},
{
"epoch": 1.3608374384236452,
"grad_norm": 18.141176793209265,
"learning_rate": 6.659123270774406e-06,
"loss": 1.3053381443023682,
"step": 1105
},
{
"epoch": 1.3620689655172413,
"grad_norm": 11.11014263526773,
"learning_rate": 6.652361547619352e-06,
"loss": 1.5228716135025024,
"step": 1106
},
{
"epoch": 1.3633004926108374,
"grad_norm": 11.869708221541034,
"learning_rate": 6.645596430371976e-06,
"loss": 1.3818378448486328,
"step": 1107
},
{
"epoch": 1.3645320197044335,
"grad_norm": 11.298030039811758,
"learning_rate": 6.6388279329284065e-06,
"loss": 1.217841386795044,
"step": 1108
},
{
"epoch": 1.3657635467980296,
"grad_norm": 21.11595250544298,
"learning_rate": 6.632056069191723e-06,
"loss": 1.4309210777282715,
"step": 1109
},
{
"epoch": 1.3669950738916257,
"grad_norm": 13.7021684816084,
"learning_rate": 6.6252808530719095e-06,
"loss": 1.3015059232711792,
"step": 1110
},
{
"epoch": 1.3682266009852218,
"grad_norm": 11.973457349226296,
"learning_rate": 6.618502298485844e-06,
"loss": 1.2734256982803345,
"step": 1111
},
{
"epoch": 1.3694581280788176,
"grad_norm": 15.830227785424638,
"learning_rate": 6.611720419357257e-06,
"loss": 1.907172441482544,
"step": 1112
},
{
"epoch": 1.3706896551724137,
"grad_norm": 10.756653422484252,
"learning_rate": 6.604935229616711e-06,
"loss": 1.1207606792449951,
"step": 1113
},
{
"epoch": 1.3719211822660098,
"grad_norm": 12.736281126843005,
"learning_rate": 6.598146743201568e-06,
"loss": 2.3231239318847656,
"step": 1114
},
{
"epoch": 1.3731527093596059,
"grad_norm": 11.597483205953116,
"learning_rate": 6.5913549740559606e-06,
"loss": 1.1395865678787231,
"step": 1115
},
{
"epoch": 1.374384236453202,
"grad_norm": 14.754486017260728,
"learning_rate": 6.584559936130763e-06,
"loss": 3.1981747150421143,
"step": 1116
},
{
"epoch": 1.375615763546798,
"grad_norm": 12.874438415282308,
"learning_rate": 6.57776164338357e-06,
"loss": 1.7495319843292236,
"step": 1117
},
{
"epoch": 1.3768472906403941,
"grad_norm": 12.611228408009778,
"learning_rate": 6.570960109778655e-06,
"loss": 1.3304778337478638,
"step": 1118
},
{
"epoch": 1.3780788177339902,
"grad_norm": 11.84441441686591,
"learning_rate": 6.564155349286952e-06,
"loss": 1.6510775089263916,
"step": 1119
},
{
"epoch": 1.3793103448275863,
"grad_norm": 13.996316648052032,
"learning_rate": 6.557347375886022e-06,
"loss": 1.3382967710494995,
"step": 1120
},
{
"epoch": 1.3805418719211824,
"grad_norm": 11.351524045305764,
"learning_rate": 6.550536203560028e-06,
"loss": 1.418992042541504,
"step": 1121
},
{
"epoch": 1.3817733990147782,
"grad_norm": 16.848897992260934,
"learning_rate": 6.543721846299701e-06,
"loss": 1.4815843105316162,
"step": 1122
},
{
"epoch": 1.3830049261083743,
"grad_norm": 13.42654012333122,
"learning_rate": 6.536904318102314e-06,
"loss": 0.9823303818702698,
"step": 1123
},
{
"epoch": 1.3842364532019704,
"grad_norm": 11.039715301984293,
"learning_rate": 6.530083632971658e-06,
"loss": 1.4959704875946045,
"step": 1124
},
{
"epoch": 1.3854679802955665,
"grad_norm": 13.499332863560449,
"learning_rate": 6.523259804918001e-06,
"loss": 1.3141142129898071,
"step": 1125
},
{
"epoch": 1.3866995073891626,
"grad_norm": 18.762617405218773,
"learning_rate": 6.516432847958074e-06,
"loss": 1.60225248336792,
"step": 1126
},
{
"epoch": 1.3879310344827587,
"grad_norm": 12.76800599324204,
"learning_rate": 6.509602776115029e-06,
"loss": 1.7774362564086914,
"step": 1127
},
{
"epoch": 1.3891625615763548,
"grad_norm": 14.80003777651342,
"learning_rate": 6.502769603418423e-06,
"loss": 1.3750693798065186,
"step": 1128
},
{
"epoch": 1.3903940886699506,
"grad_norm": 12.846839874270263,
"learning_rate": 6.4959333439041775e-06,
"loss": 1.0850452184677124,
"step": 1129
},
{
"epoch": 1.3916256157635467,
"grad_norm": 17.175837709461415,
"learning_rate": 6.489094011614553e-06,
"loss": 1.7440909147262573,
"step": 1130
},
{
"epoch": 1.3928571428571428,
"grad_norm": 8.34120026588026,
"learning_rate": 6.482251620598129e-06,
"loss": 1.5904752016067505,
"step": 1131
},
{
"epoch": 1.3940886699507389,
"grad_norm": 10.398946422121055,
"learning_rate": 6.47540618490976e-06,
"loss": 1.4864649772644043,
"step": 1132
},
{
"epoch": 1.395320197044335,
"grad_norm": 16.449380414530893,
"learning_rate": 6.4685577186105595e-06,
"loss": 1.3869491815567017,
"step": 1133
},
{
"epoch": 1.396551724137931,
"grad_norm": 11.708541771363075,
"learning_rate": 6.461706235767866e-06,
"loss": 1.1635327339172363,
"step": 1134
},
{
"epoch": 1.3977832512315271,
"grad_norm": 6.616557203492817,
"learning_rate": 6.45485175045521e-06,
"loss": 1.4063032865524292,
"step": 1135
},
{
"epoch": 1.3990147783251232,
"grad_norm": 26.794737362449215,
"learning_rate": 6.447994276752293e-06,
"loss": 2.2259998321533203,
"step": 1136
},
{
"epoch": 1.4002463054187193,
"grad_norm": 10.511853223185177,
"learning_rate": 6.441133828744954e-06,
"loss": 1.2302110195159912,
"step": 1137
},
{
"epoch": 1.4014778325123154,
"grad_norm": 10.658533095355526,
"learning_rate": 6.434270420525144e-06,
"loss": 1.2579622268676758,
"step": 1138
},
{
"epoch": 1.4027093596059113,
"grad_norm": 18.972607390940905,
"learning_rate": 6.427404066190889e-06,
"loss": 1.6761397123336792,
"step": 1139
},
{
"epoch": 1.4039408866995073,
"grad_norm": 12.172946298049014,
"learning_rate": 6.4205347798462704e-06,
"loss": 1.3933346271514893,
"step": 1140
},
{
"epoch": 1.4051724137931034,
"grad_norm": 13.681043588339055,
"learning_rate": 6.413662575601391e-06,
"loss": 1.9914003610610962,
"step": 1141
},
{
"epoch": 1.4064039408866995,
"grad_norm": 16.934291210588032,
"learning_rate": 6.406787467572348e-06,
"loss": 1.9921746253967285,
"step": 1142
},
{
"epoch": 1.4076354679802956,
"grad_norm": 18.5006822922468,
"learning_rate": 6.3999094698812055e-06,
"loss": 1.6050479412078857,
"step": 1143
},
{
"epoch": 1.4088669950738917,
"grad_norm": 12.333046745730567,
"learning_rate": 6.393028596655958e-06,
"loss": 1.7796251773834229,
"step": 1144
},
{
"epoch": 1.4100985221674878,
"grad_norm": 18.731485023409682,
"learning_rate": 6.386144862030508e-06,
"loss": 1.7936886548995972,
"step": 1145
},
{
"epoch": 1.4113300492610836,
"grad_norm": 18.37593149730845,
"learning_rate": 6.37925828014464e-06,
"loss": 1.9030745029449463,
"step": 1146
},
{
"epoch": 1.4125615763546797,
"grad_norm": 11.93678536094984,
"learning_rate": 6.3723688651439806e-06,
"loss": 1.4446496963500977,
"step": 1147
},
{
"epoch": 1.4137931034482758,
"grad_norm": 13.469356839829612,
"learning_rate": 6.365476631179982e-06,
"loss": 1.5683763027191162,
"step": 1148
},
{
"epoch": 1.4150246305418719,
"grad_norm": 8.488203520402504,
"learning_rate": 6.358581592409881e-06,
"loss": 1.4594917297363281,
"step": 1149
},
{
"epoch": 1.416256157635468,
"grad_norm": 25.588676453436552,
"learning_rate": 6.351683762996681e-06,
"loss": 2.1706323623657227,
"step": 1150
},
{
"epoch": 1.417487684729064,
"grad_norm": 11.810343655960159,
"learning_rate": 6.344783157109114e-06,
"loss": 1.835425853729248,
"step": 1151
},
{
"epoch": 1.4187192118226601,
"grad_norm": 10.711102782202751,
"learning_rate": 6.337879788921615e-06,
"loss": 1.1789867877960205,
"step": 1152
},
{
"epoch": 1.4199507389162562,
"grad_norm": 28.404082710690172,
"learning_rate": 6.3309736726142965e-06,
"loss": 1.9750418663024902,
"step": 1153
},
{
"epoch": 1.4211822660098523,
"grad_norm": 14.02852797567233,
"learning_rate": 6.324064822372913e-06,
"loss": 1.4960027933120728,
"step": 1154
},
{
"epoch": 1.4224137931034484,
"grad_norm": 20.199397968799044,
"learning_rate": 6.317153252388834e-06,
"loss": 1.12904691696167,
"step": 1155
},
{
"epoch": 1.4236453201970443,
"grad_norm": 10.534543863605384,
"learning_rate": 6.31023897685902e-06,
"loss": 1.30333411693573,
"step": 1156
},
{
"epoch": 1.4248768472906403,
"grad_norm": 15.66714236524435,
"learning_rate": 6.303322009985984e-06,
"loss": 2.5257434844970703,
"step": 1157
},
{
"epoch": 1.4261083743842364,
"grad_norm": 18.065303617570866,
"learning_rate": 6.296402365977767e-06,
"loss": 0.9684423208236694,
"step": 1158
},
{
"epoch": 1.4273399014778325,
"grad_norm": 12.376925974972115,
"learning_rate": 6.289480059047915e-06,
"loss": 1.457876443862915,
"step": 1159
},
{
"epoch": 1.4285714285714286,
"grad_norm": 9.05985921030025,
"learning_rate": 6.282555103415438e-06,
"loss": 1.5206713676452637,
"step": 1160
},
{
"epoch": 1.4298029556650247,
"grad_norm": 14.712390356925216,
"learning_rate": 6.27562751330479e-06,
"loss": 1.680644154548645,
"step": 1161
},
{
"epoch": 1.4310344827586206,
"grad_norm": 9.786932196785434,
"learning_rate": 6.268697302945835e-06,
"loss": 1.3704997301101685,
"step": 1162
},
{
"epoch": 1.4322660098522166,
"grad_norm": 9.786888328650228,
"learning_rate": 6.261764486573816e-06,
"loss": 1.3250343799591064,
"step": 1163
},
{
"epoch": 1.4334975369458127,
"grad_norm": 15.544106160026582,
"learning_rate": 6.254829078429336e-06,
"loss": 1.8659427165985107,
"step": 1164
},
{
"epoch": 1.4347290640394088,
"grad_norm": 21.077430430000046,
"learning_rate": 6.247891092758319e-06,
"loss": 2.043597936630249,
"step": 1165
},
{
"epoch": 1.435960591133005,
"grad_norm": 12.476492579798414,
"learning_rate": 6.24095054381198e-06,
"loss": 1.5634403228759766,
"step": 1166
},
{
"epoch": 1.437192118226601,
"grad_norm": 11.790373846414154,
"learning_rate": 6.2340074458468014e-06,
"loss": 1.1179373264312744,
"step": 1167
},
{
"epoch": 1.438423645320197,
"grad_norm": 13.094422813370427,
"learning_rate": 6.227061813124504e-06,
"loss": 0.8013179302215576,
"step": 1168
},
{
"epoch": 1.4396551724137931,
"grad_norm": 9.010286032120458,
"learning_rate": 6.220113659912012e-06,
"loss": 1.3435392379760742,
"step": 1169
},
{
"epoch": 1.4408866995073892,
"grad_norm": 8.308881028265468,
"learning_rate": 6.213163000481428e-06,
"loss": 1.39387845993042,
"step": 1170
},
{
"epoch": 1.4421182266009853,
"grad_norm": 8.499060752632088,
"learning_rate": 6.206209849110001e-06,
"loss": 1.760462760925293,
"step": 1171
},
{
"epoch": 1.4433497536945814,
"grad_norm": 13.348998095152654,
"learning_rate": 6.1992542200801035e-06,
"loss": 1.0812432765960693,
"step": 1172
},
{
"epoch": 1.4445812807881773,
"grad_norm": 9.263056193047571,
"learning_rate": 6.1922961276791925e-06,
"loss": 1.7997616529464722,
"step": 1173
},
{
"epoch": 1.4458128078817734,
"grad_norm": 11.646405372699148,
"learning_rate": 6.1853355861997854e-06,
"loss": 1.773369550704956,
"step": 1174
},
{
"epoch": 1.4470443349753694,
"grad_norm": 8.442523087287304,
"learning_rate": 6.1783726099394324e-06,
"loss": 1.9488962888717651,
"step": 1175
},
{
"epoch": 1.4482758620689655,
"grad_norm": 13.332895782423902,
"learning_rate": 6.171407213200683e-06,
"loss": 1.6990149021148682,
"step": 1176
},
{
"epoch": 1.4495073891625616,
"grad_norm": 12.609637801512664,
"learning_rate": 6.164439410291061e-06,
"loss": 1.4307571649551392,
"step": 1177
},
{
"epoch": 1.4507389162561577,
"grad_norm": 8.885074358137231,
"learning_rate": 6.157469215523031e-06,
"loss": 1.3966443538665771,
"step": 1178
},
{
"epoch": 1.4519704433497536,
"grad_norm": 16.606696238854166,
"learning_rate": 6.150496643213969e-06,
"loss": 1.2959253787994385,
"step": 1179
},
{
"epoch": 1.4532019704433496,
"grad_norm": 16.898895754976742,
"learning_rate": 6.143521707686137e-06,
"loss": 1.4992142915725708,
"step": 1180
},
{
"epoch": 1.4544334975369457,
"grad_norm": 16.69245348652636,
"learning_rate": 6.136544423266651e-06,
"loss": 1.8196167945861816,
"step": 1181
},
{
"epoch": 1.4556650246305418,
"grad_norm": 16.12465629803321,
"learning_rate": 6.129564804287454e-06,
"loss": 1.4129021167755127,
"step": 1182
},
{
"epoch": 1.456896551724138,
"grad_norm": 15.4451290282442,
"learning_rate": 6.122582865085278e-06,
"loss": 1.2009403705596924,
"step": 1183
},
{
"epoch": 1.458128078817734,
"grad_norm": 12.682560791700617,
"learning_rate": 6.115598620001627e-06,
"loss": 1.698556661605835,
"step": 1184
},
{
"epoch": 1.45935960591133,
"grad_norm": 21.414952415899087,
"learning_rate": 6.108612083382739e-06,
"loss": 1.5819299221038818,
"step": 1185
},
{
"epoch": 1.4605911330049262,
"grad_norm": 10.708464197323055,
"learning_rate": 6.101623269579558e-06,
"loss": 1.374379277229309,
"step": 1186
},
{
"epoch": 1.4618226600985222,
"grad_norm": 10.541290993965774,
"learning_rate": 6.094632192947711e-06,
"loss": 1.2765707969665527,
"step": 1187
},
{
"epoch": 1.4630541871921183,
"grad_norm": 14.098976562454558,
"learning_rate": 6.087638867847465e-06,
"loss": 1.2740705013275146,
"step": 1188
},
{
"epoch": 1.4642857142857144,
"grad_norm": 11.154362665776958,
"learning_rate": 6.08064330864371e-06,
"loss": 1.6713453531265259,
"step": 1189
},
{
"epoch": 1.4655172413793103,
"grad_norm": 9.205967970627526,
"learning_rate": 6.073645529705926e-06,
"loss": 1.6606531143188477,
"step": 1190
},
{
"epoch": 1.4667487684729064,
"grad_norm": 12.43504089477338,
"learning_rate": 6.066645545408149e-06,
"loss": 1.6029870510101318,
"step": 1191
},
{
"epoch": 1.4679802955665024,
"grad_norm": 9.416406443647212,
"learning_rate": 6.0596433701289506e-06,
"loss": 1.5884819030761719,
"step": 1192
},
{
"epoch": 1.4692118226600985,
"grad_norm": 17.434043985101933,
"learning_rate": 6.052639018251394e-06,
"loss": 1.060668706893921,
"step": 1193
},
{
"epoch": 1.4704433497536946,
"grad_norm": 13.053843358479307,
"learning_rate": 6.045632504163024e-06,
"loss": 1.6251329183578491,
"step": 1194
},
{
"epoch": 1.4716748768472907,
"grad_norm": 10.200397873502725,
"learning_rate": 6.03862384225582e-06,
"loss": 1.2369989156723022,
"step": 1195
},
{
"epoch": 1.4729064039408866,
"grad_norm": 28.146477262288624,
"learning_rate": 6.0316130469261705e-06,
"loss": 1.7742527723312378,
"step": 1196
},
{
"epoch": 1.4741379310344827,
"grad_norm": 6.380213600146285,
"learning_rate": 6.024600132574855e-06,
"loss": 2.166492223739624,
"step": 1197
},
{
"epoch": 1.4753694581280787,
"grad_norm": 15.296147923549848,
"learning_rate": 6.017585113606999e-06,
"loss": 1.8031083345413208,
"step": 1198
},
{
"epoch": 1.4766009852216748,
"grad_norm": 7.580109898357858,
"learning_rate": 6.010568004432055e-06,
"loss": 1.9966365098953247,
"step": 1199
},
{
"epoch": 1.477832512315271,
"grad_norm": 13.138438168026589,
"learning_rate": 6.0035488194637645e-06,
"loss": 1.0125515460968018,
"step": 1200
},
{
"epoch": 1.479064039408867,
"grad_norm": 16.24938270382903,
"learning_rate": 5.9965275731201364e-06,
"loss": 1.1396842002868652,
"step": 1201
},
{
"epoch": 1.480295566502463,
"grad_norm": 6.579201955073294,
"learning_rate": 5.9895042798234125e-06,
"loss": 1.8030388355255127,
"step": 1202
},
{
"epoch": 1.4815270935960592,
"grad_norm": 12.865016417179568,
"learning_rate": 5.982478954000042e-06,
"loss": 1.4132026433944702,
"step": 1203
},
{
"epoch": 1.4827586206896552,
"grad_norm": 11.295614659779242,
"learning_rate": 5.975451610080643e-06,
"loss": 1.3726825714111328,
"step": 1204
},
{
"epoch": 1.4839901477832513,
"grad_norm": 10.812781562044428,
"learning_rate": 5.968422262499983e-06,
"loss": 2.3436193466186523,
"step": 1205
},
{
"epoch": 1.4852216748768474,
"grad_norm": 11.93980767439267,
"learning_rate": 5.961390925696947e-06,
"loss": 1.4617420434951782,
"step": 1206
},
{
"epoch": 1.4864532019704433,
"grad_norm": 8.752972802049372,
"learning_rate": 5.9543576141145035e-06,
"loss": 1.8050814867019653,
"step": 1207
},
{
"epoch": 1.4876847290640394,
"grad_norm": 11.595272230479853,
"learning_rate": 5.947322342199674e-06,
"loss": 1.3426543474197388,
"step": 1208
},
{
"epoch": 1.4889162561576355,
"grad_norm": 13.910327681643947,
"learning_rate": 5.940285124403517e-06,
"loss": 1.6211771965026855,
"step": 1209
},
{
"epoch": 1.4901477832512315,
"grad_norm": 10.490417163522949,
"learning_rate": 5.933245975181074e-06,
"loss": 2.695863723754883,
"step": 1210
},
{
"epoch": 1.4913793103448276,
"grad_norm": 9.128292414129945,
"learning_rate": 5.926204908991366e-06,
"loss": 1.2743788957595825,
"step": 1211
},
{
"epoch": 1.4926108374384237,
"grad_norm": 11.2632445422812,
"learning_rate": 5.919161940297346e-06,
"loss": 1.652765154838562,
"step": 1212
},
{
"epoch": 1.4938423645320196,
"grad_norm": 7.537950882850561,
"learning_rate": 5.912117083565874e-06,
"loss": 1.3720670938491821,
"step": 1213
},
{
"epoch": 1.4950738916256157,
"grad_norm": 14.216763115794095,
"learning_rate": 5.905070353267692e-06,
"loss": 1.222616195678711,
"step": 1214
},
{
"epoch": 1.4963054187192117,
"grad_norm": 7.742622309976788,
"learning_rate": 5.898021763877388e-06,
"loss": 1.4626069068908691,
"step": 1215
},
{
"epoch": 1.4975369458128078,
"grad_norm": 10.044815043339705,
"learning_rate": 5.890971329873366e-06,
"loss": 1.7813634872436523,
"step": 1216
},
{
"epoch": 1.498768472906404,
"grad_norm": 14.537107209189347,
"learning_rate": 5.883919065737827e-06,
"loss": 0.5114675760269165,
"step": 1217
},
{
"epoch": 1.5,
"grad_norm": 18.934697309871,
"learning_rate": 5.876864985956722e-06,
"loss": 1.6000962257385254,
"step": 1218
},
{
"epoch": 1.501231527093596,
"grad_norm": 33.040397060632486,
"learning_rate": 5.869809105019738e-06,
"loss": 1.5674512386322021,
"step": 1219
},
{
"epoch": 1.5024630541871922,
"grad_norm": 9.76563438047523,
"learning_rate": 5.8627514374202596e-06,
"loss": 1.7963311672210693,
"step": 1220
},
{
"epoch": 1.5036945812807883,
"grad_norm": 10.95067481959561,
"learning_rate": 5.85569199765534e-06,
"loss": 1.1649596691131592,
"step": 1221
},
{
"epoch": 1.5049261083743843,
"grad_norm": 9.927773449159055,
"learning_rate": 5.848630800225678e-06,
"loss": 1.140197992324829,
"step": 1222
},
{
"epoch": 1.5061576354679804,
"grad_norm": 8.586607717080767,
"learning_rate": 5.841567859635572e-06,
"loss": 1.865435242652893,
"step": 1223
},
{
"epoch": 1.5073891625615765,
"grad_norm": 11.43552738813054,
"learning_rate": 5.834503190392912e-06,
"loss": 1.457642912864685,
"step": 1224
},
{
"epoch": 1.5086206896551724,
"grad_norm": 9.978595721772624,
"learning_rate": 5.827436807009133e-06,
"loss": 1.3783336877822876,
"step": 1225
},
{
"epoch": 1.5098522167487685,
"grad_norm": 10.75044326200818,
"learning_rate": 5.8203687239991935e-06,
"loss": 1.939549207687378,
"step": 1226
},
{
"epoch": 1.5110837438423645,
"grad_norm": 14.588582695069839,
"learning_rate": 5.813298955881542e-06,
"loss": 1.3607597351074219,
"step": 1227
},
{
"epoch": 1.5123152709359606,
"grad_norm": 9.739548479278437,
"learning_rate": 5.806227517178089e-06,
"loss": 0.81966233253479,
"step": 1228
},
{
"epoch": 1.5135467980295565,
"grad_norm": 7.228017183846092,
"learning_rate": 5.799154422414174e-06,
"loss": 0.9481602311134338,
"step": 1229
},
{
"epoch": 1.5147783251231526,
"grad_norm": 16.162733557662186,
"learning_rate": 5.79207968611854e-06,
"loss": 1.3550889492034912,
"step": 1230
},
{
"epoch": 1.5160098522167487,
"grad_norm": 10.696500057601996,
"learning_rate": 5.785003322823307e-06,
"loss": 2.022425889968872,
"step": 1231
},
{
"epoch": 1.5172413793103448,
"grad_norm": 8.501680697642309,
"learning_rate": 5.777925347063927e-06,
"loss": 1.5649950504302979,
"step": 1232
},
{
"epoch": 1.5184729064039408,
"grad_norm": 12.185227926920462,
"learning_rate": 5.7708457733791715e-06,
"loss": 1.9720977544784546,
"step": 1233
},
{
"epoch": 1.519704433497537,
"grad_norm": 12.902985615374178,
"learning_rate": 5.763764616311089e-06,
"loss": 1.0029213428497314,
"step": 1234
},
{
"epoch": 1.520935960591133,
"grad_norm": 13.23751211435566,
"learning_rate": 5.756681890404987e-06,
"loss": 1.8926727771759033,
"step": 1235
},
{
"epoch": 1.522167487684729,
"grad_norm": 8.93687413398984,
"learning_rate": 5.749597610209392e-06,
"loss": 1.462761402130127,
"step": 1236
},
{
"epoch": 1.5233990147783252,
"grad_norm": 10.137890971821589,
"learning_rate": 5.7425117902760195e-06,
"loss": 2.1467416286468506,
"step": 1237
},
{
"epoch": 1.5246305418719213,
"grad_norm": 12.30865285718221,
"learning_rate": 5.7354244451597545e-06,
"loss": 1.191473364830017,
"step": 1238
},
{
"epoch": 1.5258620689655173,
"grad_norm": 11.884477014639941,
"learning_rate": 5.72833558941861e-06,
"loss": 0.896723210811615,
"step": 1239
},
{
"epoch": 1.5270935960591134,
"grad_norm": 12.439035862181441,
"learning_rate": 5.721245237613704e-06,
"loss": 0.8741526007652283,
"step": 1240
},
{
"epoch": 1.5283251231527095,
"grad_norm": 11.437489612490284,
"learning_rate": 5.714153404309228e-06,
"loss": 1.6330994367599487,
"step": 1241
},
{
"epoch": 1.5295566502463054,
"grad_norm": 8.493940846915361,
"learning_rate": 5.707060104072415e-06,
"loss": 2.2386982440948486,
"step": 1242
},
{
"epoch": 1.5307881773399015,
"grad_norm": 15.002139823216499,
"learning_rate": 5.6999653514735124e-06,
"loss": 1.5266145467758179,
"step": 1243
},
{
"epoch": 1.5320197044334976,
"grad_norm": 10.763593391596421,
"learning_rate": 5.6928691610857515e-06,
"loss": 1.4918262958526611,
"step": 1244
},
{
"epoch": 1.5332512315270936,
"grad_norm": 13.978563202935332,
"learning_rate": 5.685771547485312e-06,
"loss": 1.241945743560791,
"step": 1245
},
{
"epoch": 1.5344827586206895,
"grad_norm": 13.403953021065679,
"learning_rate": 5.678672525251304e-06,
"loss": 1.1569273471832275,
"step": 1246
},
{
"epoch": 1.5357142857142856,
"grad_norm": 11.182023407334606,
"learning_rate": 5.671572108965729e-06,
"loss": 1.946014404296875,
"step": 1247
},
{
"epoch": 1.5369458128078817,
"grad_norm": 11.304302205859694,
"learning_rate": 5.664470313213448e-06,
"loss": 1.8601741790771484,
"step": 1248
},
{
"epoch": 1.5381773399014778,
"grad_norm": 16.894321658591,
"learning_rate": 5.65736715258216e-06,
"loss": 1.7164549827575684,
"step": 1249
},
{
"epoch": 1.5394088669950738,
"grad_norm": 10.02548837159482,
"learning_rate": 5.650262641662367e-06,
"loss": 2.0459697246551514,
"step": 1250
},
{
"epoch": 1.54064039408867,
"grad_norm": 9.37570660013781,
"learning_rate": 5.643156795047343e-06,
"loss": 1.4485859870910645,
"step": 1251
},
{
"epoch": 1.541871921182266,
"grad_norm": 7.685396722064439,
"learning_rate": 5.6360496273331055e-06,
"loss": 1.8672525882720947,
"step": 1252
},
{
"epoch": 1.543103448275862,
"grad_norm": 10.04870984968868,
"learning_rate": 5.628941153118388e-06,
"loss": 1.4309324026107788,
"step": 1253
},
{
"epoch": 1.5443349753694582,
"grad_norm": 8.68197237847592,
"learning_rate": 5.621831387004603e-06,
"loss": 1.8784745931625366,
"step": 1254
},
{
"epoch": 1.5455665024630543,
"grad_norm": 13.277977807429252,
"learning_rate": 5.6147203435958246e-06,
"loss": 2.109992027282715,
"step": 1255
},
{
"epoch": 1.5467980295566504,
"grad_norm": 12.972460738003901,
"learning_rate": 5.607608037498742e-06,
"loss": 1.5892071723937988,
"step": 1256
},
{
"epoch": 1.5480295566502464,
"grad_norm": 13.365650986627243,
"learning_rate": 5.600494483322643e-06,
"loss": 1.3583379983901978,
"step": 1257
},
{
"epoch": 1.5492610837438425,
"grad_norm": 20.27099102357665,
"learning_rate": 5.593379695679378e-06,
"loss": 2.126896381378174,
"step": 1258
},
{
"epoch": 1.5504926108374384,
"grad_norm": 17.176572909103676,
"learning_rate": 5.586263689183332e-06,
"loss": 1.7454299926757812,
"step": 1259
},
{
"epoch": 1.5517241379310345,
"grad_norm": 13.916773869762237,
"learning_rate": 5.5791464784513905e-06,
"loss": 1.1533763408660889,
"step": 1260
},
{
"epoch": 1.5529556650246306,
"grad_norm": 7.929553367189426,
"learning_rate": 5.572028078102917e-06,
"loss": 1.4818049669265747,
"step": 1261
},
{
"epoch": 1.5541871921182266,
"grad_norm": 10.401505556673449,
"learning_rate": 5.564908502759714e-06,
"loss": 1.7103283405303955,
"step": 1262
},
{
"epoch": 1.5554187192118225,
"grad_norm": 9.47500952850124,
"learning_rate": 5.557787767046001e-06,
"loss": 2.1653401851654053,
"step": 1263
},
{
"epoch": 1.5566502463054186,
"grad_norm": 11.53902942298552,
"learning_rate": 5.55066588558838e-06,
"loss": 1.3127275705337524,
"step": 1264
},
{
"epoch": 1.5578817733990147,
"grad_norm": 16.55540616140196,
"learning_rate": 5.543542873015806e-06,
"loss": 1.0865871906280518,
"step": 1265
},
{
"epoch": 1.5591133004926108,
"grad_norm": 11.513704169835737,
"learning_rate": 5.536418743959559e-06,
"loss": 1.341281533241272,
"step": 1266
},
{
"epoch": 1.5603448275862069,
"grad_norm": 13.363897307451165,
"learning_rate": 5.529293513053207e-06,
"loss": 1.1612720489501953,
"step": 1267
},
{
"epoch": 1.561576354679803,
"grad_norm": 8.231595025537441,
"learning_rate": 5.522167194932588e-06,
"loss": 1.7491642236709595,
"step": 1268
},
{
"epoch": 1.562807881773399,
"grad_norm": 14.714195860173573,
"learning_rate": 5.515039804235772e-06,
"loss": 1.8244414329528809,
"step": 1269
},
{
"epoch": 1.564039408866995,
"grad_norm": 14.369418745397832,
"learning_rate": 5.50791135560303e-06,
"loss": 1.6449997425079346,
"step": 1270
},
{
"epoch": 1.5652709359605912,
"grad_norm": 10.791840038500066,
"learning_rate": 5.5007818636768055e-06,
"loss": 1.258559226989746,
"step": 1271
},
{
"epoch": 1.5665024630541873,
"grad_norm": 12.265469895779276,
"learning_rate": 5.493651343101686e-06,
"loss": 2.075775146484375,
"step": 1272
},
{
"epoch": 1.5677339901477834,
"grad_norm": 33.663491606092755,
"learning_rate": 5.486519808524374e-06,
"loss": 1.8196138143539429,
"step": 1273
},
{
"epoch": 1.5689655172413794,
"grad_norm": 10.504622195873791,
"learning_rate": 5.479387274593653e-06,
"loss": 1.129037618637085,
"step": 1274
},
{
"epoch": 1.5701970443349755,
"grad_norm": 10.887519946570082,
"learning_rate": 5.472253755960358e-06,
"loss": 1.7367748022079468,
"step": 1275
},
{
"epoch": 1.5714285714285714,
"grad_norm": 9.127598313619417,
"learning_rate": 5.4651192672773475e-06,
"loss": 1.9274532794952393,
"step": 1276
},
{
"epoch": 1.5726600985221675,
"grad_norm": 17.490821839529264,
"learning_rate": 5.457983823199475e-06,
"loss": 1.4018654823303223,
"step": 1277
},
{
"epoch": 1.5738916256157636,
"grad_norm": 17.899672160499332,
"learning_rate": 5.450847438383555e-06,
"loss": 1.383131504058838,
"step": 1278
},
{
"epoch": 1.5751231527093597,
"grad_norm": 6.595048027752494,
"learning_rate": 5.443710127488331e-06,
"loss": 1.277740716934204,
"step": 1279
},
{
"epoch": 1.5763546798029555,
"grad_norm": 9.304406142462632,
"learning_rate": 5.4365719051744556e-06,
"loss": 1.507627010345459,
"step": 1280
},
{
"epoch": 1.5775862068965516,
"grad_norm": 13.383687869982538,
"learning_rate": 5.429432786104446e-06,
"loss": 1.609743595123291,
"step": 1281
},
{
"epoch": 1.5788177339901477,
"grad_norm": 14.966009265010456,
"learning_rate": 5.422292784942666e-06,
"loss": 3.7705276012420654,
"step": 1282
},
{
"epoch": 1.5800492610837438,
"grad_norm": 8.997880163576188,
"learning_rate": 5.415151916355292e-06,
"loss": 1.5003160238265991,
"step": 1283
},
{
"epoch": 1.5812807881773399,
"grad_norm": 9.476478190888859,
"learning_rate": 5.408010195010278e-06,
"loss": 2.2466366291046143,
"step": 1284
},
{
"epoch": 1.582512315270936,
"grad_norm": 7.465134227448914,
"learning_rate": 5.400867635577335e-06,
"loss": 1.0722277164459229,
"step": 1285
},
{
"epoch": 1.583743842364532,
"grad_norm": 13.942249242079209,
"learning_rate": 5.3937242527278885e-06,
"loss": 1.3113644123077393,
"step": 1286
},
{
"epoch": 1.5849753694581281,
"grad_norm": 14.224147707467683,
"learning_rate": 5.3865800611350634e-06,
"loss": 1.4688694477081299,
"step": 1287
},
{
"epoch": 1.5862068965517242,
"grad_norm": 9.648975936769988,
"learning_rate": 5.379435075473641e-06,
"loss": 1.3646764755249023,
"step": 1288
},
{
"epoch": 1.5874384236453203,
"grad_norm": 8.753285038565833,
"learning_rate": 5.372289310420032e-06,
"loss": 1.6248177289962769,
"step": 1289
},
{
"epoch": 1.5886699507389164,
"grad_norm": 9.773114583134893,
"learning_rate": 5.365142780652255e-06,
"loss": 1.5507471561431885,
"step": 1290
},
{
"epoch": 1.5899014778325125,
"grad_norm": 8.752822975110762,
"learning_rate": 5.35799550084989e-06,
"loss": 1.2866086959838867,
"step": 1291
},
{
"epoch": 1.5911330049261085,
"grad_norm": 10.021050170312028,
"learning_rate": 5.350847485694067e-06,
"loss": 2.336108684539795,
"step": 1292
},
{
"epoch": 1.5923645320197044,
"grad_norm": 11.648640054355637,
"learning_rate": 5.343698749867421e-06,
"loss": 1.6604368686676025,
"step": 1293
},
{
"epoch": 1.5935960591133005,
"grad_norm": 16.28378480699955,
"learning_rate": 5.336549308054066e-06,
"loss": 1.2169203758239746,
"step": 1294
},
{
"epoch": 1.5948275862068966,
"grad_norm": 14.069009000417143,
"learning_rate": 5.329399174939572e-06,
"loss": 1.546027421951294,
"step": 1295
},
{
"epoch": 1.5960591133004927,
"grad_norm": 9.646944240372145,
"learning_rate": 5.3222483652109235e-06,
"loss": 1.1372979879379272,
"step": 1296
},
{
"epoch": 1.5972906403940885,
"grad_norm": 10.548510904543294,
"learning_rate": 5.315096893556497e-06,
"loss": 1.3435921669006348,
"step": 1297
},
{
"epoch": 1.5985221674876846,
"grad_norm": 14.79008878560828,
"learning_rate": 5.307944774666029e-06,
"loss": 1.522647500038147,
"step": 1298
},
{
"epoch": 1.5997536945812807,
"grad_norm": 17.912683434114346,
"learning_rate": 5.300792023230587e-06,
"loss": 2.0829434394836426,
"step": 1299
},
{
"epoch": 1.6009852216748768,
"grad_norm": 8.420566897576393,
"learning_rate": 5.2936386539425325e-06,
"loss": 1.761828064918518,
"step": 1300
},
{
"epoch": 1.6022167487684729,
"grad_norm": 14.83308627903251,
"learning_rate": 5.2864846814955e-06,
"loss": 2.4108588695526123,
"step": 1301
},
{
"epoch": 1.603448275862069,
"grad_norm": 7.959651684795871,
"learning_rate": 5.279330120584365e-06,
"loss": 1.626701831817627,
"step": 1302
},
{
"epoch": 1.604679802955665,
"grad_norm": 15.705970904875606,
"learning_rate": 5.272174985905207e-06,
"loss": 1.2424887418746948,
"step": 1303
},
{
"epoch": 1.6059113300492611,
"grad_norm": 12.239359710615943,
"learning_rate": 5.2650192921552845e-06,
"loss": 2.149031639099121,
"step": 1304
},
{
"epoch": 1.6071428571428572,
"grad_norm": 10.231856507403213,
"learning_rate": 5.257863054033012e-06,
"loss": 2.6947379112243652,
"step": 1305
},
{
"epoch": 1.6083743842364533,
"grad_norm": 18.838018326977505,
"learning_rate": 5.25070628623791e-06,
"loss": 1.665069818496704,
"step": 1306
},
{
"epoch": 1.6096059113300494,
"grad_norm": 14.325294673284358,
"learning_rate": 5.243549003470599e-06,
"loss": 1.3887734413146973,
"step": 1307
},
{
"epoch": 1.6108374384236455,
"grad_norm": 11.840772011671689,
"learning_rate": 5.236391220432745e-06,
"loss": 1.340559720993042,
"step": 1308
},
{
"epoch": 1.6120689655172413,
"grad_norm": 10.400173398296557,
"learning_rate": 5.229232951827054e-06,
"loss": 1.1291146278381348,
"step": 1309
},
{
"epoch": 1.6133004926108374,
"grad_norm": 11.008129364503455,
"learning_rate": 5.222074212357221e-06,
"loss": 1.8375647068023682,
"step": 1310
},
{
"epoch": 1.6145320197044335,
"grad_norm": 26.174008264121436,
"learning_rate": 5.2149150167279106e-06,
"loss": 1.3299870491027832,
"step": 1311
},
{
"epoch": 1.6157635467980296,
"grad_norm": 9.874671943961642,
"learning_rate": 5.2077553796447254e-06,
"loss": 1.1574440002441406,
"step": 1312
},
{
"epoch": 1.6169950738916257,
"grad_norm": 9.304756709434216,
"learning_rate": 5.200595315814174e-06,
"loss": 1.8118785619735718,
"step": 1313
},
{
"epoch": 1.6182266009852215,
"grad_norm": 10.54430610217864,
"learning_rate": 5.19343483994364e-06,
"loss": 1.333923101425171,
"step": 1314
},
{
"epoch": 1.6194581280788176,
"grad_norm": 8.365290613104223,
"learning_rate": 5.18627396674136e-06,
"loss": 1.2107478380203247,
"step": 1315
},
{
"epoch": 1.6206896551724137,
"grad_norm": 11.934365489822259,
"learning_rate": 5.1791127109163734e-06,
"loss": 1.662817120552063,
"step": 1316
},
{
"epoch": 1.6219211822660098,
"grad_norm": 11.66068657995672,
"learning_rate": 5.17195108717852e-06,
"loss": 1.7790195941925049,
"step": 1317
},
{
"epoch": 1.6231527093596059,
"grad_norm": 15.883414066148024,
"learning_rate": 5.164789110238387e-06,
"loss": 1.5893058776855469,
"step": 1318
},
{
"epoch": 1.624384236453202,
"grad_norm": 9.631844787083402,
"learning_rate": 5.15762679480729e-06,
"loss": 1.256395936012268,
"step": 1319
},
{
"epoch": 1.625615763546798,
"grad_norm": 18.80096398191795,
"learning_rate": 5.150464155597239e-06,
"loss": 1.3061628341674805,
"step": 1320
},
{
"epoch": 1.6268472906403941,
"grad_norm": 8.93680164244121,
"learning_rate": 5.143301207320909e-06,
"loss": 1.4399319887161255,
"step": 1321
},
{
"epoch": 1.6280788177339902,
"grad_norm": 13.559338660465917,
"learning_rate": 5.136137964691609e-06,
"loss": 1.2071207761764526,
"step": 1322
},
{
"epoch": 1.6293103448275863,
"grad_norm": 15.329093630080337,
"learning_rate": 5.128974442423254e-06,
"loss": 2.2784008979797363,
"step": 1323
},
{
"epoch": 1.6305418719211824,
"grad_norm": 10.677223802578135,
"learning_rate": 5.121810655230336e-06,
"loss": 1.3703962564468384,
"step": 1324
},
{
"epoch": 1.6317733990147785,
"grad_norm": 7.672085033643185,
"learning_rate": 5.114646617827884e-06,
"loss": 0.6955282688140869,
"step": 1325
},
{
"epoch": 1.6330049261083743,
"grad_norm": 9.372418453872616,
"learning_rate": 5.107482344931448e-06,
"loss": 1.5774227380752563,
"step": 1326
},
{
"epoch": 1.6342364532019704,
"grad_norm": 7.569882170382433,
"learning_rate": 5.100317851257057e-06,
"loss": 1.6811349391937256,
"step": 1327
},
{
"epoch": 1.6354679802955665,
"grad_norm": 13.234466243138659,
"learning_rate": 5.093153151521196e-06,
"loss": 1.563596487045288,
"step": 1328
},
{
"epoch": 1.6366995073891626,
"grad_norm": 13.317086470459271,
"learning_rate": 5.085988260440776e-06,
"loss": 1.44309401512146,
"step": 1329
},
{
"epoch": 1.6379310344827587,
"grad_norm": 12.614583983426193,
"learning_rate": 5.0788231927330924e-06,
"loss": 1.5392205715179443,
"step": 1330
},
{
"epoch": 1.6391625615763545,
"grad_norm": 19.688183928504156,
"learning_rate": 5.0716579631158124e-06,
"loss": 0.9557719826698303,
"step": 1331
},
{
"epoch": 1.6403940886699506,
"grad_norm": 12.748000945416605,
"learning_rate": 5.064492586306931e-06,
"loss": 1.1032493114471436,
"step": 1332
},
{
"epoch": 1.6416256157635467,
"grad_norm": 14.590229259835747,
"learning_rate": 5.057327077024745e-06,
"loss": 1.4907091856002808,
"step": 1333
},
{
"epoch": 1.6428571428571428,
"grad_norm": 13.569513298786392,
"learning_rate": 5.050161449987828e-06,
"loss": 1.4919164180755615,
"step": 1334
},
{
"epoch": 1.6440886699507389,
"grad_norm": 17.53788627610522,
"learning_rate": 5.0429957199149905e-06,
"loss": 2.177396297454834,
"step": 1335
},
{
"epoch": 1.645320197044335,
"grad_norm": 9.011039030303097,
"learning_rate": 5.035829901525258e-06,
"loss": 1.2386332750320435,
"step": 1336
},
{
"epoch": 1.646551724137931,
"grad_norm": 7.326320563707851,
"learning_rate": 5.028664009537835e-06,
"loss": 1.2984986305236816,
"step": 1337
},
{
"epoch": 1.6477832512315271,
"grad_norm": 8.373461994458872,
"learning_rate": 5.021498058672076e-06,
"loss": 1.1399617195129395,
"step": 1338
},
{
"epoch": 1.6490147783251232,
"grad_norm": 7.295316739226097,
"learning_rate": 5.014332063647462e-06,
"loss": 1.9816789627075195,
"step": 1339
},
{
"epoch": 1.6502463054187193,
"grad_norm": 7.86464342129843,
"learning_rate": 5.007166039183561e-06,
"loss": 1.4210541248321533,
"step": 1340
},
{
"epoch": 1.6514778325123154,
"grad_norm": 12.713637168049194,
"learning_rate": 5e-06,
"loss": 1.5061390399932861,
"step": 1341
},
{
"epoch": 1.6527093596059115,
"grad_norm": 8.899156333262312,
"learning_rate": 4.99283396081644e-06,
"loss": 1.4701118469238281,
"step": 1342
},
{
"epoch": 1.6539408866995073,
"grad_norm": 10.54571567541005,
"learning_rate": 4.985667936352538e-06,
"loss": 1.4879779815673828,
"step": 1343
},
{
"epoch": 1.6551724137931034,
"grad_norm": 10.432279538827562,
"learning_rate": 4.978501941327926e-06,
"loss": 1.51373291015625,
"step": 1344
},
{
"epoch": 1.6564039408866995,
"grad_norm": 7.981064947021898,
"learning_rate": 4.971335990462168e-06,
"loss": 1.5439019203186035,
"step": 1345
},
{
"epoch": 1.6576354679802956,
"grad_norm": 14.863181962691362,
"learning_rate": 4.964170098474744e-06,
"loss": 1.7145721912384033,
"step": 1346
},
{
"epoch": 1.6588669950738915,
"grad_norm": 7.816226303611453,
"learning_rate": 4.95700428008501e-06,
"loss": 1.6367833614349365,
"step": 1347
},
{
"epoch": 1.6600985221674875,
"grad_norm": 12.087333147554537,
"learning_rate": 4.949838550012172e-06,
"loss": 1.4300103187561035,
"step": 1348
},
{
"epoch": 1.6613300492610836,
"grad_norm": 6.881924405292677,
"learning_rate": 4.942672922975255e-06,
"loss": 2.0569915771484375,
"step": 1349
},
{
"epoch": 1.6625615763546797,
"grad_norm": 15.296469591183284,
"learning_rate": 4.935507413693071e-06,
"loss": 1.1028980016708374,
"step": 1350
},
{
"epoch": 1.6637931034482758,
"grad_norm": 9.201861102909985,
"learning_rate": 4.928342036884189e-06,
"loss": 1.6323003768920898,
"step": 1351
},
{
"epoch": 1.6650246305418719,
"grad_norm": 10.996157407203105,
"learning_rate": 4.921176807266909e-06,
"loss": 1.5050472021102905,
"step": 1352
},
{
"epoch": 1.666256157635468,
"grad_norm": 17.127722044101333,
"learning_rate": 4.914011739559225e-06,
"loss": 1.3893849849700928,
"step": 1353
},
{
"epoch": 1.667487684729064,
"grad_norm": 13.548169676262727,
"learning_rate": 4.906846848478803e-06,
"loss": 1.1478514671325684,
"step": 1354
},
{
"epoch": 1.6687192118226601,
"grad_norm": 16.337726396970115,
"learning_rate": 4.899682148742944e-06,
"loss": 1.2397665977478027,
"step": 1355
},
{
"epoch": 1.6699507389162562,
"grad_norm": 8.122019629920894,
"learning_rate": 4.892517655068555e-06,
"loss": 1.1658974885940552,
"step": 1356
},
{
"epoch": 1.6711822660098523,
"grad_norm": 10.105771734426996,
"learning_rate": 4.8853533821721175e-06,
"loss": 1.7130283117294312,
"step": 1357
},
{
"epoch": 1.6724137931034484,
"grad_norm": 10.758386009234124,
"learning_rate": 4.878189344769666e-06,
"loss": 0.9516315460205078,
"step": 1358
},
{
"epoch": 1.6736453201970445,
"grad_norm": 11.103808898671073,
"learning_rate": 4.871025557576747e-06,
"loss": 1.143174171447754,
"step": 1359
},
{
"epoch": 1.6748768472906403,
"grad_norm": 11.525961008953772,
"learning_rate": 4.863862035308392e-06,
"loss": 1.7117831707000732,
"step": 1360
},
{
"epoch": 1.6761083743842364,
"grad_norm": 17.64687941795743,
"learning_rate": 4.8566987926790946e-06,
"loss": 2.507868528366089,
"step": 1361
},
{
"epoch": 1.6773399014778325,
"grad_norm": 9.376137745201675,
"learning_rate": 4.849535844402762e-06,
"loss": 1.476400375366211,
"step": 1362
},
{
"epoch": 1.6785714285714286,
"grad_norm": 8.721089378493017,
"learning_rate": 4.8423732051927115e-06,
"loss": 1.3162943124771118,
"step": 1363
},
{
"epoch": 1.6798029556650245,
"grad_norm": 10.422911150427735,
"learning_rate": 4.835210889761614e-06,
"loss": 2.2291440963745117,
"step": 1364
},
{
"epoch": 1.6810344827586206,
"grad_norm": 9.602624562609396,
"learning_rate": 4.82804891282148e-06,
"loss": 1.2231886386871338,
"step": 1365
},
{
"epoch": 1.6822660098522166,
"grad_norm": 14.076238439157445,
"learning_rate": 4.820887289083629e-06,
"loss": 1.3799304962158203,
"step": 1366
},
{
"epoch": 1.6834975369458127,
"grad_norm": 15.54796648321669,
"learning_rate": 4.813726033258643e-06,
"loss": 1.856811761856079,
"step": 1367
},
{
"epoch": 1.6847290640394088,
"grad_norm": 9.64062645814171,
"learning_rate": 4.80656516005636e-06,
"loss": 1.5948967933654785,
"step": 1368
},
{
"epoch": 1.685960591133005,
"grad_norm": 13.962004352631022,
"learning_rate": 4.799404684185828e-06,
"loss": 1.5035887956619263,
"step": 1369
},
{
"epoch": 1.687192118226601,
"grad_norm": 11.27741103317867,
"learning_rate": 4.792244620355275e-06,
"loss": 1.4715675115585327,
"step": 1370
},
{
"epoch": 1.688423645320197,
"grad_norm": 15.373869655729267,
"learning_rate": 4.78508498327209e-06,
"loss": 1.393894076347351,
"step": 1371
},
{
"epoch": 1.6896551724137931,
"grad_norm": 12.537169523242483,
"learning_rate": 4.777925787642781e-06,
"loss": 1.8458061218261719,
"step": 1372
},
{
"epoch": 1.6908866995073892,
"grad_norm": 12.62635000347042,
"learning_rate": 4.770767048172948e-06,
"loss": 1.0604429244995117,
"step": 1373
},
{
"epoch": 1.6921182266009853,
"grad_norm": 10.74648464318841,
"learning_rate": 4.7636087795672565e-06,
"loss": 1.3261964321136475,
"step": 1374
},
{
"epoch": 1.6933497536945814,
"grad_norm": 9.576848082824501,
"learning_rate": 4.756450996529403e-06,
"loss": 1.6243900060653687,
"step": 1375
},
{
"epoch": 1.6945812807881775,
"grad_norm": 13.575969601291865,
"learning_rate": 4.749293713762091e-06,
"loss": 1.8087639808654785,
"step": 1376
},
{
"epoch": 1.6958128078817734,
"grad_norm": 8.48685992922433,
"learning_rate": 4.742136945966991e-06,
"loss": 1.9180892705917358,
"step": 1377
},
{
"epoch": 1.6970443349753694,
"grad_norm": 12.706829097920151,
"learning_rate": 4.734980707844716e-06,
"loss": 1.6797364950180054,
"step": 1378
},
{
"epoch": 1.6982758620689655,
"grad_norm": 10.281614379219002,
"learning_rate": 4.727825014094795e-06,
"loss": 0.9649052023887634,
"step": 1379
},
{
"epoch": 1.6995073891625616,
"grad_norm": 7.785652444986331,
"learning_rate": 4.720669879415637e-06,
"loss": 1.4185916185379028,
"step": 1380
},
{
"epoch": 1.7007389162561575,
"grad_norm": 10.73836489858494,
"learning_rate": 4.713515318504501e-06,
"loss": 1.8681238889694214,
"step": 1381
},
{
"epoch": 1.7019704433497536,
"grad_norm": 9.950804244952993,
"learning_rate": 4.706361346057468e-06,
"loss": 1.2830915451049805,
"step": 1382
},
{
"epoch": 1.7032019704433496,
"grad_norm": 18.988866497939586,
"learning_rate": 4.699207976769416e-06,
"loss": 1.0888878107070923,
"step": 1383
},
{
"epoch": 1.7044334975369457,
"grad_norm": 12.689992799691533,
"learning_rate": 4.692055225333972e-06,
"loss": 1.4439440965652466,
"step": 1384
},
{
"epoch": 1.7056650246305418,
"grad_norm": 7.183191439849756,
"learning_rate": 4.684903106443504e-06,
"loss": 1.0282858610153198,
"step": 1385
},
{
"epoch": 1.706896551724138,
"grad_norm": 13.261845343202891,
"learning_rate": 4.677751634789078e-06,
"loss": 1.6842533349990845,
"step": 1386
},
{
"epoch": 1.708128078817734,
"grad_norm": 14.612290761713947,
"learning_rate": 4.670600825060429e-06,
"loss": 1.5473763942718506,
"step": 1387
},
{
"epoch": 1.70935960591133,
"grad_norm": 19.73106165634469,
"learning_rate": 4.663450691945936e-06,
"loss": 1.839112401008606,
"step": 1388
},
{
"epoch": 1.7105911330049262,
"grad_norm": 10.917539579247505,
"learning_rate": 4.656301250132581e-06,
"loss": 1.5349544286727905,
"step": 1389
},
{
"epoch": 1.7118226600985222,
"grad_norm": 11.132766984186494,
"learning_rate": 4.649152514305934e-06,
"loss": 1.5788905620574951,
"step": 1390
},
{
"epoch": 1.7130541871921183,
"grad_norm": 10.21681078103426,
"learning_rate": 4.6420044991501104e-06,
"loss": 1.4541325569152832,
"step": 1391
},
{
"epoch": 1.7142857142857144,
"grad_norm": 9.227689699191664,
"learning_rate": 4.634857219347746e-06,
"loss": 1.8231902122497559,
"step": 1392
},
{
"epoch": 1.7155172413793105,
"grad_norm": 10.500866364265818,
"learning_rate": 4.627710689579968e-06,
"loss": 1.6302368640899658,
"step": 1393
},
{
"epoch": 1.7167487684729064,
"grad_norm": 17.60594188273056,
"learning_rate": 4.62056492452636e-06,
"loss": 1.497374415397644,
"step": 1394
},
{
"epoch": 1.7179802955665024,
"grad_norm": 15.287585545597818,
"learning_rate": 4.613419938864937e-06,
"loss": 1.1390448808670044,
"step": 1395
},
{
"epoch": 1.7192118226600985,
"grad_norm": 10.328419466218456,
"learning_rate": 4.606275747272112e-06,
"loss": 1.4320652484893799,
"step": 1396
},
{
"epoch": 1.7204433497536946,
"grad_norm": 9.176084187845012,
"learning_rate": 4.599132364422666e-06,
"loss": 1.2651784420013428,
"step": 1397
},
{
"epoch": 1.7216748768472905,
"grad_norm": 15.836729193949362,
"learning_rate": 4.5919898049897225e-06,
"loss": 1.719766616821289,
"step": 1398
},
{
"epoch": 1.7229064039408866,
"grad_norm": 12.937422715545681,
"learning_rate": 4.58484808364471e-06,
"loss": 1.707594394683838,
"step": 1399
},
{
"epoch": 1.7241379310344827,
"grad_norm": 14.730027238842638,
"learning_rate": 4.5777072150573355e-06,
"loss": 1.4608323574066162,
"step": 1400
},
{
"epoch": 1.7253694581280787,
"grad_norm": 9.894706364799527,
"learning_rate": 4.570567213895555e-06,
"loss": 1.5542428493499756,
"step": 1401
},
{
"epoch": 1.7266009852216748,
"grad_norm": 10.251938635324704,
"learning_rate": 4.563428094825546e-06,
"loss": 1.2282288074493408,
"step": 1402
},
{
"epoch": 1.727832512315271,
"grad_norm": 12.91095594163412,
"learning_rate": 4.556289872511669e-06,
"loss": 1.1870850324630737,
"step": 1403
},
{
"epoch": 1.729064039408867,
"grad_norm": 19.656749282746095,
"learning_rate": 4.549152561616445e-06,
"loss": 1.8125461339950562,
"step": 1404
},
{
"epoch": 1.730295566502463,
"grad_norm": 13.055834351152246,
"learning_rate": 4.542016176800527e-06,
"loss": 1.4419995546340942,
"step": 1405
},
{
"epoch": 1.7315270935960592,
"grad_norm": 12.427293973832745,
"learning_rate": 4.534880732722653e-06,
"loss": 1.8834543228149414,
"step": 1406
},
{
"epoch": 1.7327586206896552,
"grad_norm": 9.308568400780414,
"learning_rate": 4.527746244039644e-06,
"loss": 1.120203971862793,
"step": 1407
},
{
"epoch": 1.7339901477832513,
"grad_norm": 10.965136861668267,
"learning_rate": 4.5206127254063495e-06,
"loss": 0.9131630659103394,
"step": 1408
},
{
"epoch": 1.7352216748768474,
"grad_norm": 18.40693337146411,
"learning_rate": 4.513480191475627e-06,
"loss": 1.86919367313385,
"step": 1409
},
{
"epoch": 1.7364532019704435,
"grad_norm": 16.72423206220796,
"learning_rate": 4.506348656898316e-06,
"loss": 1.6573272943496704,
"step": 1410
},
{
"epoch": 1.7376847290640394,
"grad_norm": 12.29145112798753,
"learning_rate": 4.499218136323197e-06,
"loss": 1.2864340543746948,
"step": 1411
},
{
"epoch": 1.7389162561576355,
"grad_norm": 9.205794418080544,
"learning_rate": 4.492088644396972e-06,
"loss": 1.5519993305206299,
"step": 1412
},
{
"epoch": 1.7401477832512315,
"grad_norm": 10.304423144578244,
"learning_rate": 4.4849601957642295e-06,
"loss": 1.7556722164154053,
"step": 1413
},
{
"epoch": 1.7413793103448276,
"grad_norm": 12.170127229505125,
"learning_rate": 4.477832805067412e-06,
"loss": 1.6349589824676514,
"step": 1414
},
{
"epoch": 1.7426108374384235,
"grad_norm": 18.04544459439354,
"learning_rate": 4.470706486946797e-06,
"loss": 1.3583035469055176,
"step": 1415
},
{
"epoch": 1.7438423645320196,
"grad_norm": 16.035788014412844,
"learning_rate": 4.463581256040445e-06,
"loss": 1.5367932319641113,
"step": 1416
},
{
"epoch": 1.7450738916256157,
"grad_norm": 10.971734568897116,
"learning_rate": 4.456457126984196e-06,
"loss": 1.5078128576278687,
"step": 1417
},
{
"epoch": 1.7463054187192117,
"grad_norm": 8.435567334501869,
"learning_rate": 4.449334114411622e-06,
"loss": 1.8653573989868164,
"step": 1418
},
{
"epoch": 1.7475369458128078,
"grad_norm": 11.511023238806931,
"learning_rate": 4.4422122329539996e-06,
"loss": 1.1381313800811768,
"step": 1419
},
{
"epoch": 1.748768472906404,
"grad_norm": 9.115530827164923,
"learning_rate": 4.435091497240287e-06,
"loss": 1.4135184288024902,
"step": 1420
},
{
"epoch": 1.75,
"grad_norm": 19.148242044300115,
"learning_rate": 4.427971921897086e-06,
"loss": 1.2186479568481445,
"step": 1421
},
{
"epoch": 1.751231527093596,
"grad_norm": 11.735225834432583,
"learning_rate": 4.420853521548611e-06,
"loss": 1.3139259815216064,
"step": 1422
},
{
"epoch": 1.7524630541871922,
"grad_norm": 9.908228964820347,
"learning_rate": 4.413736310816669e-06,
"loss": 2.0143887996673584,
"step": 1423
},
{
"epoch": 1.7536945812807883,
"grad_norm": 11.72709904223931,
"learning_rate": 4.4066203043206226e-06,
"loss": 1.5800344944000244,
"step": 1424
},
{
"epoch": 1.7549261083743843,
"grad_norm": 13.351525970289408,
"learning_rate": 4.399505516677358e-06,
"loss": 1.449183702468872,
"step": 1425
},
{
"epoch": 1.7561576354679804,
"grad_norm": 14.449460918267059,
"learning_rate": 4.3923919625012605e-06,
"loss": 0.6957097053527832,
"step": 1426
},
{
"epoch": 1.7573891625615765,
"grad_norm": 16.656517142384814,
"learning_rate": 4.385279656404178e-06,
"loss": 1.0665647983551025,
"step": 1427
},
{
"epoch": 1.7586206896551724,
"grad_norm": 8.728452405950277,
"learning_rate": 4.3781686129953975e-06,
"loss": 1.2771016359329224,
"step": 1428
},
{
"epoch": 1.7598522167487685,
"grad_norm": 9.380843658329356,
"learning_rate": 4.371058846881614e-06,
"loss": 1.4222235679626465,
"step": 1429
},
{
"epoch": 1.7610837438423645,
"grad_norm": 18.6167744042239,
"learning_rate": 4.363950372666896e-06,
"loss": 2.1237497329711914,
"step": 1430
},
{
"epoch": 1.7623152709359606,
"grad_norm": 15.81534835320748,
"learning_rate": 4.356843204952657e-06,
"loss": 1.3875718116760254,
"step": 1431
},
{
"epoch": 1.7635467980295565,
"grad_norm": 11.325736932128727,
"learning_rate": 4.349737358337635e-06,
"loss": 1.2585203647613525,
"step": 1432
},
{
"epoch": 1.7647783251231526,
"grad_norm": 10.890833810787267,
"learning_rate": 4.3426328474178405e-06,
"loss": 1.3183746337890625,
"step": 1433
},
{
"epoch": 1.7660098522167487,
"grad_norm": 11.455742000334912,
"learning_rate": 4.335529686786554e-06,
"loss": 1.7174941301345825,
"step": 1434
},
{
"epoch": 1.7672413793103448,
"grad_norm": 9.946830568051285,
"learning_rate": 4.328427891034273e-06,
"loss": 1.9503614902496338,
"step": 1435
},
{
"epoch": 1.7684729064039408,
"grad_norm": 13.787149559571247,
"learning_rate": 4.321327474748697e-06,
"loss": 1.3797223567962646,
"step": 1436
},
{
"epoch": 1.769704433497537,
"grad_norm": 14.935693009519694,
"learning_rate": 4.3142284525146915e-06,
"loss": 1.4113730192184448,
"step": 1437
},
{
"epoch": 1.770935960591133,
"grad_norm": 11.978351079391912,
"learning_rate": 4.307130838914252e-06,
"loss": 2.383976697921753,
"step": 1438
},
{
"epoch": 1.772167487684729,
"grad_norm": 10.033247535379967,
"learning_rate": 4.300034648526489e-06,
"loss": 1.7687448263168335,
"step": 1439
},
{
"epoch": 1.7733990147783252,
"grad_norm": 15.25338664216219,
"learning_rate": 4.292939895927587e-06,
"loss": 1.5130079984664917,
"step": 1440
},
{
"epoch": 1.7746305418719213,
"grad_norm": 16.671641040457516,
"learning_rate": 4.2858465956907726e-06,
"loss": 1.0863475799560547,
"step": 1441
},
{
"epoch": 1.7758620689655173,
"grad_norm": 21.777249707868723,
"learning_rate": 4.278754762386297e-06,
"loss": 1.1504137516021729,
"step": 1442
},
{
"epoch": 1.7770935960591134,
"grad_norm": 10.960123964926488,
"learning_rate": 4.271664410581392e-06,
"loss": 1.1227596998214722,
"step": 1443
},
{
"epoch": 1.7783251231527095,
"grad_norm": 10.668478758892386,
"learning_rate": 4.264575554840248e-06,
"loss": 1.4501817226409912,
"step": 1444
},
{
"epoch": 1.7795566502463054,
"grad_norm": 8.508770946365994,
"learning_rate": 4.257488209723981e-06,
"loss": 0.48442721366882324,
"step": 1445
},
{
"epoch": 1.7807881773399015,
"grad_norm": 19.774025943442037,
"learning_rate": 4.25040238979061e-06,
"loss": 1.218263864517212,
"step": 1446
},
{
"epoch": 1.7820197044334976,
"grad_norm": 11.107941835251008,
"learning_rate": 4.243318109595014e-06,
"loss": 1.1711516380310059,
"step": 1447
},
{
"epoch": 1.7832512315270936,
"grad_norm": 14.393581709964357,
"learning_rate": 4.2362353836889126e-06,
"loss": 1.3575153350830078,
"step": 1448
},
{
"epoch": 1.7844827586206895,
"grad_norm": 15.514668018354685,
"learning_rate": 4.229154226620832e-06,
"loss": 2.6967573165893555,
"step": 1449
},
{
"epoch": 1.7857142857142856,
"grad_norm": 16.398555290477788,
"learning_rate": 4.2220746529360745e-06,
"loss": 2.2812700271606445,
"step": 1450
},
{
"epoch": 1.7869458128078817,
"grad_norm": 7.44372678737394,
"learning_rate": 4.2149966771766945e-06,
"loss": 1.2746225595474243,
"step": 1451
},
{
"epoch": 1.7881773399014778,
"grad_norm": 24.76309740203676,
"learning_rate": 4.207920313881459e-06,
"loss": 1.4866999387741089,
"step": 1452
},
{
"epoch": 1.7894088669950738,
"grad_norm": 12.129429402231283,
"learning_rate": 4.200845577585827e-06,
"loss": 1.4830021858215332,
"step": 1453
},
{
"epoch": 1.79064039408867,
"grad_norm": 14.927464924948287,
"learning_rate": 4.193772482821914e-06,
"loss": 2.5529747009277344,
"step": 1454
},
{
"epoch": 1.791871921182266,
"grad_norm": 10.342903175989482,
"learning_rate": 4.186701044118459e-06,
"loss": 1.413874626159668,
"step": 1455
},
{
"epoch": 1.793103448275862,
"grad_norm": 25.730295260232445,
"learning_rate": 4.179631276000807e-06,
"loss": 2.1567163467407227,
"step": 1456
},
{
"epoch": 1.7943349753694582,
"grad_norm": 30.70195031797357,
"learning_rate": 4.1725631929908684e-06,
"loss": 1.851858139038086,
"step": 1457
},
{
"epoch": 1.7955665024630543,
"grad_norm": 15.74317099171368,
"learning_rate": 4.165496809607089e-06,
"loss": 1.2765101194381714,
"step": 1458
},
{
"epoch": 1.7967980295566504,
"grad_norm": 10.995413854030392,
"learning_rate": 4.158432140364431e-06,
"loss": 1.9869401454925537,
"step": 1459
},
{
"epoch": 1.7980295566502464,
"grad_norm": 14.263851286153963,
"learning_rate": 4.151369199774325e-06,
"loss": 1.5319430828094482,
"step": 1460
},
{
"epoch": 1.7992610837438425,
"grad_norm": 10.506976676212952,
"learning_rate": 4.1443080023446605e-06,
"loss": 1.487468957901001,
"step": 1461
},
{
"epoch": 1.8004926108374384,
"grad_norm": 23.04137362584248,
"learning_rate": 4.137248562579742e-06,
"loss": 1.6152423620224,
"step": 1462
},
{
"epoch": 1.8017241379310345,
"grad_norm": 8.431434363474125,
"learning_rate": 4.130190894980262e-06,
"loss": 1.5262070894241333,
"step": 1463
},
{
"epoch": 1.8029556650246306,
"grad_norm": 9.129193697661835,
"learning_rate": 4.123135014043279e-06,
"loss": 1.6697289943695068,
"step": 1464
},
{
"epoch": 1.8041871921182266,
"grad_norm": 14.310350877734502,
"learning_rate": 4.116080934262175e-06,
"loss": 1.470789909362793,
"step": 1465
},
{
"epoch": 1.8054187192118225,
"grad_norm": 10.462627135626132,
"learning_rate": 4.109028670126635e-06,
"loss": 1.62421715259552,
"step": 1466
},
{
"epoch": 1.8066502463054186,
"grad_norm": 9.463272161807932,
"learning_rate": 4.101978236122613e-06,
"loss": 2.1249561309814453,
"step": 1467
},
{
"epoch": 1.8078817733990147,
"grad_norm": 10.291280772031216,
"learning_rate": 4.094929646732309e-06,
"loss": 1.3368217945098877,
"step": 1468
},
{
"epoch": 1.8091133004926108,
"grad_norm": 13.897028873169491,
"learning_rate": 4.087882916434126e-06,
"loss": 0.8684915900230408,
"step": 1469
},
{
"epoch": 1.8103448275862069,
"grad_norm": 9.114980502172534,
"learning_rate": 4.080838059702656e-06,
"loss": 1.6997764110565186,
"step": 1470
},
{
"epoch": 1.811576354679803,
"grad_norm": 15.00723435129453,
"learning_rate": 4.0737950910086354e-06,
"loss": 0.8933043479919434,
"step": 1471
},
{
"epoch": 1.812807881773399,
"grad_norm": 8.849165431721978,
"learning_rate": 4.0667540248189265e-06,
"loss": 1.689558982849121,
"step": 1472
},
{
"epoch": 1.814039408866995,
"grad_norm": 8.28022241305891,
"learning_rate": 4.059714875596486e-06,
"loss": 1.797630786895752,
"step": 1473
},
{
"epoch": 1.8152709359605912,
"grad_norm": 8.44088037241126,
"learning_rate": 4.052677657800327e-06,
"loss": 2.023120164871216,
"step": 1474
},
{
"epoch": 1.8165024630541873,
"grad_norm": 13.31766346957086,
"learning_rate": 4.045642385885497e-06,
"loss": 1.5412349700927734,
"step": 1475
},
{
"epoch": 1.8177339901477834,
"grad_norm": 11.713991741569846,
"learning_rate": 4.038609074303055e-06,
"loss": 0.786411464214325,
"step": 1476
},
{
"epoch": 1.8189655172413794,
"grad_norm": 12.300017528117012,
"learning_rate": 4.0315777375000185e-06,
"loss": 1.3470659255981445,
"step": 1477
},
{
"epoch": 1.8201970443349755,
"grad_norm": 10.149728213380525,
"learning_rate": 4.02454838991936e-06,
"loss": 1.3983774185180664,
"step": 1478
},
{
"epoch": 1.8214285714285714,
"grad_norm": 8.907879387840488,
"learning_rate": 4.017521045999961e-06,
"loss": 1.9945271015167236,
"step": 1479
},
{
"epoch": 1.8226600985221675,
"grad_norm": 14.485464092551117,
"learning_rate": 4.0104957201765874e-06,
"loss": 1.6103991270065308,
"step": 1480
},
{
"epoch": 1.8238916256157636,
"grad_norm": 10.17521459795804,
"learning_rate": 4.003472426879866e-06,
"loss": 1.2794644832611084,
"step": 1481
},
{
"epoch": 1.8251231527093597,
"grad_norm": 12.76602401465421,
"learning_rate": 3.996451180536237e-06,
"loss": 1.4485671520233154,
"step": 1482
},
{
"epoch": 1.8263546798029555,
"grad_norm": 10.794290467835673,
"learning_rate": 3.989431995567947e-06,
"loss": 1.1264885663986206,
"step": 1483
},
{
"epoch": 1.8275862068965516,
"grad_norm": 9.866085409894106,
"learning_rate": 3.982414886393002e-06,
"loss": 1.7849301099777222,
"step": 1484
},
{
"epoch": 1.8288177339901477,
"grad_norm": 12.201702589426084,
"learning_rate": 3.975399867425146e-06,
"loss": 2.4955849647521973,
"step": 1485
},
{
"epoch": 1.8300492610837438,
"grad_norm": 9.102568432625791,
"learning_rate": 3.96838695307383e-06,
"loss": 1.3440265655517578,
"step": 1486
},
{
"epoch": 1.8312807881773399,
"grad_norm": 8.145548979456889,
"learning_rate": 3.961376157744183e-06,
"loss": 1.7565090656280518,
"step": 1487
},
{
"epoch": 1.832512315270936,
"grad_norm": 10.525904376218351,
"learning_rate": 3.954367495836978e-06,
"loss": 2.086646318435669,
"step": 1488
},
{
"epoch": 1.833743842364532,
"grad_norm": 11.110223461103494,
"learning_rate": 3.947360981748607e-06,
"loss": 2.0356874465942383,
"step": 1489
},
{
"epoch": 1.8349753694581281,
"grad_norm": 18.648426152647907,
"learning_rate": 3.940356629871051e-06,
"loss": 1.3129501342773438,
"step": 1490
},
{
"epoch": 1.8362068965517242,
"grad_norm": 9.730568476467749,
"learning_rate": 3.933354454591851e-06,
"loss": 1.468184471130371,
"step": 1491
},
{
"epoch": 1.8374384236453203,
"grad_norm": 11.185413004826554,
"learning_rate": 3.926354470294077e-06,
"loss": 1.4110320806503296,
"step": 1492
},
{
"epoch": 1.8386699507389164,
"grad_norm": 12.98897769174535,
"learning_rate": 3.9193566913562915e-06,
"loss": 1.0595703125,
"step": 1493
},
{
"epoch": 1.8399014778325125,
"grad_norm": 10.530840377449582,
"learning_rate": 3.912361132152537e-06,
"loss": 1.628462791442871,
"step": 1494
},
{
"epoch": 1.8411330049261085,
"grad_norm": 14.948049661995398,
"learning_rate": 3.9053678070522904e-06,
"loss": 1.3903121948242188,
"step": 1495
},
{
"epoch": 1.8423645320197044,
"grad_norm": 9.309801488918017,
"learning_rate": 3.898376730420442e-06,
"loss": 1.6935603618621826,
"step": 1496
},
{
"epoch": 1.8435960591133005,
"grad_norm": 12.543386647265335,
"learning_rate": 3.891387916617261e-06,
"loss": 1.2785383462905884,
"step": 1497
},
{
"epoch": 1.8448275862068966,
"grad_norm": 16.302631057977127,
"learning_rate": 3.884401379998375e-06,
"loss": 0.9488393068313599,
"step": 1498
},
{
"epoch": 1.8460591133004927,
"grad_norm": 13.324215983939714,
"learning_rate": 3.877417134914724e-06,
"loss": 1.7822269201278687,
"step": 1499
},
{
"epoch": 1.8472906403940885,
"grad_norm": 18.86267601616338,
"learning_rate": 3.870435195712547e-06,
"loss": 2.0112462043762207,
"step": 1500
},
{
"epoch": 1.8485221674876846,
"grad_norm": 9.69652966834403,
"learning_rate": 3.863455576733349e-06,
"loss": 1.3558632135391235,
"step": 1501
},
{
"epoch": 1.8497536945812807,
"grad_norm": 11.295411751598015,
"learning_rate": 3.856478292313864e-06,
"loss": 1.34049391746521,
"step": 1502
},
{
"epoch": 1.8509852216748768,
"grad_norm": 14.146066291430358,
"learning_rate": 3.849503356786034e-06,
"loss": 1.5048649311065674,
"step": 1503
},
{
"epoch": 1.8522167487684729,
"grad_norm": 15.401780869737596,
"learning_rate": 3.842530784476971e-06,
"loss": 1.595820426940918,
"step": 1504
},
{
"epoch": 1.853448275862069,
"grad_norm": 14.910425010360937,
"learning_rate": 3.83556058970894e-06,
"loss": 1.4003782272338867,
"step": 1505
},
{
"epoch": 1.854679802955665,
"grad_norm": 7.9611824961674476,
"learning_rate": 3.828592786799318e-06,
"loss": 1.6082279682159424,
"step": 1506
},
{
"epoch": 1.8559113300492611,
"grad_norm": 10.255592390028927,
"learning_rate": 3.821627390060568e-06,
"loss": 1.7311087846755981,
"step": 1507
},
{
"epoch": 1.8571428571428572,
"grad_norm": 12.058780526558753,
"learning_rate": 3.8146644138002154e-06,
"loss": 1.2369680404663086,
"step": 1508
},
{
"epoch": 1.8583743842364533,
"grad_norm": 19.050247314658538,
"learning_rate": 3.807703872320809e-06,
"loss": 0.8267203569412231,
"step": 1509
},
{
"epoch": 1.8596059113300494,
"grad_norm": 10.351521057178017,
"learning_rate": 3.8007457799198977e-06,
"loss": 1.310041904449463,
"step": 1510
},
{
"epoch": 1.8608374384236455,
"grad_norm": 10.657442856658305,
"learning_rate": 3.79379015089e-06,
"loss": 1.483811378479004,
"step": 1511
},
{
"epoch": 1.8620689655172413,
"grad_norm": 11.888669790205059,
"learning_rate": 3.7868369995185734e-06,
"loss": 1.7339284420013428,
"step": 1512
},
{
"epoch": 1.8633004926108374,
"grad_norm": 10.593168183344854,
"learning_rate": 3.7798863400879894e-06,
"loss": 0.8915985822677612,
"step": 1513
},
{
"epoch": 1.8645320197044335,
"grad_norm": 10.734489115549072,
"learning_rate": 3.7729381868754985e-06,
"loss": 2.3413619995117188,
"step": 1514
},
{
"epoch": 1.8657635467980296,
"grad_norm": 9.967376867351366,
"learning_rate": 3.7659925541532006e-06,
"loss": 1.422214388847351,
"step": 1515
},
{
"epoch": 1.8669950738916257,
"grad_norm": 9.453365529159266,
"learning_rate": 3.759049456188022e-06,
"loss": 1.435701847076416,
"step": 1516
},
{
"epoch": 1.8682266009852215,
"grad_norm": 13.939960554468646,
"learning_rate": 3.752108907241682e-06,
"loss": 1.0702649354934692,
"step": 1517
},
{
"epoch": 1.8694581280788176,
"grad_norm": 14.375834204057075,
"learning_rate": 3.7451709215706643e-06,
"loss": 1.3625175952911377,
"step": 1518
},
{
"epoch": 1.8706896551724137,
"grad_norm": 14.38912976471083,
"learning_rate": 3.738235513426184e-06,
"loss": 0.6707335710525513,
"step": 1519
},
{
"epoch": 1.8719211822660098,
"grad_norm": 6.68307140655082,
"learning_rate": 3.7313026970541687e-06,
"loss": 0.9573410749435425,
"step": 1520
},
{
"epoch": 1.8731527093596059,
"grad_norm": 8.282620378739653,
"learning_rate": 3.7243724866952114e-06,
"loss": 1.625769853591919,
"step": 1521
},
{
"epoch": 1.874384236453202,
"grad_norm": 12.4684771792282,
"learning_rate": 3.717444896584562e-06,
"loss": 1.2327096462249756,
"step": 1522
},
{
"epoch": 1.875615763546798,
"grad_norm": 13.733071586817578,
"learning_rate": 3.710519940952085e-06,
"loss": 1.9436770677566528,
"step": 1523
},
{
"epoch": 1.8768472906403941,
"grad_norm": 11.428790282383929,
"learning_rate": 3.703597634022232e-06,
"loss": 1.260964274406433,
"step": 1524
},
{
"epoch": 1.8780788177339902,
"grad_norm": 10.74418094547702,
"learning_rate": 3.6966779900140193e-06,
"loss": 0.9448941946029663,
"step": 1525
},
{
"epoch": 1.8793103448275863,
"grad_norm": 14.784266967626037,
"learning_rate": 3.689761023140981e-06,
"loss": 1.0470240116119385,
"step": 1526
},
{
"epoch": 1.8805418719211824,
"grad_norm": 12.626289871406675,
"learning_rate": 3.6828467476111664e-06,
"loss": 1.290519118309021,
"step": 1527
},
{
"epoch": 1.8817733990147785,
"grad_norm": 8.368189133022403,
"learning_rate": 3.675935177627088e-06,
"loss": 1.6617997884750366,
"step": 1528
},
{
"epoch": 1.8830049261083743,
"grad_norm": 22.331563820583295,
"learning_rate": 3.6690263273857035e-06,
"loss": 2.624133825302124,
"step": 1529
},
{
"epoch": 1.8842364532019704,
"grad_norm": 11.125845605261798,
"learning_rate": 3.662120211078385e-06,
"loss": 1.189339518547058,
"step": 1530
},
{
"epoch": 1.8854679802955665,
"grad_norm": 11.063623504952298,
"learning_rate": 3.6552168428908886e-06,
"loss": 1.2045223712921143,
"step": 1531
},
{
"epoch": 1.8866995073891626,
"grad_norm": 21.05973901513674,
"learning_rate": 3.648316237003321e-06,
"loss": 1.4260770082473755,
"step": 1532
},
{
"epoch": 1.8879310344827587,
"grad_norm": 9.70528654459795,
"learning_rate": 3.6414184075901206e-06,
"loss": 1.1973135471343994,
"step": 1533
},
{
"epoch": 1.8891625615763545,
"grad_norm": 18.383885319550775,
"learning_rate": 3.6345233688200195e-06,
"loss": 1.4474105834960938,
"step": 1534
},
{
"epoch": 1.8903940886699506,
"grad_norm": 9.565993696711384,
"learning_rate": 3.62763113485602e-06,
"loss": 1.5732392072677612,
"step": 1535
},
{
"epoch": 1.8916256157635467,
"grad_norm": 18.830417927799424,
"learning_rate": 3.6207417198553624e-06,
"loss": 1.992612361907959,
"step": 1536
},
{
"epoch": 1.8928571428571428,
"grad_norm": 8.528733872408509,
"learning_rate": 3.6138551379694936e-06,
"loss": 1.8015589714050293,
"step": 1537
},
{
"epoch": 1.8940886699507389,
"grad_norm": 20.045548838222032,
"learning_rate": 3.606971403344044e-06,
"loss": 1.1887943744659424,
"step": 1538
},
{
"epoch": 1.895320197044335,
"grad_norm": 8.574686397942823,
"learning_rate": 3.6000905301187953e-06,
"loss": 1.035568118095398,
"step": 1539
},
{
"epoch": 1.896551724137931,
"grad_norm": 8.862677959647126,
"learning_rate": 3.5932125324276524e-06,
"loss": 1.8441094160079956,
"step": 1540
},
{
"epoch": 1.8977832512315271,
"grad_norm": 21.317551937175974,
"learning_rate": 3.586337424398609e-06,
"loss": 2.7305843830108643,
"step": 1541
},
{
"epoch": 1.8990147783251232,
"grad_norm": 12.092619936908829,
"learning_rate": 3.579465220153733e-06,
"loss": 2.1233139038085938,
"step": 1542
},
{
"epoch": 1.9002463054187193,
"grad_norm": 11.705206958955536,
"learning_rate": 3.5725959338091133e-06,
"loss": 1.232177495956421,
"step": 1543
},
{
"epoch": 1.9014778325123154,
"grad_norm": 7.174113743881224,
"learning_rate": 3.565729579474858e-06,
"loss": 1.89857017993927,
"step": 1544
},
{
"epoch": 1.9027093596059115,
"grad_norm": 15.788866110425763,
"learning_rate": 3.5588661712550464e-06,
"loss": 1.1281499862670898,
"step": 1545
},
{
"epoch": 1.9039408866995073,
"grad_norm": 10.470956040036935,
"learning_rate": 3.5520057232477073e-06,
"loss": 1.2526335716247559,
"step": 1546
},
{
"epoch": 1.9051724137931034,
"grad_norm": 9.301464059536526,
"learning_rate": 3.545148249544793e-06,
"loss": 1.8187229633331299,
"step": 1547
},
{
"epoch": 1.9064039408866995,
"grad_norm": 9.75451095353705,
"learning_rate": 3.5382937642321356e-06,
"loss": 2.5140726566314697,
"step": 1548
},
{
"epoch": 1.9076354679802956,
"grad_norm": 12.829934813861579,
"learning_rate": 3.5314422813894413e-06,
"loss": 1.4403750896453857,
"step": 1549
},
{
"epoch": 1.9088669950738915,
"grad_norm": 16.531679337353626,
"learning_rate": 3.524593815090241e-06,
"loss": 2.1372480392456055,
"step": 1550
},
{
"epoch": 1.9100985221674875,
"grad_norm": 15.674375359336546,
"learning_rate": 3.517748379401872e-06,
"loss": 1.3283928632736206,
"step": 1551
},
{
"epoch": 1.9113300492610836,
"grad_norm": 18.1169052598084,
"learning_rate": 3.510905988385449e-06,
"loss": 0.915777325630188,
"step": 1552
},
{
"epoch": 1.9125615763546797,
"grad_norm": 9.21207861248202,
"learning_rate": 3.5040666560958246e-06,
"loss": 1.4235864877700806,
"step": 1553
},
{
"epoch": 1.9137931034482758,
"grad_norm": 10.331880853016509,
"learning_rate": 3.497230396581579e-06,
"loss": 1.0727063417434692,
"step": 1554
},
{
"epoch": 1.9150246305418719,
"grad_norm": 6.2183233261424675,
"learning_rate": 3.4903972238849727e-06,
"loss": 1.2492493391036987,
"step": 1555
},
{
"epoch": 1.916256157635468,
"grad_norm": 8.689347093090742,
"learning_rate": 3.483567152041928e-06,
"loss": 1.855743408203125,
"step": 1556
},
{
"epoch": 1.917487684729064,
"grad_norm": 13.400775432098582,
"learning_rate": 3.4767401950820003e-06,
"loss": 1.2882115840911865,
"step": 1557
},
{
"epoch": 1.9187192118226601,
"grad_norm": 17.24953530796186,
"learning_rate": 3.469916367028345e-06,
"loss": 1.0586508512496948,
"step": 1558
},
{
"epoch": 1.9199507389162562,
"grad_norm": 7.936641918837841,
"learning_rate": 3.4630956818976875e-06,
"loss": 1.6678158044815063,
"step": 1559
},
{
"epoch": 1.9211822660098523,
"grad_norm": 7.533268622313887,
"learning_rate": 3.4562781537003e-06,
"loss": 1.242276906967163,
"step": 1560
},
{
"epoch": 1.9224137931034484,
"grad_norm": 11.64160436044446,
"learning_rate": 3.4494637964399723e-06,
"loss": 1.1909584999084473,
"step": 1561
},
{
"epoch": 1.9236453201970445,
"grad_norm": 10.255728334199201,
"learning_rate": 3.4426526241139778e-06,
"loss": 1.7636524438858032,
"step": 1562
},
{
"epoch": 1.9248768472906403,
"grad_norm": 9.49054957516609,
"learning_rate": 3.4358446507130503e-06,
"loss": 1.709825873374939,
"step": 1563
},
{
"epoch": 1.9261083743842364,
"grad_norm": 10.818350574028944,
"learning_rate": 3.4290398902213473e-06,
"loss": 1.0826925039291382,
"step": 1564
},
{
"epoch": 1.9273399014778325,
"grad_norm": 8.939498431984473,
"learning_rate": 3.4222383566164314e-06,
"loss": 1.2868252992630005,
"step": 1565
},
{
"epoch": 1.9285714285714286,
"grad_norm": 8.295112275795647,
"learning_rate": 3.4154400638692376e-06,
"loss": 1.9238274097442627,
"step": 1566
},
{
"epoch": 1.9298029556650245,
"grad_norm": 15.317456416232107,
"learning_rate": 3.408645025944042e-06,
"loss": 1.615818977355957,
"step": 1567
},
{
"epoch": 1.9310344827586206,
"grad_norm": 10.763654992556582,
"learning_rate": 3.4018532567984326e-06,
"loss": 1.124712586402893,
"step": 1568
},
{
"epoch": 1.9322660098522166,
"grad_norm": 12.365184508586257,
"learning_rate": 3.3950647703832907e-06,
"loss": 1.0411077737808228,
"step": 1569
},
{
"epoch": 1.9334975369458127,
"grad_norm": 12.632249203055522,
"learning_rate": 3.3882795806427437e-06,
"loss": 1.4247188568115234,
"step": 1570
},
{
"epoch": 1.9347290640394088,
"grad_norm": 9.103913844192295,
"learning_rate": 3.3814977015141576e-06,
"loss": 1.9558757543563843,
"step": 1571
},
{
"epoch": 1.935960591133005,
"grad_norm": 13.783502778663575,
"learning_rate": 3.3747191469280917e-06,
"loss": 1.4765770435333252,
"step": 1572
},
{
"epoch": 1.937192118226601,
"grad_norm": 12.11586545643866,
"learning_rate": 3.3679439308082777e-06,
"loss": 1.2025914192199707,
"step": 1573
},
{
"epoch": 1.938423645320197,
"grad_norm": 8.389746847537833,
"learning_rate": 3.361172067071595e-06,
"loss": 1.938293695449829,
"step": 1574
},
{
"epoch": 1.9396551724137931,
"grad_norm": 24.18653835255333,
"learning_rate": 3.3544035696280264e-06,
"loss": 1.9626538753509521,
"step": 1575
},
{
"epoch": 1.9408866995073892,
"grad_norm": 16.707227251461827,
"learning_rate": 3.34763845238065e-06,
"loss": 2.4771430492401123,
"step": 1576
},
{
"epoch": 1.9421182266009853,
"grad_norm": 9.24643762447737,
"learning_rate": 3.340876729225595e-06,
"loss": 1.5694981813430786,
"step": 1577
},
{
"epoch": 1.9433497536945814,
"grad_norm": 12.976086056891674,
"learning_rate": 3.334118414052021e-06,
"loss": 1.3358147144317627,
"step": 1578
},
{
"epoch": 1.9445812807881775,
"grad_norm": 10.05009781073385,
"learning_rate": 3.327363520742087e-06,
"loss": 1.6929140090942383,
"step": 1579
},
{
"epoch": 1.9458128078817734,
"grad_norm": 14.460477433027636,
"learning_rate": 3.320612063170926e-06,
"loss": 1.1454588174819946,
"step": 1580
},
{
"epoch": 1.9470443349753694,
"grad_norm": 15.890241219417488,
"learning_rate": 3.313864055206607e-06,
"loss": 1.3037209510803223,
"step": 1581
},
{
"epoch": 1.9482758620689655,
"grad_norm": 18.657112628058126,
"learning_rate": 3.3071195107101163e-06,
"loss": 1.2016770839691162,
"step": 1582
},
{
"epoch": 1.9495073891625616,
"grad_norm": 8.600208828774889,
"learning_rate": 3.3003784435353304e-06,
"loss": 1.5525718927383423,
"step": 1583
},
{
"epoch": 1.9507389162561575,
"grad_norm": 12.025296512404239,
"learning_rate": 3.293640867528978e-06,
"loss": 1.293796420097351,
"step": 1584
},
{
"epoch": 1.9519704433497536,
"grad_norm": 14.973626912716192,
"learning_rate": 3.2869067965306178e-06,
"loss": 1.544161081314087,
"step": 1585
},
{
"epoch": 1.9532019704433496,
"grad_norm": 12.518775732631475,
"learning_rate": 3.2801762443726087e-06,
"loss": 1.584174633026123,
"step": 1586
},
{
"epoch": 1.9544334975369457,
"grad_norm": 9.595940744200961,
"learning_rate": 3.273449224880081e-06,
"loss": 1.4985432624816895,
"step": 1587
},
{
"epoch": 1.9556650246305418,
"grad_norm": 14.194278219604545,
"learning_rate": 3.2667257518709124e-06,
"loss": 1.4310071468353271,
"step": 1588
},
{
"epoch": 1.956896551724138,
"grad_norm": 6.232251277924355,
"learning_rate": 3.260005839155691e-06,
"loss": 1.2174272537231445,
"step": 1589
},
{
"epoch": 1.958128078817734,
"grad_norm": 8.206207570805137,
"learning_rate": 3.2532895005376943e-06,
"loss": 1.4618067741394043,
"step": 1590
},
{
"epoch": 1.95935960591133,
"grad_norm": 9.028580710101858,
"learning_rate": 3.2465767498128596e-06,
"loss": 1.2786412239074707,
"step": 1591
},
{
"epoch": 1.9605911330049262,
"grad_norm": 14.53956960212149,
"learning_rate": 3.2398676007697495e-06,
"loss": 1.152226209640503,
"step": 1592
},
{
"epoch": 1.9618226600985222,
"grad_norm": 9.573027989064228,
"learning_rate": 3.233162067189533e-06,
"loss": 1.8345131874084473,
"step": 1593
},
{
"epoch": 1.9630541871921183,
"grad_norm": 12.386896406400556,
"learning_rate": 3.2264601628459513e-06,
"loss": 1.310433030128479,
"step": 1594
},
{
"epoch": 1.9642857142857144,
"grad_norm": 18.010952199354442,
"learning_rate": 3.2197619015052893e-06,
"loss": 2.3967676162719727,
"step": 1595
},
{
"epoch": 1.9655172413793105,
"grad_norm": 8.956387198130372,
"learning_rate": 3.2130672969263543e-06,
"loss": 1.7937273979187012,
"step": 1596
},
{
"epoch": 1.9667487684729064,
"grad_norm": 8.393117465017726,
"learning_rate": 3.206376362860432e-06,
"loss": 2.0265514850616455,
"step": 1597
},
{
"epoch": 1.9679802955665024,
"grad_norm": 21.13089299468655,
"learning_rate": 3.1996891130512796e-06,
"loss": 1.9514051675796509,
"step": 1598
},
{
"epoch": 1.9692118226600985,
"grad_norm": 13.738115707885685,
"learning_rate": 3.1930055612350795e-06,
"loss": 1.4068338871002197,
"step": 1599
},
{
"epoch": 1.9704433497536946,
"grad_norm": 11.875525005970715,
"learning_rate": 3.18632572114042e-06,
"loss": 1.9438577890396118,
"step": 1600
},
{
"epoch": 1.9716748768472905,
"grad_norm": 12.6800038807384,
"learning_rate": 3.1796496064882677e-06,
"loss": 1.432902455329895,
"step": 1601
},
{
"epoch": 1.9729064039408866,
"grad_norm": 10.748520734517344,
"learning_rate": 3.172977230991935e-06,
"loss": 1.6505646705627441,
"step": 1602
},
{
"epoch": 1.9741379310344827,
"grad_norm": 9.807738223531803,
"learning_rate": 3.1663086083570493e-06,
"loss": 2.332062005996704,
"step": 1603
},
{
"epoch": 1.9753694581280787,
"grad_norm": 7.777919459923873,
"learning_rate": 3.159643752281536e-06,
"loss": 1.737352967262268,
"step": 1604
},
{
"epoch": 1.9766009852216748,
"grad_norm": 12.828820681008972,
"learning_rate": 3.152982676455581e-06,
"loss": 1.5183820724487305,
"step": 1605
},
{
"epoch": 1.977832512315271,
"grad_norm": 12.058545370748947,
"learning_rate": 3.1463253945616056e-06,
"loss": 1.5560420751571655,
"step": 1606
},
{
"epoch": 1.979064039408867,
"grad_norm": 12.080370196486308,
"learning_rate": 3.1396719202742375e-06,
"loss": 2.2159786224365234,
"step": 1607
},
{
"epoch": 1.980295566502463,
"grad_norm": 11.349700550180101,
"learning_rate": 3.133022267260283e-06,
"loss": 3.4431471824645996,
"step": 1608
},
{
"epoch": 1.9815270935960592,
"grad_norm": 15.960971258656029,
"learning_rate": 3.1263764491786984e-06,
"loss": 1.0674099922180176,
"step": 1609
},
{
"epoch": 1.9827586206896552,
"grad_norm": 10.915353003367029,
"learning_rate": 3.1197344796805675e-06,
"loss": 1.2427492141723633,
"step": 1610
},
{
"epoch": 1.9839901477832513,
"grad_norm": 13.554860694250717,
"learning_rate": 3.1130963724090626e-06,
"loss": 1.5895799398422241,
"step": 1611
},
{
"epoch": 1.9852216748768474,
"grad_norm": 8.558375384118374,
"learning_rate": 3.1064621409994245e-06,
"loss": 1.3781355619430542,
"step": 1612
},
{
"epoch": 1.9864532019704435,
"grad_norm": 17.36928034840775,
"learning_rate": 3.0998317990789378e-06,
"loss": 1.3307732343673706,
"step": 1613
},
{
"epoch": 1.9876847290640394,
"grad_norm": 13.9784605520041,
"learning_rate": 3.0932053602668876e-06,
"loss": 1.340241551399231,
"step": 1614
},
{
"epoch": 1.9889162561576355,
"grad_norm": 9.756766918680166,
"learning_rate": 3.0865828381745515e-06,
"loss": 1.5866634845733643,
"step": 1615
},
{
"epoch": 1.9901477832512315,
"grad_norm": 14.514845100981475,
"learning_rate": 3.0799642464051573e-06,
"loss": 1.363608717918396,
"step": 1616
},
{
"epoch": 1.9913793103448276,
"grad_norm": 13.803723137880525,
"learning_rate": 3.0733495985538575e-06,
"loss": 0.8918144106864929,
"step": 1617
},
{
"epoch": 1.9926108374384235,
"grad_norm": 18.044340986569775,
"learning_rate": 3.0667389082077114e-06,
"loss": 1.4538538455963135,
"step": 1618
},
{
"epoch": 1.9938423645320196,
"grad_norm": 11.435301654271841,
"learning_rate": 3.0601321889456378e-06,
"loss": 1.6913137435913086,
"step": 1619
},
{
"epoch": 1.9950738916256157,
"grad_norm": 9.858778951797417,
"learning_rate": 3.0535294543384074e-06,
"loss": 1.4266109466552734,
"step": 1620
},
{
"epoch": 1.9963054187192117,
"grad_norm": 22.051543439765215,
"learning_rate": 3.046930717948604e-06,
"loss": 1.2479441165924072,
"step": 1621
},
{
"epoch": 1.9975369458128078,
"grad_norm": 9.286359312990374,
"learning_rate": 3.0403359933305965e-06,
"loss": 2.138500213623047,
"step": 1622
},
{
"epoch": 1.998768472906404,
"grad_norm": 7.759425069440999,
"learning_rate": 3.033745294030517e-06,
"loss": 1.7762420177459717,
"step": 1623
},
{
"epoch": 2.0,
"grad_norm": 16.72677410836059,
"learning_rate": 3.0271586335862258e-06,
"loss": 0.858219563961029,
"step": 1624
},
{
"epoch": 2.001231527093596,
"grad_norm": 14.643925249137768,
"learning_rate": 3.0205760255272874e-06,
"loss": 0.5493918657302856,
"step": 1625
},
{
"epoch": 2.002463054187192,
"grad_norm": 6.249448248328766,
"learning_rate": 3.013997483374944e-06,
"loss": 0.25155016779899597,
"step": 1626
},
{
"epoch": 2.0036945812807883,
"grad_norm": 12.443278487913815,
"learning_rate": 3.007423020642084e-06,
"loss": 0.7727752923965454,
"step": 1627
},
{
"epoch": 2.0049261083743843,
"grad_norm": 8.331944645794822,
"learning_rate": 3.0008526508332216e-06,
"loss": 0.43595510721206665,
"step": 1628
},
{
"epoch": 2.0061576354679804,
"grad_norm": 12.199248861649188,
"learning_rate": 2.9942863874444565e-06,
"loss": 0.3856297433376312,
"step": 1629
},
{
"epoch": 2.0073891625615765,
"grad_norm": 10.194964984786639,
"learning_rate": 2.987724243963458e-06,
"loss": 0.8458558917045593,
"step": 1630
},
{
"epoch": 2.0086206896551726,
"grad_norm": 10.400619109316716,
"learning_rate": 2.981166233869429e-06,
"loss": 0.46873772144317627,
"step": 1631
},
{
"epoch": 2.0098522167487687,
"grad_norm": 7.542731982064387,
"learning_rate": 2.9746123706330886e-06,
"loss": 0.42779290676116943,
"step": 1632
},
{
"epoch": 2.0110837438423643,
"grad_norm": 9.375159014521008,
"learning_rate": 2.9680626677166324e-06,
"loss": 0.627717912197113,
"step": 1633
},
{
"epoch": 2.0123152709359604,
"grad_norm": 7.3118642493157155,
"learning_rate": 2.9615171385737107e-06,
"loss": 1.0879265069961548,
"step": 1634
},
{
"epoch": 2.0135467980295565,
"grad_norm": 10.467281128404773,
"learning_rate": 2.9549757966494053e-06,
"loss": 0.6282559037208557,
"step": 1635
},
{
"epoch": 2.0147783251231526,
"grad_norm": 11.126192184454366,
"learning_rate": 2.9484386553801875e-06,
"loss": 0.5774171352386475,
"step": 1636
},
{
"epoch": 2.0160098522167487,
"grad_norm": 10.360450434232337,
"learning_rate": 2.9419057281939106e-06,
"loss": 0.38788995146751404,
"step": 1637
},
{
"epoch": 2.0172413793103448,
"grad_norm": 13.340772113855921,
"learning_rate": 2.935377028509766e-06,
"loss": 1.1726861000061035,
"step": 1638
},
{
"epoch": 2.018472906403941,
"grad_norm": 9.74656398362734,
"learning_rate": 2.9288525697382623e-06,
"loss": 0.7854858636856079,
"step": 1639
},
{
"epoch": 2.019704433497537,
"grad_norm": 11.086797967435993,
"learning_rate": 2.922332365281201e-06,
"loss": 0.25507253408432007,
"step": 1640
},
{
"epoch": 2.020935960591133,
"grad_norm": 13.738902835067712,
"learning_rate": 2.9158164285316356e-06,
"loss": 0.5835862755775452,
"step": 1641
},
{
"epoch": 2.022167487684729,
"grad_norm": 12.908512466729006,
"learning_rate": 2.9093047728738604e-06,
"loss": 0.49123138189315796,
"step": 1642
},
{
"epoch": 2.023399014778325,
"grad_norm": 6.708189349635942,
"learning_rate": 2.9027974116833756e-06,
"loss": 0.20273317396640778,
"step": 1643
},
{
"epoch": 2.0246305418719213,
"grad_norm": 12.517783768989945,
"learning_rate": 2.896294358326862e-06,
"loss": 0.46980565786361694,
"step": 1644
},
{
"epoch": 2.0258620689655173,
"grad_norm": 12.98671748044912,
"learning_rate": 2.889795626162143e-06,
"loss": 0.23243547976016998,
"step": 1645
},
{
"epoch": 2.0270935960591134,
"grad_norm": 21.52509717224934,
"learning_rate": 2.883301228538178e-06,
"loss": 1.3259830474853516,
"step": 1646
},
{
"epoch": 2.0283251231527095,
"grad_norm": 10.539113199927511,
"learning_rate": 2.8768111787950105e-06,
"loss": 0.3021068274974823,
"step": 1647
},
{
"epoch": 2.0295566502463056,
"grad_norm": 9.17401806944997,
"learning_rate": 2.8703254902637646e-06,
"loss": 0.3854427933692932,
"step": 1648
},
{
"epoch": 2.0307881773399017,
"grad_norm": 14.201306893364228,
"learning_rate": 2.8638441762665957e-06,
"loss": 0.3356427848339081,
"step": 1649
},
{
"epoch": 2.0320197044334973,
"grad_norm": 17.83956908779597,
"learning_rate": 2.857367250116682e-06,
"loss": 0.4785861372947693,
"step": 1650
},
{
"epoch": 2.0332512315270934,
"grad_norm": 7.19305688493566,
"learning_rate": 2.8508947251181885e-06,
"loss": 0.1944020539522171,
"step": 1651
},
{
"epoch": 2.0344827586206895,
"grad_norm": 10.046970652926046,
"learning_rate": 2.8444266145662284e-06,
"loss": 0.29677248001098633,
"step": 1652
},
{
"epoch": 2.0357142857142856,
"grad_norm": 24.647186410998657,
"learning_rate": 2.8379629317468604e-06,
"loss": 1.517862319946289,
"step": 1653
},
{
"epoch": 2.0369458128078817,
"grad_norm": 13.23680169167266,
"learning_rate": 2.8315036899370442e-06,
"loss": 0.5191118717193604,
"step": 1654
},
{
"epoch": 2.0381773399014778,
"grad_norm": 13.059908687808356,
"learning_rate": 2.825048902404612e-06,
"loss": 0.42354950308799744,
"step": 1655
},
{
"epoch": 2.039408866995074,
"grad_norm": 12.282344754345834,
"learning_rate": 2.818598582408255e-06,
"loss": 0.6974557638168335,
"step": 1656
},
{
"epoch": 2.04064039408867,
"grad_norm": 11.678426390945974,
"learning_rate": 2.8121527431974838e-06,
"loss": 0.8337801694869995,
"step": 1657
},
{
"epoch": 2.041871921182266,
"grad_norm": 11.653625925472546,
"learning_rate": 2.805711398012604e-06,
"loss": 0.48300114274024963,
"step": 1658
},
{
"epoch": 2.043103448275862,
"grad_norm": 8.699921165351283,
"learning_rate": 2.799274560084688e-06,
"loss": 0.2231900542974472,
"step": 1659
},
{
"epoch": 2.044334975369458,
"grad_norm": 11.080926890704283,
"learning_rate": 2.7928422426355554e-06,
"loss": 0.7431713342666626,
"step": 1660
},
{
"epoch": 2.0455665024630543,
"grad_norm": 10.18242138749306,
"learning_rate": 2.7864144588777403e-06,
"loss": 0.5905585289001465,
"step": 1661
},
{
"epoch": 2.0467980295566504,
"grad_norm": 12.79007023215843,
"learning_rate": 2.779991222014459e-06,
"loss": 0.5379045009613037,
"step": 1662
},
{
"epoch": 2.0480295566502464,
"grad_norm": 10.204627357114346,
"learning_rate": 2.77357254523959e-06,
"loss": 0.4073173403739929,
"step": 1663
},
{
"epoch": 2.0492610837438425,
"grad_norm": 16.54029756463169,
"learning_rate": 2.767158441737646e-06,
"loss": 0.37792834639549255,
"step": 1664
},
{
"epoch": 2.0504926108374386,
"grad_norm": 12.199606214048373,
"learning_rate": 2.7607489246837505e-06,
"loss": 0.5250200629234314,
"step": 1665
},
{
"epoch": 2.0517241379310347,
"grad_norm": 15.23569807667072,
"learning_rate": 2.754344007243594e-06,
"loss": 0.7716425061225891,
"step": 1666
},
{
"epoch": 2.0529556650246303,
"grad_norm": 7.925817755895629,
"learning_rate": 2.74794370257343e-06,
"loss": 0.6505113244056702,
"step": 1667
},
{
"epoch": 2.0541871921182264,
"grad_norm": 13.232372975936459,
"learning_rate": 2.741548023820037e-06,
"loss": 1.237591028213501,
"step": 1668
},
{
"epoch": 2.0554187192118225,
"grad_norm": 7.821194651549222,
"learning_rate": 2.7351569841206792e-06,
"loss": 0.33151859045028687,
"step": 1669
},
{
"epoch": 2.0566502463054186,
"grad_norm": 9.91473906287112,
"learning_rate": 2.728770596603105e-06,
"loss": 0.42522889375686646,
"step": 1670
},
{
"epoch": 2.0578817733990147,
"grad_norm": 10.678926533172987,
"learning_rate": 2.722388874385503e-06,
"loss": 0.3359280824661255,
"step": 1671
},
{
"epoch": 2.0591133004926108,
"grad_norm": 9.193563725792906,
"learning_rate": 2.716011830576475e-06,
"loss": 0.23182198405265808,
"step": 1672
},
{
"epoch": 2.060344827586207,
"grad_norm": 13.12855060675622,
"learning_rate": 2.7096394782750186e-06,
"loss": 0.30262982845306396,
"step": 1673
},
{
"epoch": 2.061576354679803,
"grad_norm": 7.791350721856929,
"learning_rate": 2.7032718305704887e-06,
"loss": 0.23311859369277954,
"step": 1674
},
{
"epoch": 2.062807881773399,
"grad_norm": 12.221292312776084,
"learning_rate": 2.696908900542584e-06,
"loss": 0.6328019499778748,
"step": 1675
},
{
"epoch": 2.064039408866995,
"grad_norm": 10.8289045782447,
"learning_rate": 2.690550701261304e-06,
"loss": 0.30473750829696655,
"step": 1676
},
{
"epoch": 2.065270935960591,
"grad_norm": 8.921318423622994,
"learning_rate": 2.684197245786938e-06,
"loss": 0.2824372947216034,
"step": 1677
},
{
"epoch": 2.0665024630541873,
"grad_norm": 15.101179094698006,
"learning_rate": 2.677848547170029e-06,
"loss": 0.3543265163898468,
"step": 1678
},
{
"epoch": 2.0677339901477834,
"grad_norm": 8.79612621311314,
"learning_rate": 2.671504618451348e-06,
"loss": 0.6176484823226929,
"step": 1679
},
{
"epoch": 2.0689655172413794,
"grad_norm": 10.985306627235934,
"learning_rate": 2.665165472661866e-06,
"loss": 0.5290611386299133,
"step": 1680
},
{
"epoch": 2.0701970443349755,
"grad_norm": 8.398062035832517,
"learning_rate": 2.658831122822735e-06,
"loss": 0.5321454405784607,
"step": 1681
},
{
"epoch": 2.0714285714285716,
"grad_norm": 11.540193919775621,
"learning_rate": 2.6525015819452504e-06,
"loss": 0.27902156114578247,
"step": 1682
},
{
"epoch": 2.0726600985221673,
"grad_norm": 12.60801369495054,
"learning_rate": 2.6461768630308326e-06,
"loss": 0.46582847833633423,
"step": 1683
},
{
"epoch": 2.0738916256157633,
"grad_norm": 15.322116984466021,
"learning_rate": 2.6398569790710007e-06,
"loss": 0.651951014995575,
"step": 1684
},
{
"epoch": 2.0751231527093594,
"grad_norm": 9.74038331873093,
"learning_rate": 2.633541943047334e-06,
"loss": 0.36612239480018616,
"step": 1685
},
{
"epoch": 2.0763546798029555,
"grad_norm": 7.730903286765135,
"learning_rate": 2.6272317679314573e-06,
"loss": 0.22278031706809998,
"step": 1686
},
{
"epoch": 2.0775862068965516,
"grad_norm": 7.781634586207103,
"learning_rate": 2.620926466685013e-06,
"loss": 0.33012956380844116,
"step": 1687
},
{
"epoch": 2.0788177339901477,
"grad_norm": 9.397683957095191,
"learning_rate": 2.6146260522596334e-06,
"loss": 0.7396690845489502,
"step": 1688
},
{
"epoch": 2.0800492610837438,
"grad_norm": 11.988801603692485,
"learning_rate": 2.608330537596907e-06,
"loss": 0.8257578611373901,
"step": 1689
},
{
"epoch": 2.08128078817734,
"grad_norm": 8.855369489146483,
"learning_rate": 2.6020399356283586e-06,
"loss": 0.4538348317146301,
"step": 1690
},
{
"epoch": 2.082512315270936,
"grad_norm": 9.991399228257757,
"learning_rate": 2.595754259275428e-06,
"loss": 0.992777943611145,
"step": 1691
},
{
"epoch": 2.083743842364532,
"grad_norm": 11.406818947912145,
"learning_rate": 2.589473521449434e-06,
"loss": 0.346379816532135,
"step": 1692
},
{
"epoch": 2.084975369458128,
"grad_norm": 18.61665504561422,
"learning_rate": 2.583197735051546e-06,
"loss": 0.4523533284664154,
"step": 1693
},
{
"epoch": 2.086206896551724,
"grad_norm": 9.296672908995824,
"learning_rate": 2.576926912972771e-06,
"loss": 0.11842907965183258,
"step": 1694
},
{
"epoch": 2.0874384236453203,
"grad_norm": 8.459525770988064,
"learning_rate": 2.5706610680939186e-06,
"loss": 0.381897896528244,
"step": 1695
},
{
"epoch": 2.0886699507389164,
"grad_norm": 11.109371262298351,
"learning_rate": 2.564400213285564e-06,
"loss": 0.3824227452278137,
"step": 1696
},
{
"epoch": 2.0899014778325125,
"grad_norm": 7.622915250326246,
"learning_rate": 2.5581443614080433e-06,
"loss": 0.4153192639350891,
"step": 1697
},
{
"epoch": 2.0911330049261085,
"grad_norm": 12.840140963343943,
"learning_rate": 2.5518935253114153e-06,
"loss": 0.3284783959388733,
"step": 1698
},
{
"epoch": 2.0923645320197046,
"grad_norm": 9.586633818986163,
"learning_rate": 2.545647717835428e-06,
"loss": 0.7730638980865479,
"step": 1699
},
{
"epoch": 2.0935960591133007,
"grad_norm": 9.329889124511917,
"learning_rate": 2.539406951809512e-06,
"loss": 0.31647253036499023,
"step": 1700
},
{
"epoch": 2.0948275862068964,
"grad_norm": 12.004447197114908,
"learning_rate": 2.53317124005273e-06,
"loss": 0.5977708101272583,
"step": 1701
},
{
"epoch": 2.0960591133004924,
"grad_norm": 8.69992433934411,
"learning_rate": 2.5269405953737735e-06,
"loss": 0.2646758556365967,
"step": 1702
},
{
"epoch": 2.0972906403940885,
"grad_norm": 8.02489022856674,
"learning_rate": 2.5207150305709167e-06,
"loss": 0.5242122411727905,
"step": 1703
},
{
"epoch": 2.0985221674876846,
"grad_norm": 13.343080912035035,
"learning_rate": 2.5144945584320056e-06,
"loss": 0.43271976709365845,
"step": 1704
},
{
"epoch": 2.0997536945812807,
"grad_norm": 16.386560709178422,
"learning_rate": 2.5082791917344256e-06,
"loss": 0.902009904384613,
"step": 1705
},
{
"epoch": 2.100985221674877,
"grad_norm": 8.363747351262921,
"learning_rate": 2.5020689432450706e-06,
"loss": 0.5218071937561035,
"step": 1706
},
{
"epoch": 2.102216748768473,
"grad_norm": 13.441523308623053,
"learning_rate": 2.495863825720322e-06,
"loss": 0.7475143671035767,
"step": 1707
},
{
"epoch": 2.103448275862069,
"grad_norm": 9.20779623087441,
"learning_rate": 2.4896638519060257e-06,
"loss": 0.31655290722846985,
"step": 1708
},
{
"epoch": 2.104679802955665,
"grad_norm": 12.453919142267711,
"learning_rate": 2.4834690345374608e-06,
"loss": 0.30808842182159424,
"step": 1709
},
{
"epoch": 2.105911330049261,
"grad_norm": 12.241452294332287,
"learning_rate": 2.477279386339309e-06,
"loss": 0.7037611603736877,
"step": 1710
},
{
"epoch": 2.107142857142857,
"grad_norm": 14.091630182879387,
"learning_rate": 2.471094920025644e-06,
"loss": 0.4699273407459259,
"step": 1711
},
{
"epoch": 2.1083743842364533,
"grad_norm": 13.920276564221119,
"learning_rate": 2.4649156482998873e-06,
"loss": 0.5032830238342285,
"step": 1712
},
{
"epoch": 2.1096059113300494,
"grad_norm": 12.895772980307312,
"learning_rate": 2.45874158385479e-06,
"loss": 1.2563080787658691,
"step": 1713
},
{
"epoch": 2.1108374384236455,
"grad_norm": 7.446774906593091,
"learning_rate": 2.4525727393724136e-06,
"loss": 0.29728978872299194,
"step": 1714
},
{
"epoch": 2.1120689655172415,
"grad_norm": 9.446867560016528,
"learning_rate": 2.446409127524094e-06,
"loss": 0.2391032576560974,
"step": 1715
},
{
"epoch": 2.1133004926108376,
"grad_norm": 13.287475847065688,
"learning_rate": 2.4402507609704163e-06,
"loss": 0.4612117409706116,
"step": 1716
},
{
"epoch": 2.1145320197044333,
"grad_norm": 9.000836025460185,
"learning_rate": 2.4340976523611957e-06,
"loss": 0.36539849638938904,
"step": 1717
},
{
"epoch": 2.1157635467980294,
"grad_norm": 6.954876550316873,
"learning_rate": 2.427949814335443e-06,
"loss": 0.2918080687522888,
"step": 1718
},
{
"epoch": 2.1169950738916254,
"grad_norm": 12.290862216055704,
"learning_rate": 2.4218072595213467e-06,
"loss": 0.4508627653121948,
"step": 1719
},
{
"epoch": 2.1182266009852215,
"grad_norm": 10.395578945684981,
"learning_rate": 2.4156700005362384e-06,
"loss": 0.43477705121040344,
"step": 1720
},
{
"epoch": 2.1194581280788176,
"grad_norm": 13.97203519429258,
"learning_rate": 2.409538049986576e-06,
"loss": 0.36739200353622437,
"step": 1721
},
{
"epoch": 2.1206896551724137,
"grad_norm": 10.000232328294244,
"learning_rate": 2.403411420467916e-06,
"loss": 0.722801923751831,
"step": 1722
},
{
"epoch": 2.12192118226601,
"grad_norm": 8.047857628714285,
"learning_rate": 2.3972901245648724e-06,
"loss": 0.3729158043861389,
"step": 1723
},
{
"epoch": 2.123152709359606,
"grad_norm": 9.083191980371518,
"learning_rate": 2.3911741748511163e-06,
"loss": 0.741644024848938,
"step": 1724
},
{
"epoch": 2.124384236453202,
"grad_norm": 11.04614906019948,
"learning_rate": 2.385063583889335e-06,
"loss": 0.21925917267799377,
"step": 1725
},
{
"epoch": 2.125615763546798,
"grad_norm": 8.204563983460345,
"learning_rate": 2.378958364231202e-06,
"loss": 0.3161308765411377,
"step": 1726
},
{
"epoch": 2.126847290640394,
"grad_norm": 9.198617981495676,
"learning_rate": 2.3728585284173646e-06,
"loss": 0.2520957887172699,
"step": 1727
},
{
"epoch": 2.12807881773399,
"grad_norm": 17.99753939345998,
"learning_rate": 2.3667640889774096e-06,
"loss": 0.5538915991783142,
"step": 1728
},
{
"epoch": 2.1293103448275863,
"grad_norm": 15.205601395041407,
"learning_rate": 2.3606750584298375e-06,
"loss": 0.5438660979270935,
"step": 1729
},
{
"epoch": 2.1305418719211824,
"grad_norm": 11.445216371439214,
"learning_rate": 2.3545914492820366e-06,
"loss": 0.39724698662757874,
"step": 1730
},
{
"epoch": 2.1317733990147785,
"grad_norm": 13.240651517787109,
"learning_rate": 2.348513274030264e-06,
"loss": 0.3480866551399231,
"step": 1731
},
{
"epoch": 2.1330049261083746,
"grad_norm": 8.909285636059167,
"learning_rate": 2.3424405451596143e-06,
"loss": 0.9076392650604248,
"step": 1732
},
{
"epoch": 2.1342364532019706,
"grad_norm": 10.08773566622176,
"learning_rate": 2.3363732751439926e-06,
"loss": 0.19863876700401306,
"step": 1733
},
{
"epoch": 2.1354679802955667,
"grad_norm": 18.974399402946254,
"learning_rate": 2.3303114764460887e-06,
"loss": 0.5347404479980469,
"step": 1734
},
{
"epoch": 2.1366995073891624,
"grad_norm": 13.439122993751143,
"learning_rate": 2.32425516151736e-06,
"loss": 0.4876821041107178,
"step": 1735
},
{
"epoch": 2.1379310344827585,
"grad_norm": 11.45775521594229,
"learning_rate": 2.3182043427979973e-06,
"loss": 0.24914954602718353,
"step": 1736
},
{
"epoch": 2.1391625615763545,
"grad_norm": 8.201340069963411,
"learning_rate": 2.3121590327168987e-06,
"loss": 0.5773565769195557,
"step": 1737
},
{
"epoch": 2.1403940886699506,
"grad_norm": 11.57987957433396,
"learning_rate": 2.30611924369165e-06,
"loss": 0.7779598832130432,
"step": 1738
},
{
"epoch": 2.1416256157635467,
"grad_norm": 10.793230544693655,
"learning_rate": 2.3000849881285016e-06,
"loss": 0.27866464853286743,
"step": 1739
},
{
"epoch": 2.142857142857143,
"grad_norm": 10.857850500188468,
"learning_rate": 2.2940562784223224e-06,
"loss": 0.5243108868598938,
"step": 1740
},
{
"epoch": 2.144088669950739,
"grad_norm": 11.19069440448601,
"learning_rate": 2.2880331269566043e-06,
"loss": 0.6560786366462708,
"step": 1741
},
{
"epoch": 2.145320197044335,
"grad_norm": 13.01584696243558,
"learning_rate": 2.282015546103418e-06,
"loss": 0.6339880228042603,
"step": 1742
},
{
"epoch": 2.146551724137931,
"grad_norm": 9.571310950804556,
"learning_rate": 2.2760035482233868e-06,
"loss": 0.2517808973789215,
"step": 1743
},
{
"epoch": 2.147783251231527,
"grad_norm": 20.291798315352697,
"learning_rate": 2.269997145665674e-06,
"loss": 0.40347909927368164,
"step": 1744
},
{
"epoch": 2.149014778325123,
"grad_norm": 9.550073631094609,
"learning_rate": 2.263996350767942e-06,
"loss": 0.4681488573551178,
"step": 1745
},
{
"epoch": 2.1502463054187193,
"grad_norm": 9.340283980757114,
"learning_rate": 2.2580011758563418e-06,
"loss": 0.6371068954467773,
"step": 1746
},
{
"epoch": 2.1514778325123154,
"grad_norm": 21.612590436052542,
"learning_rate": 2.2520116332454726e-06,
"loss": 0.4741581678390503,
"step": 1747
},
{
"epoch": 2.1527093596059115,
"grad_norm": 8.523455664504207,
"learning_rate": 2.2460277352383713e-06,
"loss": 0.3354438543319702,
"step": 1748
},
{
"epoch": 2.1539408866995076,
"grad_norm": 14.050991791769299,
"learning_rate": 2.240049494126479e-06,
"loss": 0.593233585357666,
"step": 1749
},
{
"epoch": 2.1551724137931036,
"grad_norm": 11.626128632656414,
"learning_rate": 2.234076922189613e-06,
"loss": 0.32123100757598877,
"step": 1750
},
{
"epoch": 2.1564039408866993,
"grad_norm": 17.381626157091297,
"learning_rate": 2.2281100316959476e-06,
"loss": 1.0594584941864014,
"step": 1751
},
{
"epoch": 2.1576354679802954,
"grad_norm": 9.794184199968742,
"learning_rate": 2.2221488349019903e-06,
"loss": 0.8586208820343018,
"step": 1752
},
{
"epoch": 2.1588669950738915,
"grad_norm": 10.979739823361593,
"learning_rate": 2.2161933440525474e-06,
"loss": 0.38074642419815063,
"step": 1753
},
{
"epoch": 2.1600985221674875,
"grad_norm": 10.732650739543086,
"learning_rate": 2.21024357138071e-06,
"loss": 0.28768736124038696,
"step": 1754
},
{
"epoch": 2.1613300492610836,
"grad_norm": 10.263056998284627,
"learning_rate": 2.2042995291078227e-06,
"loss": 1.1843211650848389,
"step": 1755
},
{
"epoch": 2.1625615763546797,
"grad_norm": 13.635797719225163,
"learning_rate": 2.1983612294434563e-06,
"loss": 0.7616925835609436,
"step": 1756
},
{
"epoch": 2.163793103448276,
"grad_norm": 9.78260695772624,
"learning_rate": 2.192428684585386e-06,
"loss": 0.4518227279186249,
"step": 1757
},
{
"epoch": 2.165024630541872,
"grad_norm": 14.669561384919394,
"learning_rate": 2.1865019067195685e-06,
"loss": 0.9173997640609741,
"step": 1758
},
{
"epoch": 2.166256157635468,
"grad_norm": 9.861706475635476,
"learning_rate": 2.180580908020117e-06,
"loss": 0.4044645428657532,
"step": 1759
},
{
"epoch": 2.167487684729064,
"grad_norm": 11.783858103052328,
"learning_rate": 2.174665700649267e-06,
"loss": 0.7771418690681458,
"step": 1760
},
{
"epoch": 2.16871921182266,
"grad_norm": 12.555695641041428,
"learning_rate": 2.1687562967573645e-06,
"loss": 0.39461982250213623,
"step": 1761
},
{
"epoch": 2.1699507389162562,
"grad_norm": 8.510682084443147,
"learning_rate": 2.1628527084828283e-06,
"loss": 0.2924491763114929,
"step": 1762
},
{
"epoch": 2.1711822660098523,
"grad_norm": 7.789254339344862,
"learning_rate": 2.156954947952139e-06,
"loss": 0.2507514953613281,
"step": 1763
},
{
"epoch": 2.1724137931034484,
"grad_norm": 9.474786369957261,
"learning_rate": 2.151063027279798e-06,
"loss": 0.44257861375808716,
"step": 1764
},
{
"epoch": 2.1736453201970445,
"grad_norm": 9.165088005805186,
"learning_rate": 2.1451769585683196e-06,
"loss": 0.2863251268863678,
"step": 1765
},
{
"epoch": 2.1748768472906406,
"grad_norm": 14.506373027900759,
"learning_rate": 2.139296753908195e-06,
"loss": 0.6882431507110596,
"step": 1766
},
{
"epoch": 2.1761083743842367,
"grad_norm": 10.237681928740948,
"learning_rate": 2.1334224253778628e-06,
"loss": 0.8318816423416138,
"step": 1767
},
{
"epoch": 2.1773399014778327,
"grad_norm": 8.92298078848023,
"learning_rate": 2.1275539850437006e-06,
"loss": 0.3899531364440918,
"step": 1768
},
{
"epoch": 2.1785714285714284,
"grad_norm": 10.24700092560103,
"learning_rate": 2.1216914449599905e-06,
"loss": 0.6424532532691956,
"step": 1769
},
{
"epoch": 2.1798029556650245,
"grad_norm": 10.006066437806421,
"learning_rate": 2.1158348171688888e-06,
"loss": 0.6676028370857239,
"step": 1770
},
{
"epoch": 2.1810344827586206,
"grad_norm": 11.577953051638056,
"learning_rate": 2.109984113700413e-06,
"loss": 0.4219639301300049,
"step": 1771
},
{
"epoch": 2.1822660098522166,
"grad_norm": 6.842671899586793,
"learning_rate": 2.1041393465724114e-06,
"loss": 0.32283568382263184,
"step": 1772
},
{
"epoch": 2.1834975369458127,
"grad_norm": 9.373944237506624,
"learning_rate": 2.0983005277905348e-06,
"loss": 0.26172614097595215,
"step": 1773
},
{
"epoch": 2.184729064039409,
"grad_norm": 8.04859888971959,
"learning_rate": 2.092467669348217e-06,
"loss": 0.585732638835907,
"step": 1774
},
{
"epoch": 2.185960591133005,
"grad_norm": 17.13691371915511,
"learning_rate": 2.0866407832266506e-06,
"loss": 0.42734187841415405,
"step": 1775
},
{
"epoch": 2.187192118226601,
"grad_norm": 9.353812644763135,
"learning_rate": 2.0808198813947606e-06,
"loss": 0.24151989817619324,
"step": 1776
},
{
"epoch": 2.188423645320197,
"grad_norm": 6.491521280477716,
"learning_rate": 2.0750049758091778e-06,
"loss": 0.12940426170825958,
"step": 1777
},
{
"epoch": 2.189655172413793,
"grad_norm": 12.137046868295176,
"learning_rate": 2.0691960784142143e-06,
"loss": 0.7501548528671265,
"step": 1778
},
{
"epoch": 2.1908866995073892,
"grad_norm": 8.28614035816523,
"learning_rate": 2.063393201141846e-06,
"loss": 0.43730083107948303,
"step": 1779
},
{
"epoch": 2.1921182266009853,
"grad_norm": 7.426728577487124,
"learning_rate": 2.0575963559116823e-06,
"loss": 0.3335978388786316,
"step": 1780
},
{
"epoch": 2.1933497536945814,
"grad_norm": 7.727814229698406,
"learning_rate": 2.0518055546309362e-06,
"loss": 0.3262137174606323,
"step": 1781
},
{
"epoch": 2.1945812807881775,
"grad_norm": 12.218163734992793,
"learning_rate": 2.0460208091944122e-06,
"loss": 0.3336663544178009,
"step": 1782
},
{
"epoch": 2.1958128078817736,
"grad_norm": 12.61978263562606,
"learning_rate": 2.0402421314844774e-06,
"loss": 0.6050255298614502,
"step": 1783
},
{
"epoch": 2.1970443349753697,
"grad_norm": 10.058297792191603,
"learning_rate": 2.0344695333710234e-06,
"loss": 0.33584898710250854,
"step": 1784
},
{
"epoch": 2.1982758620689653,
"grad_norm": 7.629807101727278,
"learning_rate": 2.0287030267114665e-06,
"loss": 0.4711458683013916,
"step": 1785
},
{
"epoch": 2.1995073891625614,
"grad_norm": 7.348268103503395,
"learning_rate": 2.0229426233507067e-06,
"loss": 0.6127311587333679,
"step": 1786
},
{
"epoch": 2.2007389162561575,
"grad_norm": 8.230284472347915,
"learning_rate": 2.0171883351211038e-06,
"loss": 0.7195362448692322,
"step": 1787
},
{
"epoch": 2.2019704433497536,
"grad_norm": 20.032548588100823,
"learning_rate": 2.0114401738424618e-06,
"loss": 1.412251591682434,
"step": 1788
},
{
"epoch": 2.2032019704433496,
"grad_norm": 11.361862300830705,
"learning_rate": 2.0056981513219944e-06,
"loss": 0.48954465985298157,
"step": 1789
},
{
"epoch": 2.2044334975369457,
"grad_norm": 10.14335903404985,
"learning_rate": 1.999962279354311e-06,
"loss": 0.32414451241493225,
"step": 1790
},
{
"epoch": 2.205665024630542,
"grad_norm": 11.365030809564745,
"learning_rate": 1.9942325697213817e-06,
"loss": 0.4072822034358978,
"step": 1791
},
{
"epoch": 2.206896551724138,
"grad_norm": 9.518825727757552,
"learning_rate": 1.988509034192522e-06,
"loss": 0.25958192348480225,
"step": 1792
},
{
"epoch": 2.208128078817734,
"grad_norm": 7.689606665993246,
"learning_rate": 1.9827916845243687e-06,
"loss": 0.2943662405014038,
"step": 1793
},
{
"epoch": 2.20935960591133,
"grad_norm": 11.749853788306439,
"learning_rate": 1.9770805324608446e-06,
"loss": 0.6713488698005676,
"step": 1794
},
{
"epoch": 2.210591133004926,
"grad_norm": 8.987827629233262,
"learning_rate": 1.971375589733145e-06,
"loss": 0.5103387236595154,
"step": 1795
},
{
"epoch": 2.2118226600985222,
"grad_norm": 14.84712925009146,
"learning_rate": 1.965676868059714e-06,
"loss": 0.4981153905391693,
"step": 1796
},
{
"epoch": 2.2130541871921183,
"grad_norm": 9.829434549611708,
"learning_rate": 1.9599843791462123e-06,
"loss": 0.2828434407711029,
"step": 1797
},
{
"epoch": 2.2142857142857144,
"grad_norm": 11.531079285990483,
"learning_rate": 1.9542981346855015e-06,
"loss": 0.36899659037590027,
"step": 1798
},
{
"epoch": 2.2155172413793105,
"grad_norm": 10.264635301771921,
"learning_rate": 1.9486181463576176e-06,
"loss": 0.46039581298828125,
"step": 1799
},
{
"epoch": 2.2167487684729066,
"grad_norm": 7.994315710714336,
"learning_rate": 1.942944425829741e-06,
"loss": 0.611553966999054,
"step": 1800
},
{
"epoch": 2.2179802955665027,
"grad_norm": 10.64295367375575,
"learning_rate": 1.937276984756179e-06,
"loss": 0.23928876221179962,
"step": 1801
},
{
"epoch": 2.2192118226600988,
"grad_norm": 11.919180580141987,
"learning_rate": 1.9316158347783436e-06,
"loss": 0.3270934820175171,
"step": 1802
},
{
"epoch": 2.2204433497536944,
"grad_norm": 9.438403907761801,
"learning_rate": 1.925960987524724e-06,
"loss": 0.30926424264907837,
"step": 1803
},
{
"epoch": 2.2216748768472905,
"grad_norm": 11.903671185207038,
"learning_rate": 1.9203124546108583e-06,
"loss": 0.6049486994743347,
"step": 1804
},
{
"epoch": 2.2229064039408866,
"grad_norm": 14.861992075187999,
"learning_rate": 1.91467024763932e-06,
"loss": 0.7592355012893677,
"step": 1805
},
{
"epoch": 2.2241379310344827,
"grad_norm": 11.790018718519686,
"learning_rate": 1.9090343781996828e-06,
"loss": 0.26057887077331543,
"step": 1806
},
{
"epoch": 2.2253694581280787,
"grad_norm": 17.03673279052151,
"learning_rate": 1.9034048578685099e-06,
"loss": 0.4014609754085541,
"step": 1807
},
{
"epoch": 2.226600985221675,
"grad_norm": 10.412774433531801,
"learning_rate": 1.897781698209315e-06,
"loss": 0.26397138833999634,
"step": 1808
},
{
"epoch": 2.227832512315271,
"grad_norm": 11.809020308728643,
"learning_rate": 1.8921649107725525e-06,
"loss": 0.8727256059646606,
"step": 1809
},
{
"epoch": 2.229064039408867,
"grad_norm": 8.838116472787092,
"learning_rate": 1.8865545070955882e-06,
"loss": 0.45729875564575195,
"step": 1810
},
{
"epoch": 2.230295566502463,
"grad_norm": 13.341384604613445,
"learning_rate": 1.880950498702666e-06,
"loss": 0.3261849880218506,
"step": 1811
},
{
"epoch": 2.231527093596059,
"grad_norm": 16.210141929264246,
"learning_rate": 1.875352897104903e-06,
"loss": 0.682532787322998,
"step": 1812
},
{
"epoch": 2.2327586206896552,
"grad_norm": 16.44333196476405,
"learning_rate": 1.8697617138002545e-06,
"loss": 0.4255359470844269,
"step": 1813
},
{
"epoch": 2.2339901477832513,
"grad_norm": 8.460123548003127,
"learning_rate": 1.8641769602734872e-06,
"loss": 0.3307432234287262,
"step": 1814
},
{
"epoch": 2.2352216748768474,
"grad_norm": 9.96917434972206,
"learning_rate": 1.8585986479961653e-06,
"loss": 0.26837313175201416,
"step": 1815
},
{
"epoch": 2.2364532019704435,
"grad_norm": 12.410587151566334,
"learning_rate": 1.8530267884266228e-06,
"loss": 0.5036531686782837,
"step": 1816
},
{
"epoch": 2.2376847290640396,
"grad_norm": 13.229449859916322,
"learning_rate": 1.8474613930099356e-06,
"loss": 0.4444383680820465,
"step": 1817
},
{
"epoch": 2.2389162561576357,
"grad_norm": 10.366174513602477,
"learning_rate": 1.8419024731779e-06,
"loss": 0.24592629075050354,
"step": 1818
},
{
"epoch": 2.2401477832512313,
"grad_norm": 21.212742320307363,
"learning_rate": 1.8363500403490175e-06,
"loss": 0.9310093522071838,
"step": 1819
},
{
"epoch": 2.2413793103448274,
"grad_norm": 10.041916938686702,
"learning_rate": 1.8308041059284621e-06,
"loss": 0.3252318799495697,
"step": 1820
},
{
"epoch": 2.2426108374384235,
"grad_norm": 10.169102582875109,
"learning_rate": 1.8252646813080566e-06,
"loss": 0.44218361377716064,
"step": 1821
},
{
"epoch": 2.2438423645320196,
"grad_norm": 13.658159402672133,
"learning_rate": 1.8197317778662533e-06,
"loss": 0.631632924079895,
"step": 1822
},
{
"epoch": 2.2450738916256157,
"grad_norm": 11.284192076783485,
"learning_rate": 1.814205406968112e-06,
"loss": 0.2570488154888153,
"step": 1823
},
{
"epoch": 2.2463054187192117,
"grad_norm": 10.661610786830831,
"learning_rate": 1.8086855799652737e-06,
"loss": 0.6113500595092773,
"step": 1824
},
{
"epoch": 2.247536945812808,
"grad_norm": 9.883591422459872,
"learning_rate": 1.8031723081959334e-06,
"loss": 0.5997953414916992,
"step": 1825
},
{
"epoch": 2.248768472906404,
"grad_norm": 12.888281661513009,
"learning_rate": 1.7976656029848271e-06,
"loss": 0.501262903213501,
"step": 1826
},
{
"epoch": 2.25,
"grad_norm": 9.87397702836225,
"learning_rate": 1.792165475643199e-06,
"loss": 0.9116629362106323,
"step": 1827
},
{
"epoch": 2.251231527093596,
"grad_norm": 8.421237466791723,
"learning_rate": 1.786671937468779e-06,
"loss": 0.3302918076515198,
"step": 1828
},
{
"epoch": 2.252463054187192,
"grad_norm": 9.25026361639238,
"learning_rate": 1.7811849997457681e-06,
"loss": 0.26528751850128174,
"step": 1829
},
{
"epoch": 2.2536945812807883,
"grad_norm": 11.490820404812338,
"learning_rate": 1.775704673744809e-06,
"loss": 0.25929901003837585,
"step": 1830
},
{
"epoch": 2.2549261083743843,
"grad_norm": 13.127115940994786,
"learning_rate": 1.7702309707229576e-06,
"loss": 0.4980836808681488,
"step": 1831
},
{
"epoch": 2.2561576354679804,
"grad_norm": 16.054819413361866,
"learning_rate": 1.764763901923673e-06,
"loss": 0.5196325182914734,
"step": 1832
},
{
"epoch": 2.2573891625615765,
"grad_norm": 8.101995143129717,
"learning_rate": 1.7593034785767788e-06,
"loss": 0.20513209700584412,
"step": 1833
},
{
"epoch": 2.2586206896551726,
"grad_norm": 11.005823004560217,
"learning_rate": 1.753849711898457e-06,
"loss": 0.3052961826324463,
"step": 1834
},
{
"epoch": 2.2598522167487687,
"grad_norm": 14.916636143940408,
"learning_rate": 1.7484026130912097e-06,
"loss": 0.32289302349090576,
"step": 1835
},
{
"epoch": 2.2610837438423648,
"grad_norm": 10.783629716557854,
"learning_rate": 1.742962193343845e-06,
"loss": 0.5892568826675415,
"step": 1836
},
{
"epoch": 2.2623152709359604,
"grad_norm": 8.680159409558001,
"learning_rate": 1.737528463831456e-06,
"loss": 0.24824300408363342,
"step": 1837
},
{
"epoch": 2.2635467980295565,
"grad_norm": 28.059213249121456,
"learning_rate": 1.7321014357153815e-06,
"loss": 0.23833397030830383,
"step": 1838
},
{
"epoch": 2.2647783251231526,
"grad_norm": 10.866697094389515,
"learning_rate": 1.726681120143207e-06,
"loss": 0.4855925738811493,
"step": 1839
},
{
"epoch": 2.2660098522167487,
"grad_norm": 11.048047137574908,
"learning_rate": 1.7212675282487269e-06,
"loss": 0.44992727041244507,
"step": 1840
},
{
"epoch": 2.2672413793103448,
"grad_norm": 19.236329816785574,
"learning_rate": 1.7158606711519193e-06,
"loss": 0.41251128911972046,
"step": 1841
},
{
"epoch": 2.268472906403941,
"grad_norm": 8.021805078822515,
"learning_rate": 1.7104605599589353e-06,
"loss": 0.4418972134590149,
"step": 1842
},
{
"epoch": 2.269704433497537,
"grad_norm": 14.577958176696848,
"learning_rate": 1.7050672057620666e-06,
"loss": 0.4425298571586609,
"step": 1843
},
{
"epoch": 2.270935960591133,
"grad_norm": 13.33684949043127,
"learning_rate": 1.6996806196397243e-06,
"loss": 0.3141231834888458,
"step": 1844
},
{
"epoch": 2.272167487684729,
"grad_norm": 14.191190475097011,
"learning_rate": 1.6943008126564164e-06,
"loss": 0.2843426764011383,
"step": 1845
},
{
"epoch": 2.273399014778325,
"grad_norm": 8.774563230877245,
"learning_rate": 1.6889277958627293e-06,
"loss": 0.36104702949523926,
"step": 1846
},
{
"epoch": 2.2746305418719213,
"grad_norm": 8.915062589804638,
"learning_rate": 1.6835615802953026e-06,
"loss": 0.3061131536960602,
"step": 1847
},
{
"epoch": 2.2758620689655173,
"grad_norm": 14.006563372468205,
"learning_rate": 1.6782021769768015e-06,
"loss": 0.26009926199913025,
"step": 1848
},
{
"epoch": 2.2770935960591134,
"grad_norm": 8.127500944165664,
"learning_rate": 1.6728495969158976e-06,
"loss": 0.33785128593444824,
"step": 1849
},
{
"epoch": 2.2783251231527095,
"grad_norm": 13.84769147602863,
"learning_rate": 1.6675038511072518e-06,
"loss": 0.675277829170227,
"step": 1850
},
{
"epoch": 2.2795566502463056,
"grad_norm": 10.2024379894797,
"learning_rate": 1.6621649505314853e-06,
"loss": 0.30536460876464844,
"step": 1851
},
{
"epoch": 2.2807881773399012,
"grad_norm": 13.905669065241,
"learning_rate": 1.6568329061551552e-06,
"loss": 0.483297735452652,
"step": 1852
},
{
"epoch": 2.2820197044334973,
"grad_norm": 13.831832440802502,
"learning_rate": 1.6515077289307391e-06,
"loss": 1.2728561162948608,
"step": 1853
},
{
"epoch": 2.2832512315270934,
"grad_norm": 12.809334971632179,
"learning_rate": 1.6461894297966113e-06,
"loss": 1.2634159326553345,
"step": 1854
},
{
"epoch": 2.2844827586206895,
"grad_norm": 7.191323391539922,
"learning_rate": 1.640878019677008e-06,
"loss": 0.2823532819747925,
"step": 1855
},
{
"epoch": 2.2857142857142856,
"grad_norm": 10.11071089918571,
"learning_rate": 1.6355735094820236e-06,
"loss": 0.34143221378326416,
"step": 1856
},
{
"epoch": 2.2869458128078817,
"grad_norm": 21.093284752390208,
"learning_rate": 1.6302759101075788e-06,
"loss": 1.6820435523986816,
"step": 1857
},
{
"epoch": 2.2881773399014778,
"grad_norm": 10.354309593440153,
"learning_rate": 1.6249852324353943e-06,
"loss": 0.5194296836853027,
"step": 1858
},
{
"epoch": 2.289408866995074,
"grad_norm": 17.44623842314838,
"learning_rate": 1.619701487332978e-06,
"loss": 0.5637781023979187,
"step": 1859
},
{
"epoch": 2.29064039408867,
"grad_norm": 25.69777716112705,
"learning_rate": 1.6144246856535933e-06,
"loss": 0.34875303506851196,
"step": 1860
},
{
"epoch": 2.291871921182266,
"grad_norm": 12.072258734899453,
"learning_rate": 1.609154838236246e-06,
"loss": 1.098509430885315,
"step": 1861
},
{
"epoch": 2.293103448275862,
"grad_norm": 9.38995256932923,
"learning_rate": 1.603891955905652e-06,
"loss": 0.28303658962249756,
"step": 1862
},
{
"epoch": 2.294334975369458,
"grad_norm": 8.876257541157115,
"learning_rate": 1.5986360494722237e-06,
"loss": 0.2923981547355652,
"step": 1863
},
{
"epoch": 2.2955665024630543,
"grad_norm": 12.816591257478263,
"learning_rate": 1.5933871297320458e-06,
"loss": 0.7381842136383057,
"step": 1864
},
{
"epoch": 2.2967980295566504,
"grad_norm": 11.151348038557627,
"learning_rate": 1.5881452074668474e-06,
"loss": 0.3092786371707916,
"step": 1865
},
{
"epoch": 2.2980295566502464,
"grad_norm": 7.288277848225151,
"learning_rate": 1.5829102934439855e-06,
"loss": 0.23155847191810608,
"step": 1866
},
{
"epoch": 2.2992610837438425,
"grad_norm": 6.9100983038059685,
"learning_rate": 1.577682398416424e-06,
"loss": 0.28587496280670166,
"step": 1867
},
{
"epoch": 2.3004926108374386,
"grad_norm": 10.179482607383743,
"learning_rate": 1.572461533122709e-06,
"loss": 0.28047090768814087,
"step": 1868
},
{
"epoch": 2.3017241379310347,
"grad_norm": 9.853152635402589,
"learning_rate": 1.567247708286942e-06,
"loss": 0.23015758395195007,
"step": 1869
},
{
"epoch": 2.302955665024631,
"grad_norm": 11.277401391934358,
"learning_rate": 1.5620409346187697e-06,
"loss": 0.4323405623435974,
"step": 1870
},
{
"epoch": 2.3041871921182264,
"grad_norm": 11.297467766496554,
"learning_rate": 1.5568412228133506e-06,
"loss": 0.23572880029678345,
"step": 1871
},
{
"epoch": 2.3054187192118225,
"grad_norm": 13.421885123492197,
"learning_rate": 1.5516485835513368e-06,
"loss": 0.3727877140045166,
"step": 1872
},
{
"epoch": 2.3066502463054186,
"grad_norm": 12.62430001790282,
"learning_rate": 1.5464630274988558e-06,
"loss": 0.45042985677719116,
"step": 1873
},
{
"epoch": 2.3078817733990147,
"grad_norm": 14.933222032568711,
"learning_rate": 1.5412845653074871e-06,
"loss": 0.2898573875427246,
"step": 1874
},
{
"epoch": 2.3091133004926108,
"grad_norm": 13.678732792764093,
"learning_rate": 1.5361132076142316e-06,
"loss": 0.5285981893539429,
"step": 1875
},
{
"epoch": 2.310344827586207,
"grad_norm": 11.195106285237618,
"learning_rate": 1.5309489650415056e-06,
"loss": 0.32582932710647583,
"step": 1876
},
{
"epoch": 2.311576354679803,
"grad_norm": 10.519489956392377,
"learning_rate": 1.5257918481971028e-06,
"loss": 0.2169458121061325,
"step": 1877
},
{
"epoch": 2.312807881773399,
"grad_norm": 13.764556882530254,
"learning_rate": 1.5206418676741868e-06,
"loss": 0.618523359298706,
"step": 1878
},
{
"epoch": 2.314039408866995,
"grad_norm": 11.040931356433024,
"learning_rate": 1.515499034051256e-06,
"loss": 0.7014099359512329,
"step": 1879
},
{
"epoch": 2.315270935960591,
"grad_norm": 13.213679491063276,
"learning_rate": 1.510363357892133e-06,
"loss": 0.44798558950424194,
"step": 1880
},
{
"epoch": 2.3165024630541873,
"grad_norm": 77.68330951092015,
"learning_rate": 1.50523484974594e-06,
"loss": 0.4824434220790863,
"step": 1881
},
{
"epoch": 2.3177339901477834,
"grad_norm": 5.871453538227446,
"learning_rate": 1.5001135201470673e-06,
"loss": 0.16904819011688232,
"step": 1882
},
{
"epoch": 2.3189655172413794,
"grad_norm": 10.296708154719132,
"learning_rate": 1.4949993796151675e-06,
"loss": 0.8792778253555298,
"step": 1883
},
{
"epoch": 2.3201970443349755,
"grad_norm": 12.549086016226653,
"learning_rate": 1.4898924386551256e-06,
"loss": 0.6592487096786499,
"step": 1884
},
{
"epoch": 2.3214285714285716,
"grad_norm": 20.275701743724124,
"learning_rate": 1.4847927077570324e-06,
"loss": 1.6036354303359985,
"step": 1885
},
{
"epoch": 2.3226600985221673,
"grad_norm": 9.24831145241808,
"learning_rate": 1.4797001973961755e-06,
"loss": 0.34490981698036194,
"step": 1886
},
{
"epoch": 2.3238916256157633,
"grad_norm": 8.476000589981345,
"learning_rate": 1.4746149180330082e-06,
"loss": 0.3186146914958954,
"step": 1887
},
{
"epoch": 2.3251231527093594,
"grad_norm": 18.44274912327115,
"learning_rate": 1.4695368801131293e-06,
"loss": 0.5050108432769775,
"step": 1888
},
{
"epoch": 2.3263546798029555,
"grad_norm": 12.028503330268482,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.3541644215583801,
"step": 1889
},
{
"epoch": 2.3275862068965516,
"grad_norm": 6.910684312350736,
"learning_rate": 1.4594025703112397e-06,
"loss": 0.3495083749294281,
"step": 1890
},
{
"epoch": 2.3288177339901477,
"grad_norm": 11.582636749838006,
"learning_rate": 1.4543463192459728e-06,
"loss": 0.9918674826622009,
"step": 1891
},
{
"epoch": 2.3300492610837438,
"grad_norm": 12.929277927199294,
"learning_rate": 1.4492973512574348e-06,
"loss": 0.9601753950119019,
"step": 1892
},
{
"epoch": 2.33128078817734,
"grad_norm": 8.289898772410082,
"learning_rate": 1.4442556767166371e-06,
"loss": 0.48341238498687744,
"step": 1893
},
{
"epoch": 2.332512315270936,
"grad_norm": 11.044218498303557,
"learning_rate": 1.4392213059796133e-06,
"loss": 0.38372108340263367,
"step": 1894
},
{
"epoch": 2.333743842364532,
"grad_norm": 17.672025418443823,
"learning_rate": 1.4341942493873934e-06,
"loss": 0.45662760734558105,
"step": 1895
},
{
"epoch": 2.334975369458128,
"grad_norm": 8.57989944923008,
"learning_rate": 1.4291745172659804e-06,
"loss": 0.6601132154464722,
"step": 1896
},
{
"epoch": 2.336206896551724,
"grad_norm": 10.831792328536467,
"learning_rate": 1.4241621199263362e-06,
"loss": 0.7569577097892761,
"step": 1897
},
{
"epoch": 2.3374384236453203,
"grad_norm": 14.76295283801852,
"learning_rate": 1.4191570676643573e-06,
"loss": 0.7162508964538574,
"step": 1898
},
{
"epoch": 2.3386699507389164,
"grad_norm": 16.808898262444146,
"learning_rate": 1.4141593707608441e-06,
"loss": 0.6121374368667603,
"step": 1899
},
{
"epoch": 2.3399014778325125,
"grad_norm": 14.404980275639364,
"learning_rate": 1.4091690394814989e-06,
"loss": 0.550343930721283,
"step": 1900
},
{
"epoch": 2.3411330049261085,
"grad_norm": 13.189507504332187,
"learning_rate": 1.40418608407689e-06,
"loss": 0.644547700881958,
"step": 1901
},
{
"epoch": 2.3423645320197046,
"grad_norm": 10.144794457121083,
"learning_rate": 1.3992105147824326e-06,
"loss": 0.463761568069458,
"step": 1902
},
{
"epoch": 2.3435960591133007,
"grad_norm": 9.21109140090456,
"learning_rate": 1.3942423418183764e-06,
"loss": 0.5593357682228088,
"step": 1903
},
{
"epoch": 2.344827586206897,
"grad_norm": 12.967643967580644,
"learning_rate": 1.3892815753897708e-06,
"loss": 0.5090635418891907,
"step": 1904
},
{
"epoch": 2.3460591133004924,
"grad_norm": 13.46983908302652,
"learning_rate": 1.3843282256864599e-06,
"loss": 0.4595394432544708,
"step": 1905
},
{
"epoch": 2.3472906403940885,
"grad_norm": 11.392389994781835,
"learning_rate": 1.379382302883044e-06,
"loss": 0.8381729125976562,
"step": 1906
},
{
"epoch": 2.3485221674876846,
"grad_norm": 8.85214424769499,
"learning_rate": 1.3744438171388752e-06,
"loss": 0.37937110662460327,
"step": 1907
},
{
"epoch": 2.3497536945812807,
"grad_norm": 17.78975528440709,
"learning_rate": 1.3695127785980279e-06,
"loss": 0.4255325496196747,
"step": 1908
},
{
"epoch": 2.350985221674877,
"grad_norm": 11.69369455239838,
"learning_rate": 1.3645891973892772e-06,
"loss": 1.1354942321777344,
"step": 1909
},
{
"epoch": 2.352216748768473,
"grad_norm": 7.241901848192273,
"learning_rate": 1.359673083626079e-06,
"loss": 0.30018460750579834,
"step": 1910
},
{
"epoch": 2.353448275862069,
"grad_norm": 10.130306855965305,
"learning_rate": 1.3547644474065557e-06,
"loss": 0.22174029052257538,
"step": 1911
},
{
"epoch": 2.354679802955665,
"grad_norm": 10.818242567623516,
"learning_rate": 1.349863298813464e-06,
"loss": 0.27310076355934143,
"step": 1912
},
{
"epoch": 2.355911330049261,
"grad_norm": 13.041781733429923,
"learning_rate": 1.3449696479141855e-06,
"loss": 0.39454638957977295,
"step": 1913
},
{
"epoch": 2.357142857142857,
"grad_norm": 10.18283763523278,
"learning_rate": 1.3400835047606997e-06,
"loss": 0.39921119809150696,
"step": 1914
},
{
"epoch": 2.3583743842364533,
"grad_norm": 10.365856020003331,
"learning_rate": 1.3352048793895623e-06,
"loss": 0.45110660791397095,
"step": 1915
},
{
"epoch": 2.3596059113300494,
"grad_norm": 8.256618178243365,
"learning_rate": 1.330333781821887e-06,
"loss": 0.5453286170959473,
"step": 1916
},
{
"epoch": 2.3608374384236455,
"grad_norm": 7.676268533106476,
"learning_rate": 1.325470222063327e-06,
"loss": 0.21928450465202332,
"step": 1917
},
{
"epoch": 2.3620689655172415,
"grad_norm": 11.703145589738702,
"learning_rate": 1.3206142101040525e-06,
"loss": 0.8491370677947998,
"step": 1918
},
{
"epoch": 2.363300492610837,
"grad_norm": 11.375579827407606,
"learning_rate": 1.3157657559187264e-06,
"loss": 0.5052551031112671,
"step": 1919
},
{
"epoch": 2.3645320197044333,
"grad_norm": 14.124196950433179,
"learning_rate": 1.3109248694664917e-06,
"loss": 1.0034559965133667,
"step": 1920
},
{
"epoch": 2.3657635467980294,
"grad_norm": 16.92878880493155,
"learning_rate": 1.3060915606909413e-06,
"loss": 0.3685661554336548,
"step": 1921
},
{
"epoch": 2.3669950738916254,
"grad_norm": 9.744666272771802,
"learning_rate": 1.301265839520109e-06,
"loss": 0.33304983377456665,
"step": 1922
},
{
"epoch": 2.3682266009852215,
"grad_norm": 9.861413232471296,
"learning_rate": 1.2964477158664367e-06,
"loss": 1.3396000862121582,
"step": 1923
},
{
"epoch": 2.3694581280788176,
"grad_norm": 13.403135613317723,
"learning_rate": 1.2916371996267656e-06,
"loss": 0.3852962851524353,
"step": 1924
},
{
"epoch": 2.3706896551724137,
"grad_norm": 12.989833739172669,
"learning_rate": 1.2868343006823113e-06,
"loss": 0.5070800185203552,
"step": 1925
},
{
"epoch": 2.37192118226601,
"grad_norm": 10.592089371352348,
"learning_rate": 1.2820390288986345e-06,
"loss": 0.1917571723461151,
"step": 1926
},
{
"epoch": 2.373152709359606,
"grad_norm": 6.248268258840329,
"learning_rate": 1.2772513941256371e-06,
"loss": 0.19884659349918365,
"step": 1927
},
{
"epoch": 2.374384236453202,
"grad_norm": 13.319990126266617,
"learning_rate": 1.2724714061975335e-06,
"loss": 0.27710244059562683,
"step": 1928
},
{
"epoch": 2.375615763546798,
"grad_norm": 12.638294589181001,
"learning_rate": 1.2676990749328255e-06,
"loss": 0.7216998338699341,
"step": 1929
},
{
"epoch": 2.376847290640394,
"grad_norm": 7.68797287512978,
"learning_rate": 1.262934410134292e-06,
"loss": 0.35512983798980713,
"step": 1930
},
{
"epoch": 2.37807881773399,
"grad_norm": 7.682504760826181,
"learning_rate": 1.2581774215889653e-06,
"loss": 0.21548208594322205,
"step": 1931
},
{
"epoch": 2.3793103448275863,
"grad_norm": 10.576319148708158,
"learning_rate": 1.2534281190681059e-06,
"loss": 0.7191505432128906,
"step": 1932
},
{
"epoch": 2.3805418719211824,
"grad_norm": 28.03273248427961,
"learning_rate": 1.2486865123271868e-06,
"loss": 0.5658040046691895,
"step": 1933
},
{
"epoch": 2.3817733990147785,
"grad_norm": 7.429440108605395,
"learning_rate": 1.243952611105877e-06,
"loss": 0.42820805311203003,
"step": 1934
},
{
"epoch": 2.3830049261083746,
"grad_norm": 8.913271204535084,
"learning_rate": 1.2392264251280167e-06,
"loss": 0.3223640024662018,
"step": 1935
},
{
"epoch": 2.3842364532019706,
"grad_norm": 16.39061337542185,
"learning_rate": 1.2345079641015955e-06,
"loss": 0.5262437462806702,
"step": 1936
},
{
"epoch": 2.3854679802955667,
"grad_norm": 12.040132799234067,
"learning_rate": 1.2297972377187361e-06,
"loss": 0.32022416591644287,
"step": 1937
},
{
"epoch": 2.386699507389163,
"grad_norm": 10.197992684406291,
"learning_rate": 1.2250942556556754e-06,
"loss": 0.76932692527771,
"step": 1938
},
{
"epoch": 2.3879310344827585,
"grad_norm": 9.459909563147203,
"learning_rate": 1.2203990275727435e-06,
"loss": 0.23026564717292786,
"step": 1939
},
{
"epoch": 2.3891625615763545,
"grad_norm": 11.035875303455253,
"learning_rate": 1.2157115631143384e-06,
"loss": 0.4533492624759674,
"step": 1940
},
{
"epoch": 2.3903940886699506,
"grad_norm": 10.823301129205994,
"learning_rate": 1.211031871908916e-06,
"loss": 0.6235211491584778,
"step": 1941
},
{
"epoch": 2.3916256157635467,
"grad_norm": 9.073613663519735,
"learning_rate": 1.206359963568966e-06,
"loss": 0.2519042193889618,
"step": 1942
},
{
"epoch": 2.392857142857143,
"grad_norm": 9.128265200465231,
"learning_rate": 1.201695847690983e-06,
"loss": 0.3229137659072876,
"step": 1943
},
{
"epoch": 2.394088669950739,
"grad_norm": 11.336508477709275,
"learning_rate": 1.1970395338554642e-06,
"loss": 0.19324302673339844,
"step": 1944
},
{
"epoch": 2.395320197044335,
"grad_norm": 11.07861313896692,
"learning_rate": 1.1923910316268783e-06,
"loss": 0.6342459917068481,
"step": 1945
},
{
"epoch": 2.396551724137931,
"grad_norm": 11.018070634448504,
"learning_rate": 1.1877503505536453e-06,
"loss": 0.3010944724082947,
"step": 1946
},
{
"epoch": 2.397783251231527,
"grad_norm": 8.241609243061369,
"learning_rate": 1.183117500168125e-06,
"loss": 0.40499716997146606,
"step": 1947
},
{
"epoch": 2.399014778325123,
"grad_norm": 18.259844198245478,
"learning_rate": 1.1784924899865856e-06,
"loss": 0.9692997336387634,
"step": 1948
},
{
"epoch": 2.4002463054187193,
"grad_norm": 15.459619863404178,
"learning_rate": 1.1738753295091986e-06,
"loss": 0.3848229646682739,
"step": 1949
},
{
"epoch": 2.4014778325123154,
"grad_norm": 10.437656103417114,
"learning_rate": 1.169266028220004e-06,
"loss": 0.4472384750843048,
"step": 1950
},
{
"epoch": 2.4027093596059115,
"grad_norm": 8.14141154883163,
"learning_rate": 1.164664595586904e-06,
"loss": 0.21374854445457458,
"step": 1951
},
{
"epoch": 2.4039408866995076,
"grad_norm": 9.895182845073167,
"learning_rate": 1.1600710410616367e-06,
"loss": 0.4789981544017792,
"step": 1952
},
{
"epoch": 2.405172413793103,
"grad_norm": 14.330046153248214,
"learning_rate": 1.1554853740797556e-06,
"loss": 0.6235543489456177,
"step": 1953
},
{
"epoch": 2.4064039408866993,
"grad_norm": 11.28922905122106,
"learning_rate": 1.1509076040606127e-06,
"loss": 0.42575669288635254,
"step": 1954
},
{
"epoch": 2.4076354679802954,
"grad_norm": 10.213241448714898,
"learning_rate": 1.1463377404073433e-06,
"loss": 0.22154280543327332,
"step": 1955
},
{
"epoch": 2.4088669950738915,
"grad_norm": 9.867650979911392,
"learning_rate": 1.1417757925068362e-06,
"loss": 0.5722556114196777,
"step": 1956
},
{
"epoch": 2.4100985221674875,
"grad_norm": 7.554394124376038,
"learning_rate": 1.137221769729725e-06,
"loss": 0.6502832174301147,
"step": 1957
},
{
"epoch": 2.4113300492610836,
"grad_norm": 13.191804943156788,
"learning_rate": 1.132675681430364e-06,
"loss": 0.41717976331710815,
"step": 1958
},
{
"epoch": 2.4125615763546797,
"grad_norm": 12.040721504656855,
"learning_rate": 1.1281375369468078e-06,
"loss": 0.3705020248889923,
"step": 1959
},
{
"epoch": 2.413793103448276,
"grad_norm": 19.08924876929562,
"learning_rate": 1.1236073456007928e-06,
"loss": 0.8128242492675781,
"step": 1960
},
{
"epoch": 2.415024630541872,
"grad_norm": 16.296662141524465,
"learning_rate": 1.1190851166977218e-06,
"loss": 0.7350403070449829,
"step": 1961
},
{
"epoch": 2.416256157635468,
"grad_norm": 7.0582572680809195,
"learning_rate": 1.1145708595266418e-06,
"loss": 0.5837904214859009,
"step": 1962
},
{
"epoch": 2.417487684729064,
"grad_norm": 8.875645426047061,
"learning_rate": 1.1100645833602231e-06,
"loss": 0.436983585357666,
"step": 1963
},
{
"epoch": 2.41871921182266,
"grad_norm": 9.396076477777111,
"learning_rate": 1.105566297454742e-06,
"loss": 0.4708068370819092,
"step": 1964
},
{
"epoch": 2.4199507389162562,
"grad_norm": 12.540961285951255,
"learning_rate": 1.1010760110500652e-06,
"loss": 0.37972012162208557,
"step": 1965
},
{
"epoch": 2.4211822660098523,
"grad_norm": 9.511768233063343,
"learning_rate": 1.0965937333696264e-06,
"loss": 0.3167269229888916,
"step": 1966
},
{
"epoch": 2.4224137931034484,
"grad_norm": 8.997618711574894,
"learning_rate": 1.0921194736204066e-06,
"loss": 0.3407049775123596,
"step": 1967
},
{
"epoch": 2.4236453201970445,
"grad_norm": 26.50748327469745,
"learning_rate": 1.0876532409929208e-06,
"loss": 0.7673642635345459,
"step": 1968
},
{
"epoch": 2.4248768472906406,
"grad_norm": 7.428296790887836,
"learning_rate": 1.083195044661195e-06,
"loss": 0.3029213845729828,
"step": 1969
},
{
"epoch": 2.4261083743842367,
"grad_norm": 16.297521234369484,
"learning_rate": 1.0787448937827428e-06,
"loss": 0.5143488049507141,
"step": 1970
},
{
"epoch": 2.4273399014778327,
"grad_norm": 9.838022492363262,
"learning_rate": 1.0743027974985576e-06,
"loss": 0.5086369514465332,
"step": 1971
},
{
"epoch": 2.4285714285714284,
"grad_norm": 11.760234490761677,
"learning_rate": 1.069868764933088e-06,
"loss": 0.7999781966209412,
"step": 1972
},
{
"epoch": 2.4298029556650245,
"grad_norm": 8.348930224912683,
"learning_rate": 1.065442805194214e-06,
"loss": 0.2686223089694977,
"step": 1973
},
{
"epoch": 2.4310344827586206,
"grad_norm": 10.189321214439989,
"learning_rate": 1.0610249273732393e-06,
"loss": 0.2520446181297302,
"step": 1974
},
{
"epoch": 2.4322660098522166,
"grad_norm": 11.006280468973555,
"learning_rate": 1.056615140544861e-06,
"loss": 0.28887757658958435,
"step": 1975
},
{
"epoch": 2.4334975369458127,
"grad_norm": 17.908792965669562,
"learning_rate": 1.0522134537671625e-06,
"loss": 0.3709273338317871,
"step": 1976
},
{
"epoch": 2.434729064039409,
"grad_norm": 8.261377574040777,
"learning_rate": 1.0478198760815833e-06,
"loss": 0.6718100309371948,
"step": 1977
},
{
"epoch": 2.435960591133005,
"grad_norm": 8.787835782948932,
"learning_rate": 1.0434344165129095e-06,
"loss": 0.17143529653549194,
"step": 1978
},
{
"epoch": 2.437192118226601,
"grad_norm": 15.115289039167425,
"learning_rate": 1.0390570840692527e-06,
"loss": 0.7128796577453613,
"step": 1979
},
{
"epoch": 2.438423645320197,
"grad_norm": 13.46718512167487,
"learning_rate": 1.034687887742028e-06,
"loss": 0.24575555324554443,
"step": 1980
},
{
"epoch": 2.439655172413793,
"grad_norm": 15.637303471440513,
"learning_rate": 1.0303268365059383e-06,
"loss": 0.5631250739097595,
"step": 1981
},
{
"epoch": 2.4408866995073892,
"grad_norm": 10.921107789227744,
"learning_rate": 1.0259739393189573e-06,
"loss": 0.3094029128551483,
"step": 1982
},
{
"epoch": 2.4421182266009853,
"grad_norm": 9.876371637108129,
"learning_rate": 1.021629205122311e-06,
"loss": 0.4754146635532379,
"step": 1983
},
{
"epoch": 2.4433497536945814,
"grad_norm": 11.197843935010443,
"learning_rate": 1.0172926428404527e-06,
"loss": 0.18599992990493774,
"step": 1984
},
{
"epoch": 2.4445812807881775,
"grad_norm": 11.60242134696919,
"learning_rate": 1.0129642613810576e-06,
"loss": 0.3831806480884552,
"step": 1985
},
{
"epoch": 2.4458128078817736,
"grad_norm": 10.915359357263476,
"learning_rate": 1.008644069634989e-06,
"loss": 0.7717353105545044,
"step": 1986
},
{
"epoch": 2.447044334975369,
"grad_norm": 16.40151326361354,
"learning_rate": 1.0043320764762915e-06,
"loss": 0.3248934745788574,
"step": 1987
},
{
"epoch": 2.4482758620689653,
"grad_norm": 7.869645643343828,
"learning_rate": 1.0000282907621694e-06,
"loss": 0.27836111187934875,
"step": 1988
},
{
"epoch": 2.4495073891625614,
"grad_norm": 10.609052698858209,
"learning_rate": 9.957327213329687e-07,
"loss": 0.20251630246639252,
"step": 1989
},
{
"epoch": 2.4507389162561575,
"grad_norm": 15.802681481740834,
"learning_rate": 9.914453770121557e-07,
"loss": 0.6009274125099182,
"step": 1990
},
{
"epoch": 2.4519704433497536,
"grad_norm": 12.5975867275524,
"learning_rate": 9.871662666063054e-07,
"loss": 0.3312684893608093,
"step": 1991
},
{
"epoch": 2.4532019704433496,
"grad_norm": 11.710094793009787,
"learning_rate": 9.828953989050744e-07,
"loss": 0.38521629571914673,
"step": 1992
},
{
"epoch": 2.4544334975369457,
"grad_norm": 7.249324950790913,
"learning_rate": 9.786327826811942e-07,
"loss": 0.2508774995803833,
"step": 1993
},
{
"epoch": 2.455665024630542,
"grad_norm": 9.220463260574913,
"learning_rate": 9.743784266904422e-07,
"loss": 0.36097291111946106,
"step": 1994
},
{
"epoch": 2.456896551724138,
"grad_norm": 22.22398053360695,
"learning_rate": 9.701323396716312e-07,
"loss": 0.6703237295150757,
"step": 1995
},
{
"epoch": 2.458128078817734,
"grad_norm": 10.185390156514575,
"learning_rate": 9.6589453034659e-07,
"loss": 0.9553302526473999,
"step": 1996
},
{
"epoch": 2.45935960591133,
"grad_norm": 10.103225854124274,
"learning_rate": 9.616650074201383e-07,
"loss": 0.3288821578025818,
"step": 1997
},
{
"epoch": 2.460591133004926,
"grad_norm": 9.00369401838797,
"learning_rate": 9.574437795800806e-07,
"loss": 0.3195754885673523,
"step": 1998
},
{
"epoch": 2.4618226600985222,
"grad_norm": 15.805795563779297,
"learning_rate": 9.532308554971831e-07,
"loss": 0.26505401730537415,
"step": 1999
},
{
"epoch": 2.4630541871921183,
"grad_norm": 11.25947467258853,
"learning_rate": 9.490262438251496e-07,
"loss": 0.43558627367019653,
"step": 2000
},
{
"epoch": 2.4642857142857144,
"grad_norm": 10.457734518302678,
"learning_rate": 9.44829953200615e-07,
"loss": 0.3582439720630646,
"step": 2001
},
{
"epoch": 2.4655172413793105,
"grad_norm": 12.231152863168465,
"learning_rate": 9.406419922431214e-07,
"loss": 0.7142423987388611,
"step": 2002
},
{
"epoch": 2.4667487684729066,
"grad_norm": 12.479544686562418,
"learning_rate": 9.364623695550979e-07,
"loss": 0.24947094917297363,
"step": 2003
},
{
"epoch": 2.4679802955665027,
"grad_norm": 16.323337348543824,
"learning_rate": 9.322910937218471e-07,
"loss": 1.0376765727996826,
"step": 2004
},
{
"epoch": 2.4692118226600988,
"grad_norm": 12.025786233159009,
"learning_rate": 9.281281733115288e-07,
"loss": 0.39291733503341675,
"step": 2005
},
{
"epoch": 2.4704433497536944,
"grad_norm": 15.526509163555014,
"learning_rate": 9.239736168751395e-07,
"loss": 1.1038362979888916,
"step": 2006
},
{
"epoch": 2.4716748768472905,
"grad_norm": 10.027251067087649,
"learning_rate": 9.198274329464929e-07,
"loss": 0.8542830944061279,
"step": 2007
},
{
"epoch": 2.4729064039408866,
"grad_norm": 20.306111450694207,
"learning_rate": 9.156896300422053e-07,
"loss": 0.807994544506073,
"step": 2008
},
{
"epoch": 2.4741379310344827,
"grad_norm": 5.653479787843331,
"learning_rate": 9.115602166616805e-07,
"loss": 0.17016081511974335,
"step": 2009
},
{
"epoch": 2.4753694581280787,
"grad_norm": 11.492766886926658,
"learning_rate": 9.07439201287088e-07,
"loss": 0.7831156849861145,
"step": 2010
},
{
"epoch": 2.476600985221675,
"grad_norm": 9.3732349373237,
"learning_rate": 9.033265923833446e-07,
"loss": 0.5146660804748535,
"step": 2011
},
{
"epoch": 2.477832512315271,
"grad_norm": 13.78559435557381,
"learning_rate": 8.992223983981035e-07,
"loss": 0.5641926527023315,
"step": 2012
},
{
"epoch": 2.479064039408867,
"grad_norm": 7.867545716232377,
"learning_rate": 8.951266277617326e-07,
"loss": 0.2155514359474182,
"step": 2013
},
{
"epoch": 2.480295566502463,
"grad_norm": 11.172087233714553,
"learning_rate": 8.91039288887292e-07,
"loss": 0.28125351667404175,
"step": 2014
},
{
"epoch": 2.481527093596059,
"grad_norm": 10.827596711387834,
"learning_rate": 8.869603901705287e-07,
"loss": 0.5349509716033936,
"step": 2015
},
{
"epoch": 2.4827586206896552,
"grad_norm": 10.652684351436065,
"learning_rate": 8.82889939989851e-07,
"loss": 0.43747422099113464,
"step": 2016
},
{
"epoch": 2.4839901477832513,
"grad_norm": 8.656359342370678,
"learning_rate": 8.78827946706311e-07,
"loss": 0.4629102647304535,
"step": 2017
},
{
"epoch": 2.4852216748768474,
"grad_norm": 9.302169561481923,
"learning_rate": 8.747744186635932e-07,
"loss": 0.41271477937698364,
"step": 2018
},
{
"epoch": 2.4864532019704435,
"grad_norm": 7.585718354318216,
"learning_rate": 8.707293641879888e-07,
"loss": 0.27247580885887146,
"step": 2019
},
{
"epoch": 2.4876847290640396,
"grad_norm": 11.7662978456361,
"learning_rate": 8.666927915883905e-07,
"loss": 1.4255273342132568,
"step": 2020
},
{
"epoch": 2.4889162561576352,
"grad_norm": 12.62783666106837,
"learning_rate": 8.626647091562612e-07,
"loss": 0.8762021660804749,
"step": 2021
},
{
"epoch": 2.4901477832512313,
"grad_norm": 7.781392053224673,
"learning_rate": 8.586451251656286e-07,
"loss": 0.43475109338760376,
"step": 2022
},
{
"epoch": 2.4913793103448274,
"grad_norm": 8.647004326334777,
"learning_rate": 8.546340478730647e-07,
"loss": 0.16091346740722656,
"step": 2023
},
{
"epoch": 2.4926108374384235,
"grad_norm": 10.050856051691818,
"learning_rate": 8.506314855176651e-07,
"loss": 0.491144061088562,
"step": 2024
},
{
"epoch": 2.4938423645320196,
"grad_norm": 15.049291696206959,
"learning_rate": 8.466374463210348e-07,
"loss": 0.792976438999176,
"step": 2025
},
{
"epoch": 2.4950738916256157,
"grad_norm": 13.192276803646186,
"learning_rate": 8.426519384872733e-07,
"loss": 0.8023815155029297,
"step": 2026
},
{
"epoch": 2.4963054187192117,
"grad_norm": 10.183319190154988,
"learning_rate": 8.386749702029578e-07,
"loss": 0.7008549571037292,
"step": 2027
},
{
"epoch": 2.497536945812808,
"grad_norm": 9.306826775675583,
"learning_rate": 8.347065496371193e-07,
"loss": 0.3158326745033264,
"step": 2028
},
{
"epoch": 2.498768472906404,
"grad_norm": 11.439845656368037,
"learning_rate": 8.307466849412365e-07,
"loss": 0.4847475588321686,
"step": 2029
},
{
"epoch": 2.5,
"grad_norm": 8.392845077442193,
"learning_rate": 8.2679538424921e-07,
"loss": 0.42490729689598083,
"step": 2030
},
{
"epoch": 2.501231527093596,
"grad_norm": 8.86668163556195,
"learning_rate": 8.228526556773486e-07,
"loss": 0.4303053021430969,
"step": 2031
},
{
"epoch": 2.502463054187192,
"grad_norm": 9.647239720582808,
"learning_rate": 8.18918507324356e-07,
"loss": 0.20669305324554443,
"step": 2032
},
{
"epoch": 2.5036945812807883,
"grad_norm": 14.868819185388821,
"learning_rate": 8.149929472713126e-07,
"loss": 0.4146193265914917,
"step": 2033
},
{
"epoch": 2.5049261083743843,
"grad_norm": 8.521845217294674,
"learning_rate": 8.110759835816518e-07,
"loss": 0.2852465510368347,
"step": 2034
},
{
"epoch": 2.5061576354679804,
"grad_norm": 9.65764576867383,
"learning_rate": 8.071676243011556e-07,
"loss": 0.5811144113540649,
"step": 2035
},
{
"epoch": 2.5073891625615765,
"grad_norm": 13.619550034189677,
"learning_rate": 8.032678774579272e-07,
"loss": 0.6767745614051819,
"step": 2036
},
{
"epoch": 2.5086206896551726,
"grad_norm": 10.986185907881213,
"learning_rate": 7.993767510623834e-07,
"loss": 0.5063849687576294,
"step": 2037
},
{
"epoch": 2.5098522167487687,
"grad_norm": 11.539593137413142,
"learning_rate": 7.954942531072285e-07,
"loss": 0.534786581993103,
"step": 2038
},
{
"epoch": 2.5110837438423648,
"grad_norm": 12.505177711554532,
"learning_rate": 7.91620391567448e-07,
"loss": 0.45122361183166504,
"step": 2039
},
{
"epoch": 2.512315270935961,
"grad_norm": 8.839741542848381,
"learning_rate": 7.877551744002881e-07,
"loss": 0.2832280099391937,
"step": 2040
},
{
"epoch": 2.5135467980295565,
"grad_norm": 11.718433441522615,
"learning_rate": 7.838986095452311e-07,
"loss": 0.8926963806152344,
"step": 2041
},
{
"epoch": 2.5147783251231526,
"grad_norm": 9.73145152883671,
"learning_rate": 7.800507049239947e-07,
"loss": 0.9263632893562317,
"step": 2042
},
{
"epoch": 2.5160098522167487,
"grad_norm": 16.48224794173804,
"learning_rate": 7.762114684405064e-07,
"loss": 0.3994196653366089,
"step": 2043
},
{
"epoch": 2.5172413793103448,
"grad_norm": 10.084446546675132,
"learning_rate": 7.723809079808842e-07,
"loss": 0.3273079991340637,
"step": 2044
},
{
"epoch": 2.518472906403941,
"grad_norm": 19.899209678081235,
"learning_rate": 7.685590314134294e-07,
"loss": 0.4566258192062378,
"step": 2045
},
{
"epoch": 2.519704433497537,
"grad_norm": 16.13317422246351,
"learning_rate": 7.647458465886055e-07,
"loss": 0.4199177026748657,
"step": 2046
},
{
"epoch": 2.520935960591133,
"grad_norm": 7.584665550484686,
"learning_rate": 7.609413613390199e-07,
"loss": 0.2789694666862488,
"step": 2047
},
{
"epoch": 2.522167487684729,
"grad_norm": 12.08003380462593,
"learning_rate": 7.571455834794095e-07,
"loss": 0.39359426498413086,
"step": 2048
},
{
"epoch": 2.523399014778325,
"grad_norm": 16.766513036441403,
"learning_rate": 7.533585208066302e-07,
"loss": 0.38510677218437195,
"step": 2049
},
{
"epoch": 2.5246305418719213,
"grad_norm": 14.332573036568608,
"learning_rate": 7.495801810996334e-07,
"loss": 1.0861276388168335,
"step": 2050
},
{
"epoch": 2.5258620689655173,
"grad_norm": 13.180696978229305,
"learning_rate": 7.458105721194525e-07,
"loss": 0.35866010189056396,
"step": 2051
},
{
"epoch": 2.5270935960591134,
"grad_norm": 8.80983116890946,
"learning_rate": 7.420497016091866e-07,
"loss": 0.3436219394207001,
"step": 2052
},
{
"epoch": 2.5283251231527095,
"grad_norm": 12.383092324048317,
"learning_rate": 7.382975772939866e-07,
"loss": 0.3687105178833008,
"step": 2053
},
{
"epoch": 2.529556650246305,
"grad_norm": 8.240739854437226,
"learning_rate": 7.34554206881039e-07,
"loss": 0.32671070098876953,
"step": 2054
},
{
"epoch": 2.5307881773399012,
"grad_norm": 11.575392957436732,
"learning_rate": 7.308195980595462e-07,
"loss": 0.7302184104919434,
"step": 2055
},
{
"epoch": 2.5320197044334973,
"grad_norm": 13.7288446044892,
"learning_rate": 7.270937585007149e-07,
"loss": 0.7430564761161804,
"step": 2056
},
{
"epoch": 2.5332512315270934,
"grad_norm": 8.666358783874388,
"learning_rate": 7.233766958577421e-07,
"loss": 0.305151104927063,
"step": 2057
},
{
"epoch": 2.5344827586206895,
"grad_norm": 17.881705697560324,
"learning_rate": 7.196684177657887e-07,
"loss": 0.4311235547065735,
"step": 2058
},
{
"epoch": 2.5357142857142856,
"grad_norm": 13.989195036115625,
"learning_rate": 7.159689318419777e-07,
"loss": 0.29697108268737793,
"step": 2059
},
{
"epoch": 2.5369458128078817,
"grad_norm": 10.004375359602093,
"learning_rate": 7.122782456853722e-07,
"loss": 0.5012999176979065,
"step": 2060
},
{
"epoch": 2.5381773399014778,
"grad_norm": 10.441122865704237,
"learning_rate": 7.085963668769552e-07,
"loss": 0.24754227697849274,
"step": 2061
},
{
"epoch": 2.539408866995074,
"grad_norm": 7.415294238465162,
"learning_rate": 7.049233029796243e-07,
"loss": 0.1311894953250885,
"step": 2062
},
{
"epoch": 2.54064039408867,
"grad_norm": 11.745936375906483,
"learning_rate": 7.012590615381654e-07,
"loss": 0.3458009958267212,
"step": 2063
},
{
"epoch": 2.541871921182266,
"grad_norm": 19.579629082198277,
"learning_rate": 6.976036500792466e-07,
"loss": 0.6216360330581665,
"step": 2064
},
{
"epoch": 2.543103448275862,
"grad_norm": 17.511409594621433,
"learning_rate": 6.939570761113939e-07,
"loss": 0.41114604473114014,
"step": 2065
},
{
"epoch": 2.544334975369458,
"grad_norm": 12.769592062525021,
"learning_rate": 6.903193471249853e-07,
"loss": 0.35362619161605835,
"step": 2066
},
{
"epoch": 2.5455665024630543,
"grad_norm": 15.37068507816602,
"learning_rate": 6.866904705922284e-07,
"loss": 1.7280857563018799,
"step": 2067
},
{
"epoch": 2.5467980295566504,
"grad_norm": 12.864848425460373,
"learning_rate": 6.830704539671462e-07,
"loss": 1.3645777702331543,
"step": 2068
},
{
"epoch": 2.5480295566502464,
"grad_norm": 8.663375537691056,
"learning_rate": 6.794593046855613e-07,
"loss": 0.46488872170448303,
"step": 2069
},
{
"epoch": 2.5492610837438425,
"grad_norm": 11.746641376676559,
"learning_rate": 6.758570301650869e-07,
"loss": 0.9913250803947449,
"step": 2070
},
{
"epoch": 2.5504926108374386,
"grad_norm": 14.714182423444447,
"learning_rate": 6.722636378051011e-07,
"loss": 0.8180273771286011,
"step": 2071
},
{
"epoch": 2.5517241379310347,
"grad_norm": 7.848050259431333,
"learning_rate": 6.686791349867422e-07,
"loss": 0.5234679579734802,
"step": 2072
},
{
"epoch": 2.552955665024631,
"grad_norm": 6.903410737354236,
"learning_rate": 6.651035290728858e-07,
"loss": 0.08975313603878021,
"step": 2073
},
{
"epoch": 2.554187192118227,
"grad_norm": 11.27527783341364,
"learning_rate": 6.615368274081335e-07,
"loss": 0.35545456409454346,
"step": 2074
},
{
"epoch": 2.5554187192118225,
"grad_norm": 11.726857926860664,
"learning_rate": 6.579790373187944e-07,
"loss": 1.192006230354309,
"step": 2075
},
{
"epoch": 2.5566502463054186,
"grad_norm": 18.37387229568444,
"learning_rate": 6.54430166112876e-07,
"loss": 0.35069915652275085,
"step": 2076
},
{
"epoch": 2.5578817733990147,
"grad_norm": 9.620718531681447,
"learning_rate": 6.508902210800649e-07,
"loss": 0.20691820979118347,
"step": 2077
},
{
"epoch": 2.5591133004926108,
"grad_norm": 16.343394062782135,
"learning_rate": 6.473592094917092e-07,
"loss": 0.4561042785644531,
"step": 2078
},
{
"epoch": 2.560344827586207,
"grad_norm": 11.889860706895831,
"learning_rate": 6.43837138600813e-07,
"loss": 0.32198822498321533,
"step": 2079
},
{
"epoch": 2.561576354679803,
"grad_norm": 10.519181625251578,
"learning_rate": 6.403240156420087e-07,
"loss": 0.35681653022766113,
"step": 2080
},
{
"epoch": 2.562807881773399,
"grad_norm": 9.426944191114051,
"learning_rate": 6.36819847831554e-07,
"loss": 0.5826268196105957,
"step": 2081
},
{
"epoch": 2.564039408866995,
"grad_norm": 10.18400417142911,
"learning_rate": 6.333246423673096e-07,
"loss": 0.23084279894828796,
"step": 2082
},
{
"epoch": 2.565270935960591,
"grad_norm": 8.146966381833735,
"learning_rate": 6.298384064287261e-07,
"loss": 0.5527750253677368,
"step": 2083
},
{
"epoch": 2.5665024630541873,
"grad_norm": 7.581778739386861,
"learning_rate": 6.263611471768349e-07,
"loss": 0.4125085175037384,
"step": 2084
},
{
"epoch": 2.5677339901477834,
"grad_norm": 9.31385960486644,
"learning_rate": 6.228928717542205e-07,
"loss": 0.37431174516677856,
"step": 2085
},
{
"epoch": 2.5689655172413794,
"grad_norm": 9.72676402112677,
"learning_rate": 6.194335872850188e-07,
"loss": 0.17119471728801727,
"step": 2086
},
{
"epoch": 2.5701970443349755,
"grad_norm": 11.790310632986847,
"learning_rate": 6.159833008748988e-07,
"loss": 0.9465748071670532,
"step": 2087
},
{
"epoch": 2.571428571428571,
"grad_norm": 25.018614409312026,
"learning_rate": 6.125420196110426e-07,
"loss": 0.48980847001075745,
"step": 2088
},
{
"epoch": 2.5726600985221673,
"grad_norm": 8.85280166601153,
"learning_rate": 6.091097505621374e-07,
"loss": 0.7195557951927185,
"step": 2089
},
{
"epoch": 2.5738916256157633,
"grad_norm": 12.112085029881426,
"learning_rate": 6.056865007783602e-07,
"loss": 1.83125638961792,
"step": 2090
},
{
"epoch": 2.5751231527093594,
"grad_norm": 9.94028667902401,
"learning_rate": 6.022722772913581e-07,
"loss": 0.3298517167568207,
"step": 2091
},
{
"epoch": 2.5763546798029555,
"grad_norm": 11.18503180129702,
"learning_rate": 5.988670871142377e-07,
"loss": 0.47125905752182007,
"step": 2092
},
{
"epoch": 2.5775862068965516,
"grad_norm": 9.413844300619951,
"learning_rate": 5.954709372415524e-07,
"loss": 0.288496196269989,
"step": 2093
},
{
"epoch": 2.5788177339901477,
"grad_norm": 7.1811144983138675,
"learning_rate": 5.920838346492874e-07,
"loss": 0.3627285957336426,
"step": 2094
},
{
"epoch": 2.5800492610837438,
"grad_norm": 14.830294096591077,
"learning_rate": 5.887057862948403e-07,
"loss": 0.7072806358337402,
"step": 2095
},
{
"epoch": 2.58128078817734,
"grad_norm": 10.644924386002677,
"learning_rate": 5.853367991170106e-07,
"loss": 0.3386034071445465,
"step": 2096
},
{
"epoch": 2.582512315270936,
"grad_norm": 14.094564220777247,
"learning_rate": 5.819768800359882e-07,
"loss": 0.4901737570762634,
"step": 2097
},
{
"epoch": 2.583743842364532,
"grad_norm": 10.630160715256755,
"learning_rate": 5.786260359533369e-07,
"loss": 1.683629035949707,
"step": 2098
},
{
"epoch": 2.584975369458128,
"grad_norm": 8.221455266315619,
"learning_rate": 5.752842737519743e-07,
"loss": 0.4275779128074646,
"step": 2099
},
{
"epoch": 2.586206896551724,
"grad_norm": 8.989808316079593,
"learning_rate": 5.7195160029617e-07,
"loss": 0.6892256736755371,
"step": 2100
},
{
"epoch": 2.5874384236453203,
"grad_norm": 10.390493407130242,
"learning_rate": 5.686280224315189e-07,
"loss": 0.6548988819122314,
"step": 2101
},
{
"epoch": 2.5886699507389164,
"grad_norm": 8.365114703591324,
"learning_rate": 5.653135469849347e-07,
"loss": 0.4431142807006836,
"step": 2102
},
{
"epoch": 2.5899014778325125,
"grad_norm": 20.296284889046316,
"learning_rate": 5.62008180764635e-07,
"loss": 0.5730191469192505,
"step": 2103
},
{
"epoch": 2.5911330049261085,
"grad_norm": 7.886033521206941,
"learning_rate": 5.587119305601263e-07,
"loss": 0.8734421730041504,
"step": 2104
},
{
"epoch": 2.5923645320197046,
"grad_norm": 7.851476190792639,
"learning_rate": 5.554248031421872e-07,
"loss": 0.30810514092445374,
"step": 2105
},
{
"epoch": 2.5935960591133007,
"grad_norm": 10.114012805058133,
"learning_rate": 5.521468052628615e-07,
"loss": 0.5941227078437805,
"step": 2106
},
{
"epoch": 2.594827586206897,
"grad_norm": 11.5276807645432,
"learning_rate": 5.488779436554359e-07,
"loss": 0.32648181915283203,
"step": 2107
},
{
"epoch": 2.596059113300493,
"grad_norm": 12.384461199116616,
"learning_rate": 5.456182250344349e-07,
"loss": 0.2934610843658447,
"step": 2108
},
{
"epoch": 2.5972906403940885,
"grad_norm": 9.420595645239136,
"learning_rate": 5.423676560955976e-07,
"loss": 0.20387941598892212,
"step": 2109
},
{
"epoch": 2.5985221674876846,
"grad_norm": 10.459297088933635,
"learning_rate": 5.391262435158722e-07,
"loss": 0.6115235090255737,
"step": 2110
},
{
"epoch": 2.5997536945812807,
"grad_norm": 13.891885044549888,
"learning_rate": 5.358939939534002e-07,
"loss": 0.45280611515045166,
"step": 2111
},
{
"epoch": 2.600985221674877,
"grad_norm": 8.172861215602202,
"learning_rate": 5.326709140474962e-07,
"loss": 0.29169538617134094,
"step": 2112
},
{
"epoch": 2.602216748768473,
"grad_norm": 6.844042320685791,
"learning_rate": 5.294570104186436e-07,
"loss": 0.4924798011779785,
"step": 2113
},
{
"epoch": 2.603448275862069,
"grad_norm": 12.392169249298135,
"learning_rate": 5.262522896684774e-07,
"loss": 0.6751348376274109,
"step": 2114
},
{
"epoch": 2.604679802955665,
"grad_norm": 13.993739996881734,
"learning_rate": 5.230567583797674e-07,
"loss": 0.6676002740859985,
"step": 2115
},
{
"epoch": 2.605911330049261,
"grad_norm": 12.746427038097593,
"learning_rate": 5.198704231164093e-07,
"loss": 0.3112475275993347,
"step": 2116
},
{
"epoch": 2.607142857142857,
"grad_norm": 9.88854663199865,
"learning_rate": 5.166932904234101e-07,
"loss": 0.5024739503860474,
"step": 2117
},
{
"epoch": 2.6083743842364533,
"grad_norm": 18.4856178419616,
"learning_rate": 5.135253668268724e-07,
"loss": 2.6769824028015137,
"step": 2118
},
{
"epoch": 2.6096059113300494,
"grad_norm": 12.280278924091732,
"learning_rate": 5.103666588339812e-07,
"loss": 0.4120222330093384,
"step": 2119
},
{
"epoch": 2.6108374384236455,
"grad_norm": 8.106704210398478,
"learning_rate": 5.072171729329944e-07,
"loss": 0.3238741457462311,
"step": 2120
},
{
"epoch": 2.612068965517241,
"grad_norm": 9.476233543897594,
"learning_rate": 5.040769155932285e-07,
"loss": 0.41853106021881104,
"step": 2121
},
{
"epoch": 2.613300492610837,
"grad_norm": 9.382868411266552,
"learning_rate": 5.00945893265039e-07,
"loss": 0.5511228442192078,
"step": 2122
},
{
"epoch": 2.6145320197044333,
"grad_norm": 10.011756541997418,
"learning_rate": 4.978241123798133e-07,
"loss": 0.6076939105987549,
"step": 2123
},
{
"epoch": 2.6157635467980294,
"grad_norm": 11.969458383094386,
"learning_rate": 4.94711579349959e-07,
"loss": 0.32137832045555115,
"step": 2124
},
{
"epoch": 2.6169950738916254,
"grad_norm": 9.120309940189742,
"learning_rate": 4.916083005688865e-07,
"loss": 0.2919730246067047,
"step": 2125
},
{
"epoch": 2.6182266009852215,
"grad_norm": 11.012298283555321,
"learning_rate": 4.885142824109946e-07,
"loss": 0.3521897792816162,
"step": 2126
},
{
"epoch": 2.6194581280788176,
"grad_norm": 10.719771585992975,
"learning_rate": 4.85429531231662e-07,
"loss": 0.5645777583122253,
"step": 2127
},
{
"epoch": 2.6206896551724137,
"grad_norm": 8.564760545887571,
"learning_rate": 4.823540533672355e-07,
"loss": 0.21364668011665344,
"step": 2128
},
{
"epoch": 2.62192118226601,
"grad_norm": 10.461100625681352,
"learning_rate": 4.792878551350055e-07,
"loss": 0.3472633957862854,
"step": 2129
},
{
"epoch": 2.623152709359606,
"grad_norm": 7.7796379590314295,
"learning_rate": 4.7623094283320905e-07,
"loss": 0.2312706857919693,
"step": 2130
},
{
"epoch": 2.624384236453202,
"grad_norm": 10.908716191951015,
"learning_rate": 4.7318332274100595e-07,
"loss": 0.4227292835712433,
"step": 2131
},
{
"epoch": 2.625615763546798,
"grad_norm": 11.077941430018797,
"learning_rate": 4.701450011184677e-07,
"loss": 0.4835679531097412,
"step": 2132
},
{
"epoch": 2.626847290640394,
"grad_norm": 8.011667181424437,
"learning_rate": 4.671159842065698e-07,
"loss": 0.30153489112854004,
"step": 2133
},
{
"epoch": 2.62807881773399,
"grad_norm": 9.961423240887521,
"learning_rate": 4.640962782271707e-07,
"loss": 0.19820570945739746,
"step": 2134
},
{
"epoch": 2.6293103448275863,
"grad_norm": 18.168474918209572,
"learning_rate": 4.6108588938300725e-07,
"loss": 0.5798308253288269,
"step": 2135
},
{
"epoch": 2.6305418719211824,
"grad_norm": 14.982461578988175,
"learning_rate": 4.5808482385767407e-07,
"loss": 0.4840395450592041,
"step": 2136
},
{
"epoch": 2.6317733990147785,
"grad_norm": 12.540506897781501,
"learning_rate": 4.5509308781561846e-07,
"loss": 0.33036884665489197,
"step": 2137
},
{
"epoch": 2.6330049261083746,
"grad_norm": 10.69964555424519,
"learning_rate": 4.521106874021242e-07,
"loss": 0.4032250642776489,
"step": 2138
},
{
"epoch": 2.6342364532019706,
"grad_norm": 10.190070867602095,
"learning_rate": 4.4913762874329527e-07,
"loss": 0.5196541547775269,
"step": 2139
},
{
"epoch": 2.6354679802955667,
"grad_norm": 15.414254295489695,
"learning_rate": 4.4617391794604946e-07,
"loss": 0.5049697160720825,
"step": 2140
},
{
"epoch": 2.636699507389163,
"grad_norm": 11.232489708483897,
"learning_rate": 4.4321956109810327e-07,
"loss": 0.6910302639007568,
"step": 2141
},
{
"epoch": 2.637931034482759,
"grad_norm": 17.874353794074672,
"learning_rate": 4.4027456426796014e-07,
"loss": 0.8860565423965454,
"step": 2142
},
{
"epoch": 2.6391625615763545,
"grad_norm": 8.315561152824909,
"learning_rate": 4.3733893350489386e-07,
"loss": 0.3347795307636261,
"step": 2143
},
{
"epoch": 2.6403940886699506,
"grad_norm": 8.406655821874109,
"learning_rate": 4.344126748389438e-07,
"loss": 0.5979218482971191,
"step": 2144
},
{
"epoch": 2.6416256157635467,
"grad_norm": 10.633642678256232,
"learning_rate": 4.314957942808956e-07,
"loss": 0.6724722385406494,
"step": 2145
},
{
"epoch": 2.642857142857143,
"grad_norm": 11.37770126439957,
"learning_rate": 4.2858829782227107e-07,
"loss": 0.23655423521995544,
"step": 2146
},
{
"epoch": 2.644088669950739,
"grad_norm": 13.564798867932334,
"learning_rate": 4.2569019143531845e-07,
"loss": 0.7535929679870605,
"step": 2147
},
{
"epoch": 2.645320197044335,
"grad_norm": 7.225057762729149,
"learning_rate": 4.228014810729963e-07,
"loss": 0.5065590143203735,
"step": 2148
},
{
"epoch": 2.646551724137931,
"grad_norm": 11.646047154930116,
"learning_rate": 4.199221726689634e-07,
"loss": 0.8232078552246094,
"step": 2149
},
{
"epoch": 2.647783251231527,
"grad_norm": 12.627075206048184,
"learning_rate": 4.170522721375669e-07,
"loss": 0.3928985595703125,
"step": 2150
},
{
"epoch": 2.649014778325123,
"grad_norm": 11.823044988035218,
"learning_rate": 4.1419178537382756e-07,
"loss": 0.6924771070480347,
"step": 2151
},
{
"epoch": 2.6502463054187193,
"grad_norm": 8.99171598727701,
"learning_rate": 4.1134071825343124e-07,
"loss": 0.3323458135128021,
"step": 2152
},
{
"epoch": 2.6514778325123154,
"grad_norm": 8.020309669901565,
"learning_rate": 4.0849907663271346e-07,
"loss": 0.6068896651268005,
"step": 2153
},
{
"epoch": 2.6527093596059115,
"grad_norm": 9.698785865473045,
"learning_rate": 4.0566686634865016e-07,
"loss": 0.2112211287021637,
"step": 2154
},
{
"epoch": 2.653940886699507,
"grad_norm": 8.70939943207942,
"learning_rate": 4.028440932188465e-07,
"loss": 0.3340219259262085,
"step": 2155
},
{
"epoch": 2.655172413793103,
"grad_norm": 16.06563756982883,
"learning_rate": 4.0003076304151624e-07,
"loss": 0.4172120690345764,
"step": 2156
},
{
"epoch": 2.6564039408866993,
"grad_norm": 10.448504154619048,
"learning_rate": 3.972268815954833e-07,
"loss": 0.3891775608062744,
"step": 2157
},
{
"epoch": 2.6576354679802954,
"grad_norm": 14.733135115767965,
"learning_rate": 3.944324546401607e-07,
"loss": 0.4906957149505615,
"step": 2158
},
{
"epoch": 2.6588669950738915,
"grad_norm": 9.613272858024363,
"learning_rate": 3.916474879155402e-07,
"loss": 0.8216167688369751,
"step": 2159
},
{
"epoch": 2.6600985221674875,
"grad_norm": 10.257611413751764,
"learning_rate": 3.8887198714218255e-07,
"loss": 0.2030409872531891,
"step": 2160
},
{
"epoch": 2.6613300492610836,
"grad_norm": 7.648297896745766,
"learning_rate": 3.8610595802120564e-07,
"loss": 0.24565047025680542,
"step": 2161
},
{
"epoch": 2.6625615763546797,
"grad_norm": 10.822762486642535,
"learning_rate": 3.833494062342691e-07,
"loss": 0.3111516833305359,
"step": 2162
},
{
"epoch": 2.663793103448276,
"grad_norm": 7.318326050197103,
"learning_rate": 3.8060233744356634e-07,
"loss": 0.32978883385658264,
"step": 2163
},
{
"epoch": 2.665024630541872,
"grad_norm": 12.599543466460439,
"learning_rate": 3.7786475729181314e-07,
"loss": 0.5468876361846924,
"step": 2164
},
{
"epoch": 2.666256157635468,
"grad_norm": 8.338604416987764,
"learning_rate": 3.751366714022342e-07,
"loss": 0.25511908531188965,
"step": 2165
},
{
"epoch": 2.667487684729064,
"grad_norm": 10.389301741607085,
"learning_rate": 3.724180853785514e-07,
"loss": 0.9938629269599915,
"step": 2166
},
{
"epoch": 2.66871921182266,
"grad_norm": 12.267953130443164,
"learning_rate": 3.6970900480497287e-07,
"loss": 0.4233144223690033,
"step": 2167
},
{
"epoch": 2.6699507389162562,
"grad_norm": 11.571711586702998,
"learning_rate": 3.6700943524618284e-07,
"loss": 0.39373546838760376,
"step": 2168
},
{
"epoch": 2.6711822660098523,
"grad_norm": 9.063048538209927,
"learning_rate": 3.643193822473301e-07,
"loss": 0.40346717834472656,
"step": 2169
},
{
"epoch": 2.6724137931034484,
"grad_norm": 14.384271085159352,
"learning_rate": 3.616388513340124e-07,
"loss": 0.35343194007873535,
"step": 2170
},
{
"epoch": 2.6736453201970445,
"grad_norm": 16.277411971018296,
"learning_rate": 3.5896784801227046e-07,
"loss": 0.38300061225891113,
"step": 2171
},
{
"epoch": 2.6748768472906406,
"grad_norm": 7.950757575573031,
"learning_rate": 3.56306377768576e-07,
"loss": 0.5319961905479431,
"step": 2172
},
{
"epoch": 2.6761083743842367,
"grad_norm": 19.004855778838706,
"learning_rate": 3.5365444606981434e-07,
"loss": 0.45474281907081604,
"step": 2173
},
{
"epoch": 2.6773399014778327,
"grad_norm": 13.211081908527799,
"learning_rate": 3.5101205836328144e-07,
"loss": 0.41422080993652344,
"step": 2174
},
{
"epoch": 2.678571428571429,
"grad_norm": 12.892521639907137,
"learning_rate": 3.4837922007667e-07,
"loss": 0.5486617088317871,
"step": 2175
},
{
"epoch": 2.6798029556650245,
"grad_norm": 10.113357639811962,
"learning_rate": 3.4575593661805296e-07,
"loss": 0.27931463718414307,
"step": 2176
},
{
"epoch": 2.6810344827586206,
"grad_norm": 9.357499790574233,
"learning_rate": 3.4314221337588217e-07,
"loss": 0.45936134457588196,
"step": 2177
},
{
"epoch": 2.6822660098522166,
"grad_norm": 12.597881278175105,
"learning_rate": 3.405380557189669e-07,
"loss": 0.5659298896789551,
"step": 2178
},
{
"epoch": 2.6834975369458127,
"grad_norm": 16.9103130329337,
"learning_rate": 3.379434689964728e-07,
"loss": 0.3952332139015198,
"step": 2179
},
{
"epoch": 2.684729064039409,
"grad_norm": 13.280154300410791,
"learning_rate": 3.3535845853790105e-07,
"loss": 0.36344432830810547,
"step": 2180
},
{
"epoch": 2.685960591133005,
"grad_norm": 8.267427758719474,
"learning_rate": 3.3278302965308593e-07,
"loss": 0.29526573419570923,
"step": 2181
},
{
"epoch": 2.687192118226601,
"grad_norm": 14.172270303989801,
"learning_rate": 3.3021718763218025e-07,
"loss": 0.35098952054977417,
"step": 2182
},
{
"epoch": 2.688423645320197,
"grad_norm": 15.442089142249914,
"learning_rate": 3.276609377456419e-07,
"loss": 0.9407736659049988,
"step": 2183
},
{
"epoch": 2.689655172413793,
"grad_norm": 10.545470371926038,
"learning_rate": 3.2511428524422793e-07,
"loss": 0.29226356744766235,
"step": 2184
},
{
"epoch": 2.6908866995073892,
"grad_norm": 11.590832336497728,
"learning_rate": 3.2257723535898177e-07,
"loss": 0.78415846824646,
"step": 2185
},
{
"epoch": 2.6921182266009853,
"grad_norm": 10.523504017171055,
"learning_rate": 3.200497933012198e-07,
"loss": 0.22600015997886658,
"step": 2186
},
{
"epoch": 2.6933497536945814,
"grad_norm": 16.18317423891681,
"learning_rate": 3.1753196426252573e-07,
"loss": 0.3907809853553772,
"step": 2187
},
{
"epoch": 2.6945812807881775,
"grad_norm": 12.272867485671698,
"learning_rate": 3.150237534147366e-07,
"loss": 0.7056915760040283,
"step": 2188
},
{
"epoch": 2.695812807881773,
"grad_norm": 11.590493499262351,
"learning_rate": 3.125251659099332e-07,
"loss": 0.35921359062194824,
"step": 2189
},
{
"epoch": 2.697044334975369,
"grad_norm": 7.139507013908415,
"learning_rate": 3.1003620688042636e-07,
"loss": 0.17715278267860413,
"step": 2190
},
{
"epoch": 2.6982758620689653,
"grad_norm": 6.945336769527092,
"learning_rate": 3.0755688143875253e-07,
"loss": 0.20512376725673676,
"step": 2191
},
{
"epoch": 2.6995073891625614,
"grad_norm": 11.666932414854655,
"learning_rate": 3.050871946776596e-07,
"loss": 0.38939356803894043,
"step": 2192
},
{
"epoch": 2.7007389162561575,
"grad_norm": 8.970559885182587,
"learning_rate": 3.026271516700946e-07,
"loss": 0.3292514681816101,
"step": 2193
},
{
"epoch": 2.7019704433497536,
"grad_norm": 8.920484564263525,
"learning_rate": 3.0017675746919883e-07,
"loss": 0.2732661962509155,
"step": 2194
},
{
"epoch": 2.7032019704433496,
"grad_norm": 14.273169657648177,
"learning_rate": 2.9773601710828937e-07,
"loss": 0.3058941960334778,
"step": 2195
},
{
"epoch": 2.7044334975369457,
"grad_norm": 16.20827847981958,
"learning_rate": 2.953049356008586e-07,
"loss": 0.7454397082328796,
"step": 2196
},
{
"epoch": 2.705665024630542,
"grad_norm": 17.54054653840535,
"learning_rate": 2.928835179405548e-07,
"loss": 0.3679504692554474,
"step": 2197
},
{
"epoch": 2.706896551724138,
"grad_norm": 9.77472352239386,
"learning_rate": 2.9047176910117824e-07,
"loss": 0.2241794466972351,
"step": 2198
},
{
"epoch": 2.708128078817734,
"grad_norm": 8.561542797938362,
"learning_rate": 2.8806969403666897e-07,
"loss": 0.19927407801151276,
"step": 2199
},
{
"epoch": 2.70935960591133,
"grad_norm": 7.0959519302312195,
"learning_rate": 2.856772976810929e-07,
"loss": 0.2808955907821655,
"step": 2200
},
{
"epoch": 2.710591133004926,
"grad_norm": 21.456216648925764,
"learning_rate": 2.8329458494863846e-07,
"loss": 0.7279784083366394,
"step": 2201
},
{
"epoch": 2.7118226600985222,
"grad_norm": 8.853404617031957,
"learning_rate": 2.809215607336024e-07,
"loss": 0.47690945863723755,
"step": 2202
},
{
"epoch": 2.7130541871921183,
"grad_norm": 9.19562501308832,
"learning_rate": 2.7855822991037895e-07,
"loss": 0.1997358649969101,
"step": 2203
},
{
"epoch": 2.7142857142857144,
"grad_norm": 12.418182947084489,
"learning_rate": 2.762045973334526e-07,
"loss": 0.3269602954387665,
"step": 2204
},
{
"epoch": 2.7155172413793105,
"grad_norm": 9.253477256115538,
"learning_rate": 2.738606678373873e-07,
"loss": 0.5450934767723083,
"step": 2205
},
{
"epoch": 2.7167487684729066,
"grad_norm": 12.029880579085864,
"learning_rate": 2.7152644623681503e-07,
"loss": 0.4732050895690918,
"step": 2206
},
{
"epoch": 2.7179802955665027,
"grad_norm": 13.561046323857816,
"learning_rate": 2.6920193732642594e-07,
"loss": 0.26588505506515503,
"step": 2207
},
{
"epoch": 2.7192118226600988,
"grad_norm": 4.326966860689474,
"learning_rate": 2.668871458809613e-07,
"loss": 0.09280772507190704,
"step": 2208
},
{
"epoch": 2.720443349753695,
"grad_norm": 12.851246166510439,
"learning_rate": 2.6458207665520266e-07,
"loss": 0.3763241767883301,
"step": 2209
},
{
"epoch": 2.7216748768472905,
"grad_norm": 11.562947215162826,
"learning_rate": 2.6228673438395804e-07,
"loss": 0.46730220317840576,
"step": 2210
},
{
"epoch": 2.7229064039408866,
"grad_norm": 11.5850144160988,
"learning_rate": 2.600011237820577e-07,
"loss": 0.42677825689315796,
"step": 2211
},
{
"epoch": 2.7241379310344827,
"grad_norm": 15.077683389725815,
"learning_rate": 2.577252495443422e-07,
"loss": 0.4460552930831909,
"step": 2212
},
{
"epoch": 2.7253694581280787,
"grad_norm": 8.23073307445448,
"learning_rate": 2.5545911634565266e-07,
"loss": 0.5031150579452515,
"step": 2213
},
{
"epoch": 2.726600985221675,
"grad_norm": 11.590947176695321,
"learning_rate": 2.5320272884081955e-07,
"loss": 0.18559831380844116,
"step": 2214
},
{
"epoch": 2.727832512315271,
"grad_norm": 10.364105747898172,
"learning_rate": 2.5095609166465805e-07,
"loss": 0.2087395340204239,
"step": 2215
},
{
"epoch": 2.729064039408867,
"grad_norm": 7.72131921454244,
"learning_rate": 2.4871920943195404e-07,
"loss": 0.21503375470638275,
"step": 2216
},
{
"epoch": 2.730295566502463,
"grad_norm": 13.07348837914591,
"learning_rate": 2.4649208673745317e-07,
"loss": 0.20347240567207336,
"step": 2217
},
{
"epoch": 2.731527093596059,
"grad_norm": 7.396681990877147,
"learning_rate": 2.442747281558572e-07,
"loss": 0.20019523799419403,
"step": 2218
},
{
"epoch": 2.7327586206896552,
"grad_norm": 7.384056914568049,
"learning_rate": 2.420671382418122e-07,
"loss": 0.6672437191009521,
"step": 2219
},
{
"epoch": 2.7339901477832513,
"grad_norm": 9.4227706186618,
"learning_rate": 2.398693215298953e-07,
"loss": 0.28304070234298706,
"step": 2220
},
{
"epoch": 2.7352216748768474,
"grad_norm": 13.10398470275865,
"learning_rate": 2.3768128253461253e-07,
"loss": 0.7915571331977844,
"step": 2221
},
{
"epoch": 2.7364532019704435,
"grad_norm": 14.271199864374358,
"learning_rate": 2.3550302575038154e-07,
"loss": 0.2920302152633667,
"step": 2222
},
{
"epoch": 2.737684729064039,
"grad_norm": 9.98798818011476,
"learning_rate": 2.333345556515304e-07,
"loss": 0.7924119830131531,
"step": 2223
},
{
"epoch": 2.7389162561576352,
"grad_norm": 16.52502448354582,
"learning_rate": 2.311758766922806e-07,
"loss": 2.4264345169067383,
"step": 2224
},
{
"epoch": 2.7401477832512313,
"grad_norm": 11.115670896935416,
"learning_rate": 2.290269933067457e-07,
"loss": 0.6286523342132568,
"step": 2225
},
{
"epoch": 2.7413793103448274,
"grad_norm": 10.041583417397344,
"learning_rate": 2.2688790990891606e-07,
"loss": 0.4733774662017822,
"step": 2226
},
{
"epoch": 2.7426108374384235,
"grad_norm": 9.613596914422414,
"learning_rate": 2.2475863089265193e-07,
"loss": 0.41262203454971313,
"step": 2227
},
{
"epoch": 2.7438423645320196,
"grad_norm": 12.211203634057204,
"learning_rate": 2.2263916063167523e-07,
"loss": 0.9069987535476685,
"step": 2228
},
{
"epoch": 2.7450738916256157,
"grad_norm": 8.477983222031407,
"learning_rate": 2.205295034795596e-07,
"loss": 0.33371949195861816,
"step": 2229
},
{
"epoch": 2.7463054187192117,
"grad_norm": 10.672673009053705,
"learning_rate": 2.1842966376972142e-07,
"loss": 0.2515576183795929,
"step": 2230
},
{
"epoch": 2.747536945812808,
"grad_norm": 15.919489094243,
"learning_rate": 2.1633964581541212e-07,
"loss": 0.5854448080062866,
"step": 2231
},
{
"epoch": 2.748768472906404,
"grad_norm": 8.34813593109363,
"learning_rate": 2.1425945390970816e-07,
"loss": 0.36172378063201904,
"step": 2232
},
{
"epoch": 2.75,
"grad_norm": 13.095561050747872,
"learning_rate": 2.1218909232550156e-07,
"loss": 0.8217978477478027,
"step": 2233
},
{
"epoch": 2.751231527093596,
"grad_norm": 10.987521536719951,
"learning_rate": 2.1012856531549163e-07,
"loss": 0.5560616850852966,
"step": 2234
},
{
"epoch": 2.752463054187192,
"grad_norm": 15.220877022032928,
"learning_rate": 2.0807787711217887e-07,
"loss": 0.3503821790218353,
"step": 2235
},
{
"epoch": 2.7536945812807883,
"grad_norm": 17.985871130679012,
"learning_rate": 2.0603703192785264e-07,
"loss": 0.6000460982322693,
"step": 2236
},
{
"epoch": 2.7549261083743843,
"grad_norm": 10.345272170286153,
"learning_rate": 2.0400603395458408e-07,
"loss": 0.20410886406898499,
"step": 2237
},
{
"epoch": 2.7561576354679804,
"grad_norm": 10.777826560400182,
"learning_rate": 2.0198488736421607e-07,
"loss": 0.2497151494026184,
"step": 2238
},
{
"epoch": 2.7573891625615765,
"grad_norm": 9.330808767879285,
"learning_rate": 1.999735963083571e-07,
"loss": 0.2881111800670624,
"step": 2239
},
{
"epoch": 2.7586206896551726,
"grad_norm": 19.301319480093145,
"learning_rate": 1.9797216491837356e-07,
"loss": 0.38934653997421265,
"step": 2240
},
{
"epoch": 2.7598522167487687,
"grad_norm": 13.511728912052765,
"learning_rate": 1.9598059730537465e-07,
"loss": 0.3553803563117981,
"step": 2241
},
{
"epoch": 2.7610837438423648,
"grad_norm": 13.74634988747894,
"learning_rate": 1.9399889756021196e-07,
"loss": 0.3653762936592102,
"step": 2242
},
{
"epoch": 2.762315270935961,
"grad_norm": 9.247962499458838,
"learning_rate": 1.9202706975346875e-07,
"loss": 0.2600834369659424,
"step": 2243
},
{
"epoch": 2.7635467980295565,
"grad_norm": 11.458094202817868,
"learning_rate": 1.9006511793544458e-07,
"loss": 0.4601256847381592,
"step": 2244
},
{
"epoch": 2.7647783251231526,
"grad_norm": 17.193961086363156,
"learning_rate": 1.881130461361591e-07,
"loss": 0.33677470684051514,
"step": 2245
},
{
"epoch": 2.7660098522167487,
"grad_norm": 8.524927066266194,
"learning_rate": 1.8617085836533544e-07,
"loss": 0.8099600672721863,
"step": 2246
},
{
"epoch": 2.7672413793103448,
"grad_norm": 15.804119634424612,
"learning_rate": 1.8423855861239238e-07,
"loss": 0.6992620229721069,
"step": 2247
},
{
"epoch": 2.768472906403941,
"grad_norm": 9.647846553411064,
"learning_rate": 1.8231615084644105e-07,
"loss": 0.3640286326408386,
"step": 2248
},
{
"epoch": 2.769704433497537,
"grad_norm": 8.955751617734634,
"learning_rate": 1.8040363901627001e-07,
"loss": 0.2996286451816559,
"step": 2249
},
{
"epoch": 2.770935960591133,
"grad_norm": 11.938038283583609,
"learning_rate": 1.7850102705034455e-07,
"loss": 0.43687328696250916,
"step": 2250
},
{
"epoch": 2.772167487684729,
"grad_norm": 17.093390601969645,
"learning_rate": 1.7660831885679074e-07,
"loss": 0.7942696809768677,
"step": 2251
},
{
"epoch": 2.773399014778325,
"grad_norm": 13.100096515382093,
"learning_rate": 1.747255183233948e-07,
"loss": 1.1030818223953247,
"step": 2252
},
{
"epoch": 2.7746305418719213,
"grad_norm": 8.873613224852555,
"learning_rate": 1.7285262931759084e-07,
"loss": 0.5030316114425659,
"step": 2253
},
{
"epoch": 2.7758620689655173,
"grad_norm": 12.14741952725113,
"learning_rate": 1.7098965568645264e-07,
"loss": 0.6707223653793335,
"step": 2254
},
{
"epoch": 2.7770935960591134,
"grad_norm": 11.75778232712136,
"learning_rate": 1.6913660125668806e-07,
"loss": 0.2983396351337433,
"step": 2255
},
{
"epoch": 2.7783251231527095,
"grad_norm": 14.41974913977501,
"learning_rate": 1.6729346983462957e-07,
"loss": 0.6233869791030884,
"step": 2256
},
{
"epoch": 2.779556650246305,
"grad_norm": 13.000501735636352,
"learning_rate": 1.654602652062276e-07,
"loss": 0.2838573455810547,
"step": 2257
},
{
"epoch": 2.7807881773399012,
"grad_norm": 8.269339223606165,
"learning_rate": 1.636369911370417e-07,
"loss": 0.516904354095459,
"step": 2258
},
{
"epoch": 2.7820197044334973,
"grad_norm": 12.228570926666848,
"learning_rate": 1.6182365137223266e-07,
"loss": 0.2637355625629425,
"step": 2259
},
{
"epoch": 2.7832512315270934,
"grad_norm": 12.77963989317756,
"learning_rate": 1.600202496365566e-07,
"loss": 0.2973381280899048,
"step": 2260
},
{
"epoch": 2.7844827586206895,
"grad_norm": 12.028070410415097,
"learning_rate": 1.5822678963435479e-07,
"loss": 0.731842041015625,
"step": 2261
},
{
"epoch": 2.7857142857142856,
"grad_norm": 16.480537506483405,
"learning_rate": 1.564432750495476e-07,
"loss": 0.9091979265213013,
"step": 2262
},
{
"epoch": 2.7869458128078817,
"grad_norm": 14.778758482272446,
"learning_rate": 1.5466970954562786e-07,
"loss": 0.9223085641860962,
"step": 2263
},
{
"epoch": 2.7881773399014778,
"grad_norm": 12.767601072668027,
"learning_rate": 1.5290609676564982e-07,
"loss": 0.35786327719688416,
"step": 2264
},
{
"epoch": 2.789408866995074,
"grad_norm": 10.468097971683415,
"learning_rate": 1.5115244033222732e-07,
"loss": 0.7312544584274292,
"step": 2265
},
{
"epoch": 2.79064039408867,
"grad_norm": 9.834986856814911,
"learning_rate": 1.4940874384751947e-07,
"loss": 0.8420913219451904,
"step": 2266
},
{
"epoch": 2.791871921182266,
"grad_norm": 16.21429528610728,
"learning_rate": 1.47675010893229e-07,
"loss": 0.3239392042160034,
"step": 2267
},
{
"epoch": 2.793103448275862,
"grad_norm": 8.629439268560123,
"learning_rate": 1.4595124503059165e-07,
"loss": 0.3498873710632324,
"step": 2268
},
{
"epoch": 2.794334975369458,
"grad_norm": 6.690308017489741,
"learning_rate": 1.4423744980037068e-07,
"loss": 0.22733798623085022,
"step": 2269
},
{
"epoch": 2.7955665024630543,
"grad_norm": 8.212515181619986,
"learning_rate": 1.425336287228496e-07,
"loss": 0.2721923291683197,
"step": 2270
},
{
"epoch": 2.7967980295566504,
"grad_norm": 9.080877903298425,
"learning_rate": 1.408397852978205e-07,
"loss": 0.344375342130661,
"step": 2271
},
{
"epoch": 2.7980295566502464,
"grad_norm": 9.45480785329488,
"learning_rate": 1.391559230045847e-07,
"loss": 0.4529953896999359,
"step": 2272
},
{
"epoch": 2.7992610837438425,
"grad_norm": 9.214190080042984,
"learning_rate": 1.3748204530193987e-07,
"loss": 0.1639999896287918,
"step": 2273
},
{
"epoch": 2.8004926108374386,
"grad_norm": 13.6280899298915,
"learning_rate": 1.3581815562817402e-07,
"loss": 0.23326484858989716,
"step": 2274
},
{
"epoch": 2.8017241379310347,
"grad_norm": 8.920482755226637,
"learning_rate": 1.341642574010582e-07,
"loss": 0.22694149613380432,
"step": 2275
},
{
"epoch": 2.802955665024631,
"grad_norm": 8.710884196173295,
"learning_rate": 1.3252035401784324e-07,
"loss": 0.3588021993637085,
"step": 2276
},
{
"epoch": 2.804187192118227,
"grad_norm": 11.632314435280234,
"learning_rate": 1.3088644885524637e-07,
"loss": 0.4335256516933441,
"step": 2277
},
{
"epoch": 2.8054187192118225,
"grad_norm": 6.272067777885255,
"learning_rate": 1.2926254526944904e-07,
"loss": 0.1874769926071167,
"step": 2278
},
{
"epoch": 2.8066502463054186,
"grad_norm": 8.936224496797552,
"learning_rate": 1.27648646596088e-07,
"loss": 0.3144474923610687,
"step": 2279
},
{
"epoch": 2.8078817733990147,
"grad_norm": 19.58883398368707,
"learning_rate": 1.2604475615025092e-07,
"loss": 0.7241795063018799,
"step": 2280
},
{
"epoch": 2.8091133004926108,
"grad_norm": 16.726363332544537,
"learning_rate": 1.2445087722646576e-07,
"loss": 0.5169468522071838,
"step": 2281
},
{
"epoch": 2.810344827586207,
"grad_norm": 30.94634458747577,
"learning_rate": 1.228670130986953e-07,
"loss": 1.6869860887527466,
"step": 2282
},
{
"epoch": 2.811576354679803,
"grad_norm": 10.707666993688912,
"learning_rate": 1.212931670203338e-07,
"loss": 0.47550255060195923,
"step": 2283
},
{
"epoch": 2.812807881773399,
"grad_norm": 9.540335234729794,
"learning_rate": 1.197293422241952e-07,
"loss": 0.2437782883644104,
"step": 2284
},
{
"epoch": 2.814039408866995,
"grad_norm": 6.665490888518648,
"learning_rate": 1.1817554192251002e-07,
"loss": 0.37867432832717896,
"step": 2285
},
{
"epoch": 2.815270935960591,
"grad_norm": 9.667222509113516,
"learning_rate": 1.1663176930691744e-07,
"loss": 0.8604614734649658,
"step": 2286
},
{
"epoch": 2.8165024630541873,
"grad_norm": 12.759555548828967,
"learning_rate": 1.1509802754845978e-07,
"loss": 1.1947153806686401,
"step": 2287
},
{
"epoch": 2.8177339901477834,
"grad_norm": 9.33176290924216,
"learning_rate": 1.1357431979757194e-07,
"loss": 0.30131372809410095,
"step": 2288
},
{
"epoch": 2.8189655172413794,
"grad_norm": 10.72676065785706,
"learning_rate": 1.1206064918408143e-07,
"loss": 0.47112587094306946,
"step": 2289
},
{
"epoch": 2.8201970443349755,
"grad_norm": 11.488110070600202,
"learning_rate": 1.1055701881719838e-07,
"loss": 0.2062550187110901,
"step": 2290
},
{
"epoch": 2.821428571428571,
"grad_norm": 8.859910558029405,
"learning_rate": 1.0906343178550715e-07,
"loss": 0.30918222665786743,
"step": 2291
},
{
"epoch": 2.8226600985221673,
"grad_norm": 7.645494812767514,
"learning_rate": 1.0757989115696421e-07,
"loss": 0.46675896644592285,
"step": 2292
},
{
"epoch": 2.8238916256157633,
"grad_norm": 7.696373009746994,
"learning_rate": 1.0610639997888917e-07,
"loss": 0.2514066696166992,
"step": 2293
},
{
"epoch": 2.8251231527093594,
"grad_norm": 20.301202253116305,
"learning_rate": 1.0464296127795926e-07,
"loss": 0.37799739837646484,
"step": 2294
},
{
"epoch": 2.8263546798029555,
"grad_norm": 10.51342866650685,
"learning_rate": 1.0318957806020269e-07,
"loss": 1.170919418334961,
"step": 2295
},
{
"epoch": 2.8275862068965516,
"grad_norm": 10.322546313834785,
"learning_rate": 1.0174625331099363e-07,
"loss": 0.34683138132095337,
"step": 2296
},
{
"epoch": 2.8288177339901477,
"grad_norm": 13.218925485338286,
"learning_rate": 1.0031298999504557e-07,
"loss": 0.24154211580753326,
"step": 2297
},
{
"epoch": 2.8300492610837438,
"grad_norm": 11.94151576403668,
"learning_rate": 9.888979105640295e-08,
"loss": 0.3270137906074524,
"step": 2298
},
{
"epoch": 2.83128078817734,
"grad_norm": 10.157922840931477,
"learning_rate": 9.747665941843953e-08,
"loss": 0.33205774426460266,
"step": 2299
},
{
"epoch": 2.832512315270936,
"grad_norm": 15.674554832536234,
"learning_rate": 9.607359798384785e-08,
"loss": 1.5672454833984375,
"step": 2300
},
{
"epoch": 2.833743842364532,
"grad_norm": 7.89425528282641,
"learning_rate": 9.468060963463754e-08,
"loss": 0.1868615597486496,
"step": 2301
},
{
"epoch": 2.834975369458128,
"grad_norm": 16.06809449939127,
"learning_rate": 9.329769723212478e-08,
"loss": 0.3485974371433258,
"step": 2302
},
{
"epoch": 2.836206896551724,
"grad_norm": 22.06944110945676,
"learning_rate": 9.192486361693175e-08,
"loss": 0.5702242851257324,
"step": 2303
},
{
"epoch": 2.8374384236453203,
"grad_norm": 13.611203107193855,
"learning_rate": 9.056211160897555e-08,
"loss": 0.7004730105400085,
"step": 2304
},
{
"epoch": 2.8386699507389164,
"grad_norm": 10.23772277567979,
"learning_rate": 8.920944400746589e-08,
"loss": 0.29311710596084595,
"step": 2305
},
{
"epoch": 2.8399014778325125,
"grad_norm": 7.167372063418741,
"learning_rate": 8.786686359089747e-08,
"loss": 0.18041157722473145,
"step": 2306
},
{
"epoch": 2.8411330049261085,
"grad_norm": 8.672887051600437,
"learning_rate": 8.653437311704648e-08,
"loss": 0.2873387634754181,
"step": 2307
},
{
"epoch": 2.8423645320197046,
"grad_norm": 9.699021546064241,
"learning_rate": 8.521197532296188e-08,
"loss": 0.23781178891658783,
"step": 2308
},
{
"epoch": 2.8435960591133007,
"grad_norm": 11.643059711853965,
"learning_rate": 8.38996729249636e-08,
"loss": 0.5913131833076477,
"step": 2309
},
{
"epoch": 2.844827586206897,
"grad_norm": 12.799008291574818,
"learning_rate": 8.259746861863094e-08,
"loss": 0.9139914512634277,
"step": 2310
},
{
"epoch": 2.846059113300493,
"grad_norm": 10.980579183559623,
"learning_rate": 8.130536507880538e-08,
"loss": 0.22883841395378113,
"step": 2311
},
{
"epoch": 2.8472906403940885,
"grad_norm": 9.488904590414009,
"learning_rate": 8.002336495957664e-08,
"loss": 0.6467199325561523,
"step": 2312
},
{
"epoch": 2.8485221674876846,
"grad_norm": 17.044793614561804,
"learning_rate": 7.875147089428436e-08,
"loss": 0.48100385069847107,
"step": 2313
},
{
"epoch": 2.8497536945812807,
"grad_norm": 6.232324566569768,
"learning_rate": 7.748968549550761e-08,
"loss": 0.22535499930381775,
"step": 2314
},
{
"epoch": 2.850985221674877,
"grad_norm": 16.357795976490426,
"learning_rate": 7.623801135506148e-08,
"loss": 0.7971012592315674,
"step": 2315
},
{
"epoch": 2.852216748768473,
"grad_norm": 10.56546293503534,
"learning_rate": 7.499645104399156e-08,
"loss": 0.6965846419334412,
"step": 2316
},
{
"epoch": 2.853448275862069,
"grad_norm": 10.699552582949096,
"learning_rate": 7.376500711257062e-08,
"loss": 0.2827698588371277,
"step": 2317
},
{
"epoch": 2.854679802955665,
"grad_norm": 11.75504997847818,
"learning_rate": 7.254368209028862e-08,
"loss": 0.4453064203262329,
"step": 2318
},
{
"epoch": 2.855911330049261,
"grad_norm": 10.373311779049724,
"learning_rate": 7.133247848585268e-08,
"loss": 0.5363994836807251,
"step": 2319
},
{
"epoch": 2.857142857142857,
"grad_norm": 10.742091428994968,
"learning_rate": 7.013139878717934e-08,
"loss": 0.33071067929267883,
"step": 2320
},
{
"epoch": 2.8583743842364533,
"grad_norm": 10.02135718464731,
"learning_rate": 6.894044546138845e-08,
"loss": 0.6118582487106323,
"step": 2321
},
{
"epoch": 2.8596059113300494,
"grad_norm": 11.952226631897975,
"learning_rate": 6.775962095480037e-08,
"loss": 0.4941851496696472,
"step": 2322
},
{
"epoch": 2.8608374384236455,
"grad_norm": 12.467253293652027,
"learning_rate": 6.65889276929299e-08,
"loss": 0.9043294191360474,
"step": 2323
},
{
"epoch": 2.862068965517241,
"grad_norm": 9.372107033246923,
"learning_rate": 6.542836808048181e-08,
"loss": 0.5352662801742554,
"step": 2324
},
{
"epoch": 2.863300492610837,
"grad_norm": 13.465637997675985,
"learning_rate": 6.427794450134529e-08,
"loss": 0.622706413269043,
"step": 2325
},
{
"epoch": 2.8645320197044333,
"grad_norm": 10.951531479275452,
"learning_rate": 6.313765931858785e-08,
"loss": 0.32065168023109436,
"step": 2326
},
{
"epoch": 2.8657635467980294,
"grad_norm": 11.940905797523131,
"learning_rate": 6.200751487445367e-08,
"loss": 0.5308477878570557,
"step": 2327
},
{
"epoch": 2.8669950738916254,
"grad_norm": 12.032315008603385,
"learning_rate": 6.088751349035693e-08,
"loss": 0.4006965756416321,
"step": 2328
},
{
"epoch": 2.8682266009852215,
"grad_norm": 14.936202143915887,
"learning_rate": 5.977765746687569e-08,
"loss": 0.29346001148223877,
"step": 2329
},
{
"epoch": 2.8694581280788176,
"grad_norm": 12.39243720991369,
"learning_rate": 5.8677949083749686e-08,
"loss": 0.17921757698059082,
"step": 2330
},
{
"epoch": 2.8706896551724137,
"grad_norm": 9.58038552158238,
"learning_rate": 5.758839059987531e-08,
"loss": 0.3909390866756439,
"step": 2331
},
{
"epoch": 2.87192118226601,
"grad_norm": 15.9782663440221,
"learning_rate": 5.650898425329676e-08,
"loss": 0.2947097420692444,
"step": 2332
},
{
"epoch": 2.873152709359606,
"grad_norm": 10.207214673211949,
"learning_rate": 5.5439732261209356e-08,
"loss": 0.27580755949020386,
"step": 2333
},
{
"epoch": 2.874384236453202,
"grad_norm": 10.944513423861029,
"learning_rate": 5.438063681994732e-08,
"loss": 0.5352618098258972,
"step": 2334
},
{
"epoch": 2.875615763546798,
"grad_norm": 11.026909219005717,
"learning_rate": 5.333170010498434e-08,
"loss": 0.4425346553325653,
"step": 2335
},
{
"epoch": 2.876847290640394,
"grad_norm": 10.718057032304046,
"learning_rate": 5.229292427092525e-08,
"loss": 0.3107433319091797,
"step": 2336
},
{
"epoch": 2.87807881773399,
"grad_norm": 12.247326551233483,
"learning_rate": 5.126431145150546e-08,
"loss": 0.8459264039993286,
"step": 2337
},
{
"epoch": 2.8793103448275863,
"grad_norm": 9.9858024833323,
"learning_rate": 5.024586375958429e-08,
"loss": 0.6122205257415771,
"step": 2338
},
{
"epoch": 2.8805418719211824,
"grad_norm": 8.326107009918898,
"learning_rate": 4.9237583287139454e-08,
"loss": 0.28234463930130005,
"step": 2339
},
{
"epoch": 2.8817733990147785,
"grad_norm": 9.707118891697133,
"learning_rate": 4.823947210526647e-08,
"loss": 0.26258403062820435,
"step": 2340
},
{
"epoch": 2.8830049261083746,
"grad_norm": 11.37690573459154,
"learning_rate": 4.72515322641709e-08,
"loss": 0.16676993668079376,
"step": 2341
},
{
"epoch": 2.8842364532019706,
"grad_norm": 10.744107147683183,
"learning_rate": 4.627376579316667e-08,
"loss": 0.5982980132102966,
"step": 2342
},
{
"epoch": 2.8854679802955667,
"grad_norm": 11.814730049244856,
"learning_rate": 4.530617470066834e-08,
"loss": 0.3576871156692505,
"step": 2343
},
{
"epoch": 2.886699507389163,
"grad_norm": 7.558098865292991,
"learning_rate": 4.4348760974192715e-08,
"loss": 0.22213858366012573,
"step": 2344
},
{
"epoch": 2.887931034482759,
"grad_norm": 31.227769055767126,
"learning_rate": 4.340152658034835e-08,
"loss": 0.7075624465942383,
"step": 2345
},
{
"epoch": 2.8891625615763545,
"grad_norm": 13.602269942674353,
"learning_rate": 4.246447346483662e-08,
"loss": 0.35476282238960266,
"step": 2346
},
{
"epoch": 2.8903940886699506,
"grad_norm": 11.66167288478714,
"learning_rate": 4.153760355244507e-08,
"loss": 0.4569534659385681,
"step": 2347
},
{
"epoch": 2.8916256157635467,
"grad_norm": 12.232619433370953,
"learning_rate": 4.062091874704355e-08,
"loss": 0.8425757884979248,
"step": 2348
},
{
"epoch": 2.892857142857143,
"grad_norm": 15.584381566055246,
"learning_rate": 3.971442093158195e-08,
"loss": 0.6543349623680115,
"step": 2349
},
{
"epoch": 2.894088669950739,
"grad_norm": 12.232909525407603,
"learning_rate": 3.8818111968083607e-08,
"loss": 0.4949587285518646,
"step": 2350
},
{
"epoch": 2.895320197044335,
"grad_norm": 28.009977519758436,
"learning_rate": 3.7931993697644664e-08,
"loss": 1.0205111503601074,
"step": 2351
},
{
"epoch": 2.896551724137931,
"grad_norm": 8.083430035021566,
"learning_rate": 3.7056067940427484e-08,
"loss": 0.429599404335022,
"step": 2352
},
{
"epoch": 2.897783251231527,
"grad_norm": 11.304307823971973,
"learning_rate": 3.6190336495659504e-08,
"loss": 0.6471319198608398,
"step": 2353
},
{
"epoch": 2.899014778325123,
"grad_norm": 11.052274245265034,
"learning_rate": 3.533480114162713e-08,
"loss": 0.6227458715438843,
"step": 2354
},
{
"epoch": 2.9002463054187193,
"grad_norm": 10.145305358695179,
"learning_rate": 3.448946363567296e-08,
"loss": 0.35620149970054626,
"step": 2355
},
{
"epoch": 2.9014778325123154,
"grad_norm": 9.735362530555188,
"learning_rate": 3.365432571419247e-08,
"loss": 0.41157659888267517,
"step": 2356
},
{
"epoch": 2.9027093596059115,
"grad_norm": 16.113614254695477,
"learning_rate": 3.282938909263122e-08,
"loss": 0.39660418033599854,
"step": 2357
},
{
"epoch": 2.903940886699507,
"grad_norm": 12.303598539070832,
"learning_rate": 3.201465546547988e-08,
"loss": 0.37891146540641785,
"step": 2358
},
{
"epoch": 2.905172413793103,
"grad_norm": 11.49013243084427,
"learning_rate": 3.121012650627031e-08,
"loss": 0.4459425210952759,
"step": 2359
},
{
"epoch": 2.9064039408866993,
"grad_norm": 12.062068468114942,
"learning_rate": 3.041580386757448e-08,
"loss": 0.4933587610721588,
"step": 2360
},
{
"epoch": 2.9076354679802954,
"grad_norm": 7.691939807180967,
"learning_rate": 2.9631689180999457e-08,
"loss": 0.16229723393917084,
"step": 2361
},
{
"epoch": 2.9088669950738915,
"grad_norm": 11.649633348013484,
"learning_rate": 2.885778405718409e-08,
"loss": 0.4784936308860779,
"step": 2362
},
{
"epoch": 2.9100985221674875,
"grad_norm": 20.64984541908695,
"learning_rate": 2.8094090085795112e-08,
"loss": 0.6622560620307922,
"step": 2363
},
{
"epoch": 2.9113300492610836,
"grad_norm": 9.783513206502265,
"learning_rate": 2.7340608835526584e-08,
"loss": 0.3672278821468353,
"step": 2364
},
{
"epoch": 2.9125615763546797,
"grad_norm": 6.04349473256102,
"learning_rate": 2.6597341854092685e-08,
"loss": 0.3247770667076111,
"step": 2365
},
{
"epoch": 2.913793103448276,
"grad_norm": 11.650085297412613,
"learning_rate": 2.586429066822771e-08,
"loss": 0.3467229902744293,
"step": 2366
},
{
"epoch": 2.915024630541872,
"grad_norm": 11.842612737683362,
"learning_rate": 2.514145678368163e-08,
"loss": 0.6725019812583923,
"step": 2367
},
{
"epoch": 2.916256157635468,
"grad_norm": 8.454338307427385,
"learning_rate": 2.4428841685217863e-08,
"loss": 0.6760755777359009,
"step": 2368
},
{
"epoch": 2.917487684729064,
"grad_norm": 13.555178809367312,
"learning_rate": 2.3726446836608298e-08,
"loss": 0.5354422330856323,
"step": 2369
},
{
"epoch": 2.91871921182266,
"grad_norm": 11.004737348047312,
"learning_rate": 2.3034273680632157e-08,
"loss": 0.3656280040740967,
"step": 2370
},
{
"epoch": 2.9199507389162562,
"grad_norm": 9.99595612427158,
"learning_rate": 2.235232363907269e-08,
"loss": 0.28186920285224915,
"step": 2371
},
{
"epoch": 2.9211822660098523,
"grad_norm": 16.789031513751276,
"learning_rate": 2.168059811271439e-08,
"loss": 0.31556010246276855,
"step": 2372
},
{
"epoch": 2.9224137931034484,
"grad_norm": 7.870447962098653,
"learning_rate": 2.101909848133743e-08,
"loss": 0.33978280425071716,
"step": 2373
},
{
"epoch": 2.9236453201970445,
"grad_norm": 13.322556254888749,
"learning_rate": 2.0367826103720457e-08,
"loss": 0.5645813941955566,
"step": 2374
},
{
"epoch": 2.9248768472906406,
"grad_norm": 6.936377752521131,
"learning_rate": 1.9726782317632255e-08,
"loss": 0.21976767480373383,
"step": 2375
},
{
"epoch": 2.9261083743842367,
"grad_norm": 16.201679118604396,
"learning_rate": 1.9095968439830637e-08,
"loss": 0.6068276166915894,
"step": 2376
},
{
"epoch": 2.9273399014778327,
"grad_norm": 10.683769815067068,
"learning_rate": 1.8475385766063002e-08,
"loss": 0.2844882607460022,
"step": 2377
},
{
"epoch": 2.928571428571429,
"grad_norm": 22.182288301690132,
"learning_rate": 1.786503557105912e-08,
"loss": 1.1885827779769897,
"step": 2378
},
{
"epoch": 2.9298029556650245,
"grad_norm": 8.221573464179809,
"learning_rate": 1.7264919108529455e-08,
"loss": 0.4241114854812622,
"step": 2379
},
{
"epoch": 2.9310344827586206,
"grad_norm": 10.23479597630979,
"learning_rate": 1.6675037611165735e-08,
"loss": 0.9062713980674744,
"step": 2380
},
{
"epoch": 2.9322660098522166,
"grad_norm": 9.83143734077978,
"learning_rate": 1.6095392290635393e-08,
"loss": 0.29996055364608765,
"step": 2381
},
{
"epoch": 2.9334975369458127,
"grad_norm": 9.191744534619497,
"learning_rate": 1.552598433757879e-08,
"loss": 0.3901692032814026,
"step": 2382
},
{
"epoch": 2.934729064039409,
"grad_norm": 10.314975796862411,
"learning_rate": 1.4966814921608674e-08,
"loss": 0.36974531412124634,
"step": 2383
},
{
"epoch": 2.935960591133005,
"grad_norm": 10.965587726479475,
"learning_rate": 1.441788519130738e-08,
"loss": 0.2913818359375,
"step": 2384
},
{
"epoch": 2.937192118226601,
"grad_norm": 26.225721932440074,
"learning_rate": 1.3879196274224626e-08,
"loss": 2.8897290229797363,
"step": 2385
},
{
"epoch": 2.938423645320197,
"grad_norm": 16.567199226805975,
"learning_rate": 1.335074927687141e-08,
"loss": 0.7396224141120911,
"step": 2386
},
{
"epoch": 2.939655172413793,
"grad_norm": 10.384159480919202,
"learning_rate": 1.2832545284724995e-08,
"loss": 0.2923913896083832,
"step": 2387
},
{
"epoch": 2.9408866995073892,
"grad_norm": 12.315507900916186,
"learning_rate": 1.2324585362220032e-08,
"loss": 0.60726398229599,
"step": 2388
},
{
"epoch": 2.9421182266009853,
"grad_norm": 10.077538225946919,
"learning_rate": 1.1826870552749669e-08,
"loss": 0.3081626892089844,
"step": 2389
},
{
"epoch": 2.9433497536945814,
"grad_norm": 15.192636407836343,
"learning_rate": 1.1339401878663337e-08,
"loss": 0.7774905562400818,
"step": 2390
},
{
"epoch": 2.9445812807881775,
"grad_norm": 12.649581445218459,
"learning_rate": 1.0862180341263962e-08,
"loss": 0.5568622350692749,
"step": 2391
},
{
"epoch": 2.945812807881773,
"grad_norm": 11.4557765341612,
"learning_rate": 1.039520692080409e-08,
"loss": 0.42753443121910095,
"step": 2392
},
{
"epoch": 2.947044334975369,
"grad_norm": 12.049826060673517,
"learning_rate": 9.938482576487551e-09,
"loss": 0.33313125371932983,
"step": 2393
},
{
"epoch": 2.9482758620689653,
"grad_norm": 11.358169603413613,
"learning_rate": 9.492008246466122e-09,
"loss": 0.4345099925994873,
"step": 2394
},
{
"epoch": 2.9495073891625614,
"grad_norm": 15.061185553672066,
"learning_rate": 9.055784847836202e-09,
"loss": 0.6844139695167542,
"step": 2395
},
{
"epoch": 2.9507389162561575,
"grad_norm": 12.25434358933355,
"learning_rate": 8.629813276637144e-09,
"loss": 0.4944530725479126,
"step": 2396
},
{
"epoch": 2.9519704433497536,
"grad_norm": 7.240836775147592,
"learning_rate": 8.214094407851814e-09,
"loss": 0.1517336368560791,
"step": 2397
},
{
"epoch": 2.9532019704433496,
"grad_norm": 11.570980194113849,
"learning_rate": 7.808629095402697e-09,
"loss": 0.24804279208183289,
"step": 2398
},
{
"epoch": 2.9544334975369457,
"grad_norm": 15.785024108321435,
"learning_rate": 7.413418172149689e-09,
"loss": 1.2773240804672241,
"step": 2399
},
{
"epoch": 2.955665024630542,
"grad_norm": 12.516388230034497,
"learning_rate": 7.028462449889528e-09,
"loss": 0.20905320346355438,
"step": 2400
},
{
"epoch": 2.956896551724138,
"grad_norm": 6.362652358430743,
"learning_rate": 6.6537627193558055e-09,
"loss": 0.24830211699008942,
"step": 2401
},
{
"epoch": 2.958128078817734,
"grad_norm": 9.391013644944394,
"learning_rate": 6.289319750212852e-09,
"loss": 0.30148234963417053,
"step": 2402
},
{
"epoch": 2.95935960591133,
"grad_norm": 11.036169214095409,
"learning_rate": 5.93513429105741e-09,
"loss": 0.7273882031440735,
"step": 2403
},
{
"epoch": 2.960591133004926,
"grad_norm": 10.956019864515577,
"learning_rate": 5.591207069417515e-09,
"loss": 0.4958484172821045,
"step": 2404
},
{
"epoch": 2.9618226600985222,
"grad_norm": 13.272684139309336,
"learning_rate": 5.257538791749173e-09,
"loss": 0.5852301120758057,
"step": 2405
},
{
"epoch": 2.9630541871921183,
"grad_norm": 15.300683310135565,
"learning_rate": 4.934130143435245e-09,
"loss": 0.5483534336090088,
"step": 2406
},
{
"epoch": 2.9642857142857144,
"grad_norm": 9.624016617554009,
"learning_rate": 4.6209817887848955e-09,
"loss": 0.49854928255081177,
"step": 2407
},
{
"epoch": 2.9655172413793105,
"grad_norm": 8.615173379839112,
"learning_rate": 4.318094371031922e-09,
"loss": 0.9770829677581787,
"step": 2408
},
{
"epoch": 2.9667487684729066,
"grad_norm": 15.370084776473758,
"learning_rate": 4.025468512333098e-09,
"loss": 0.4265647530555725,
"step": 2409
},
{
"epoch": 2.9679802955665027,
"grad_norm": 12.632393723486729,
"learning_rate": 3.743104813767051e-09,
"loss": 0.6890873908996582,
"step": 2410
},
{
"epoch": 2.9692118226600988,
"grad_norm": 8.772985107195037,
"learning_rate": 3.471003855332611e-09,
"loss": 0.28604504466056824,
"step": 2411
},
{
"epoch": 2.970443349753695,
"grad_norm": 9.587235477416659,
"learning_rate": 3.2091661959487986e-09,
"loss": 0.3280025124549866,
"step": 2412
},
{
"epoch": 2.9716748768472905,
"grad_norm": 9.74052346916064,
"learning_rate": 2.9575923734520562e-09,
"loss": 0.23375985026359558,
"step": 2413
},
{
"epoch": 2.9729064039408866,
"grad_norm": 14.377712378651319,
"learning_rate": 2.7162829045979113e-09,
"loss": 0.5062013864517212,
"step": 2414
},
{
"epoch": 2.9741379310344827,
"grad_norm": 10.486023439825937,
"learning_rate": 2.4852382850554245e-09,
"loss": 0.46517398953437805,
"step": 2415
},
{
"epoch": 2.9753694581280787,
"grad_norm": 7.705201332847603,
"learning_rate": 2.264458989410523e-09,
"loss": 0.43281105160713196,
"step": 2416
},
{
"epoch": 2.976600985221675,
"grad_norm": 9.481633319521942,
"learning_rate": 2.0539454711626663e-09,
"loss": 0.6278485655784607,
"step": 2417
},
{
"epoch": 2.977832512315271,
"grad_norm": 12.691647261969463,
"learning_rate": 1.8536981627254036e-09,
"loss": 0.3320518136024475,
"step": 2418
},
{
"epoch": 2.979064039408867,
"grad_norm": 9.582038617142,
"learning_rate": 1.6637174754230435e-09,
"loss": 0.4568738341331482,
"step": 2419
},
{
"epoch": 2.980295566502463,
"grad_norm": 10.563009615677867,
"learning_rate": 1.4840037994923173e-09,
"loss": 0.24025380611419678,
"step": 2420
},
{
"epoch": 2.981527093596059,
"grad_norm": 14.650292148384931,
"learning_rate": 1.3145575040801605e-09,
"loss": 0.33217573165893555,
"step": 2421
},
{
"epoch": 2.9827586206896552,
"grad_norm": 23.286828169967034,
"learning_rate": 1.1553789372453771e-09,
"loss": 1.5295354127883911,
"step": 2422
},
{
"epoch": 2.9839901477832513,
"grad_norm": 16.800662700378666,
"learning_rate": 1.0064684259525337e-09,
"loss": 0.6207250952720642,
"step": 2423
},
{
"epoch": 2.9852216748768474,
"grad_norm": 20.655163645870832,
"learning_rate": 8.678262760775102e-10,
"loss": 0.4011062681674957,
"step": 2424
},
{
"epoch": 2.9864532019704435,
"grad_norm": 12.812116716093689,
"learning_rate": 7.394527724030598e-10,
"loss": 0.8355351090431213,
"step": 2425
},
{
"epoch": 2.987684729064039,
"grad_norm": 13.524667045497342,
"learning_rate": 6.213481786199182e-10,
"loss": 0.6552157998085022,
"step": 2426
},
{
"epoch": 2.9889162561576352,
"grad_norm": 9.071239617590464,
"learning_rate": 5.13512737324029e-10,
"loss": 0.4416411519050598,
"step": 2427
},
{
"epoch": 2.9901477832512313,
"grad_norm": 12.103653519709662,
"learning_rate": 4.159466700187631e-10,
"loss": 0.3720128834247589,
"step": 2428
},
{
"epoch": 2.9913793103448274,
"grad_norm": 7.981239501743612,
"learning_rate": 3.2865017711380955e-10,
"loss": 0.6710848212242126,
"step": 2429
},
{
"epoch": 2.9926108374384235,
"grad_norm": 11.769326063023964,
"learning_rate": 2.516234379235094e-10,
"loss": 0.7640970349311829,
"step": 2430
},
{
"epoch": 2.9938423645320196,
"grad_norm": 11.664052062324599,
"learning_rate": 1.848666106674113e-10,
"loss": 0.5783921480178833,
"step": 2431
},
{
"epoch": 2.9950738916256157,
"grad_norm": 11.283478806003906,
"learning_rate": 1.2837983246916098e-10,
"loss": 0.411626935005188,
"step": 2432
},
{
"epoch": 2.9963054187192117,
"grad_norm": 11.703360380276939,
"learning_rate": 8.216321935816673e-11,
"loss": 0.529446005821228,
"step": 2433
},
{
"epoch": 2.997536945812808,
"grad_norm": 9.632699414961296,
"learning_rate": 4.6216866266823867e-11,
"loss": 0.44549500942230225,
"step": 2434
},
{
"epoch": 2.998768472906404,
"grad_norm": 9.699682514575105,
"learning_rate": 2.0540847032179955e-11,
"loss": 0.2854122519493103,
"step": 2435
},
{
"epoch": 3.0,
"grad_norm": 6.925750902905979,
"learning_rate": 5.135214394824672e-12,
"loss": 0.4455873966217041,
"step": 2436
},
{
"epoch": 3.0,
"step": 2436,
"total_flos": 6456127242240.0,
"train_loss": 1.6602046456561104,
"train_runtime": 2865.3381,
"train_samples_per_second": 3.4,
"train_steps_per_second": 0.85
}
],
"logging_steps": 1,
"max_steps": 2436,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6456127242240.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}