Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 49edaca6a4 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-2-4m-bs4
Source: Original Platform
2026-04-20 23:54:05 +08:00

26630 lines
687 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3798,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007898894154818325,
"grad_norm": 40.71173454733523,
"learning_rate": 0.0,
"loss": 3.673168182373047,
"step": 1
},
{
"epoch": 0.001579778830963665,
"grad_norm": 33.74250956215495,
"learning_rate": 2.631578947368421e-08,
"loss": 4.129596710205078,
"step": 2
},
{
"epoch": 0.002369668246445498,
"grad_norm": 36.11546455389956,
"learning_rate": 5.263157894736842e-08,
"loss": 4.184542655944824,
"step": 3
},
{
"epoch": 0.00315955766192733,
"grad_norm": 35.383571366831234,
"learning_rate": 7.894736842105264e-08,
"loss": 4.1142401695251465,
"step": 4
},
{
"epoch": 0.0039494470774091624,
"grad_norm": 34.37559125944977,
"learning_rate": 1.0526315789473685e-07,
"loss": 3.7817375659942627,
"step": 5
},
{
"epoch": 0.004739336492890996,
"grad_norm": 36.14685215620937,
"learning_rate": 1.3157894736842107e-07,
"loss": 4.114397048950195,
"step": 6
},
{
"epoch": 0.005529225908372828,
"grad_norm": 41.482542925342734,
"learning_rate": 1.5789473684210527e-07,
"loss": 4.3803019523620605,
"step": 7
},
{
"epoch": 0.00631911532385466,
"grad_norm": 33.58108993777,
"learning_rate": 1.8421052631578948e-07,
"loss": 4.10243558883667,
"step": 8
},
{
"epoch": 0.0071090047393364926,
"grad_norm": 39.33271362667731,
"learning_rate": 2.105263157894737e-07,
"loss": 3.9596402645111084,
"step": 9
},
{
"epoch": 0.007898894154818325,
"grad_norm": 50.03237601031588,
"learning_rate": 2.3684210526315792e-07,
"loss": 4.745785713195801,
"step": 10
},
{
"epoch": 0.008688783570300158,
"grad_norm": 35.67111148226347,
"learning_rate": 2.6315789473684213e-07,
"loss": 3.8755834102630615,
"step": 11
},
{
"epoch": 0.009478672985781991,
"grad_norm": 32.70142008970876,
"learning_rate": 2.894736842105263e-07,
"loss": 4.783450603485107,
"step": 12
},
{
"epoch": 0.010268562401263823,
"grad_norm": 52.049082104276714,
"learning_rate": 3.1578947368421055e-07,
"loss": 4.5102057456970215,
"step": 13
},
{
"epoch": 0.011058451816745656,
"grad_norm": 29.064967370756015,
"learning_rate": 3.421052631578948e-07,
"loss": 3.072397470474243,
"step": 14
},
{
"epoch": 0.011848341232227487,
"grad_norm": 22.10944679220599,
"learning_rate": 3.6842105263157896e-07,
"loss": 2.7109313011169434,
"step": 15
},
{
"epoch": 0.01263823064770932,
"grad_norm": 40.53408902245728,
"learning_rate": 3.9473684210526315e-07,
"loss": 4.520854949951172,
"step": 16
},
{
"epoch": 0.013428120063191154,
"grad_norm": 36.60530673657224,
"learning_rate": 4.210526315789474e-07,
"loss": 4.224055290222168,
"step": 17
},
{
"epoch": 0.014218009478672985,
"grad_norm": 33.336265071789754,
"learning_rate": 4.473684210526316e-07,
"loss": 4.239147186279297,
"step": 18
},
{
"epoch": 0.015007898894154818,
"grad_norm": 43.94432106792913,
"learning_rate": 4.7368421052631585e-07,
"loss": 4.043759346008301,
"step": 19
},
{
"epoch": 0.01579778830963665,
"grad_norm": 33.99185039525682,
"learning_rate": 5.000000000000001e-07,
"loss": 4.236106872558594,
"step": 20
},
{
"epoch": 0.016587677725118485,
"grad_norm": 38.29448839626639,
"learning_rate": 5.263157894736843e-07,
"loss": 4.180179595947266,
"step": 21
},
{
"epoch": 0.017377567140600316,
"grad_norm": 30.15666884397043,
"learning_rate": 5.526315789473684e-07,
"loss": 3.893503189086914,
"step": 22
},
{
"epoch": 0.018167456556082148,
"grad_norm": 36.85491373097027,
"learning_rate": 5.789473684210526e-07,
"loss": 4.568385124206543,
"step": 23
},
{
"epoch": 0.018957345971563982,
"grad_norm": 32.16948864627544,
"learning_rate": 6.052631578947369e-07,
"loss": 3.1852474212646484,
"step": 24
},
{
"epoch": 0.019747235387045814,
"grad_norm": 40.73483389215707,
"learning_rate": 6.315789473684211e-07,
"loss": 5.036445617675781,
"step": 25
},
{
"epoch": 0.020537124802527645,
"grad_norm": 37.103608164895796,
"learning_rate": 6.578947368421053e-07,
"loss": 4.371845245361328,
"step": 26
},
{
"epoch": 0.02132701421800948,
"grad_norm": 24.05172035473979,
"learning_rate": 6.842105263157896e-07,
"loss": 3.7923712730407715,
"step": 27
},
{
"epoch": 0.022116903633491312,
"grad_norm": 35.28854412414826,
"learning_rate": 7.105263157894737e-07,
"loss": 4.174007415771484,
"step": 28
},
{
"epoch": 0.022906793048973143,
"grad_norm": 18.614769000709014,
"learning_rate": 7.368421052631579e-07,
"loss": 2.7547430992126465,
"step": 29
},
{
"epoch": 0.023696682464454975,
"grad_norm": 28.426674964913303,
"learning_rate": 7.631578947368422e-07,
"loss": 4.140813827514648,
"step": 30
},
{
"epoch": 0.02448657187993681,
"grad_norm": 22.844379088367365,
"learning_rate": 7.894736842105263e-07,
"loss": 3.9789390563964844,
"step": 31
},
{
"epoch": 0.02527646129541864,
"grad_norm": 16.324910957553573,
"learning_rate": 8.157894736842106e-07,
"loss": 3.408233165740967,
"step": 32
},
{
"epoch": 0.026066350710900472,
"grad_norm": 17.84036406675527,
"learning_rate": 8.421052631578948e-07,
"loss": 3.152615547180176,
"step": 33
},
{
"epoch": 0.026856240126382307,
"grad_norm": 37.04828879151497,
"learning_rate": 8.68421052631579e-07,
"loss": 4.3278489112854,
"step": 34
},
{
"epoch": 0.02764612954186414,
"grad_norm": 23.57795025738247,
"learning_rate": 8.947368421052632e-07,
"loss": 3.982863426208496,
"step": 35
},
{
"epoch": 0.02843601895734597,
"grad_norm": 21.626842144038697,
"learning_rate": 9.210526315789474e-07,
"loss": 3.7231547832489014,
"step": 36
},
{
"epoch": 0.029225908372827805,
"grad_norm": 16.16330549232114,
"learning_rate": 9.473684210526317e-07,
"loss": 3.6238391399383545,
"step": 37
},
{
"epoch": 0.030015797788309637,
"grad_norm": 19.328825464830587,
"learning_rate": 9.736842105263158e-07,
"loss": 3.8130602836608887,
"step": 38
},
{
"epoch": 0.030805687203791468,
"grad_norm": 13.0720498885057,
"learning_rate": 1.0000000000000002e-06,
"loss": 3.765305519104004,
"step": 39
},
{
"epoch": 0.0315955766192733,
"grad_norm": 20.81732653658493,
"learning_rate": 1.0263157894736843e-06,
"loss": 3.808793783187866,
"step": 40
},
{
"epoch": 0.03238546603475513,
"grad_norm": 16.11869827988803,
"learning_rate": 1.0526315789473685e-06,
"loss": 3.3212504386901855,
"step": 41
},
{
"epoch": 0.03317535545023697,
"grad_norm": 19.85916750032169,
"learning_rate": 1.0789473684210527e-06,
"loss": 3.4793872833251953,
"step": 42
},
{
"epoch": 0.0339652448657188,
"grad_norm": 14.5384162049997,
"learning_rate": 1.1052631578947369e-06,
"loss": 3.4185662269592285,
"step": 43
},
{
"epoch": 0.03475513428120063,
"grad_norm": 13.037105709248864,
"learning_rate": 1.1315789473684213e-06,
"loss": 3.05875563621521,
"step": 44
},
{
"epoch": 0.035545023696682464,
"grad_norm": 28.094177440990883,
"learning_rate": 1.1578947368421053e-06,
"loss": 4.200315475463867,
"step": 45
},
{
"epoch": 0.036334913112164295,
"grad_norm": 15.298203417574237,
"learning_rate": 1.1842105263157894e-06,
"loss": 3.230691909790039,
"step": 46
},
{
"epoch": 0.03712480252764613,
"grad_norm": 20.703338617496122,
"learning_rate": 1.2105263157894738e-06,
"loss": 3.226253032684326,
"step": 47
},
{
"epoch": 0.037914691943127965,
"grad_norm": 19.766902333430476,
"learning_rate": 1.236842105263158e-06,
"loss": 3.6443753242492676,
"step": 48
},
{
"epoch": 0.038704581358609796,
"grad_norm": 12.540974533970843,
"learning_rate": 1.2631578947368422e-06,
"loss": 3.3909823894500732,
"step": 49
},
{
"epoch": 0.03949447077409163,
"grad_norm": 8.67596125382628,
"learning_rate": 1.2894736842105266e-06,
"loss": 3.0414187908172607,
"step": 50
},
{
"epoch": 0.04028436018957346,
"grad_norm": 14.600880901536001,
"learning_rate": 1.3157894736842106e-06,
"loss": 3.7503838539123535,
"step": 51
},
{
"epoch": 0.04107424960505529,
"grad_norm": 17.076909526004943,
"learning_rate": 1.342105263157895e-06,
"loss": 3.5003442764282227,
"step": 52
},
{
"epoch": 0.04186413902053712,
"grad_norm": 9.236759947184684,
"learning_rate": 1.3684210526315791e-06,
"loss": 2.874293088912964,
"step": 53
},
{
"epoch": 0.04265402843601896,
"grad_norm": 11.84849423549626,
"learning_rate": 1.394736842105263e-06,
"loss": 3.1183371543884277,
"step": 54
},
{
"epoch": 0.04344391785150079,
"grad_norm": 12.90432487886042,
"learning_rate": 1.4210526315789475e-06,
"loss": 3.250241756439209,
"step": 55
},
{
"epoch": 0.044233807266982623,
"grad_norm": 16.869043357282678,
"learning_rate": 1.4473684210526317e-06,
"loss": 3.1247291564941406,
"step": 56
},
{
"epoch": 0.045023696682464455,
"grad_norm": 9.282453097796106,
"learning_rate": 1.4736842105263159e-06,
"loss": 2.924187421798706,
"step": 57
},
{
"epoch": 0.045813586097946286,
"grad_norm": 8.648636973287344,
"learning_rate": 1.5e-06,
"loss": 3.1763153076171875,
"step": 58
},
{
"epoch": 0.04660347551342812,
"grad_norm": 8.629843477816483,
"learning_rate": 1.5263157894736844e-06,
"loss": 3.0944461822509766,
"step": 59
},
{
"epoch": 0.04739336492890995,
"grad_norm": 52.54991212893474,
"learning_rate": 1.5526315789473686e-06,
"loss": 2.84696888923645,
"step": 60
},
{
"epoch": 0.04818325434439179,
"grad_norm": 10.192033588061753,
"learning_rate": 1.5789473684210526e-06,
"loss": 3.2654457092285156,
"step": 61
},
{
"epoch": 0.04897314375987362,
"grad_norm": 19.884719085034156,
"learning_rate": 1.605263157894737e-06,
"loss": 3.7629897594451904,
"step": 62
},
{
"epoch": 0.04976303317535545,
"grad_norm": 12.313693728331911,
"learning_rate": 1.6315789473684212e-06,
"loss": 3.5302507877349854,
"step": 63
},
{
"epoch": 0.05055292259083728,
"grad_norm": 17.01311845510386,
"learning_rate": 1.6578947368421053e-06,
"loss": 3.5614633560180664,
"step": 64
},
{
"epoch": 0.05134281200631911,
"grad_norm": 16.95840885870688,
"learning_rate": 1.6842105263157895e-06,
"loss": 3.264988660812378,
"step": 65
},
{
"epoch": 0.052132701421800945,
"grad_norm": 14.308550297948006,
"learning_rate": 1.710526315789474e-06,
"loss": 3.265235424041748,
"step": 66
},
{
"epoch": 0.05292259083728278,
"grad_norm": 14.519207172660034,
"learning_rate": 1.736842105263158e-06,
"loss": 3.188286542892456,
"step": 67
},
{
"epoch": 0.053712480252764615,
"grad_norm": 17.603652475188834,
"learning_rate": 1.7631578947368423e-06,
"loss": 2.9039247035980225,
"step": 68
},
{
"epoch": 0.054502369668246446,
"grad_norm": 11.285868937730449,
"learning_rate": 1.7894736842105265e-06,
"loss": 3.108914375305176,
"step": 69
},
{
"epoch": 0.05529225908372828,
"grad_norm": 9.21536421673824,
"learning_rate": 1.8157894736842109e-06,
"loss": 2.8382675647735596,
"step": 70
},
{
"epoch": 0.05608214849921011,
"grad_norm": 13.008599403705528,
"learning_rate": 1.8421052631578948e-06,
"loss": 3.444577217102051,
"step": 71
},
{
"epoch": 0.05687203791469194,
"grad_norm": 18.071771458815004,
"learning_rate": 1.868421052631579e-06,
"loss": 3.67020320892334,
"step": 72
},
{
"epoch": 0.05766192733017378,
"grad_norm": 10.064836586829417,
"learning_rate": 1.8947368421052634e-06,
"loss": 3.3886473178863525,
"step": 73
},
{
"epoch": 0.05845181674565561,
"grad_norm": 10.757354094621407,
"learning_rate": 1.9210526315789474e-06,
"loss": 2.9087605476379395,
"step": 74
},
{
"epoch": 0.05924170616113744,
"grad_norm": 11.450459481352778,
"learning_rate": 1.9473684210526315e-06,
"loss": 3.3567676544189453,
"step": 75
},
{
"epoch": 0.06003159557661927,
"grad_norm": 13.789083297248077,
"learning_rate": 1.973684210526316e-06,
"loss": 3.567488431930542,
"step": 76
},
{
"epoch": 0.060821484992101105,
"grad_norm": 10.972976083153702,
"learning_rate": 2.0000000000000003e-06,
"loss": 3.606407642364502,
"step": 77
},
{
"epoch": 0.061611374407582936,
"grad_norm": 12.31068362309533,
"learning_rate": 2.026315789473684e-06,
"loss": 3.5690627098083496,
"step": 78
},
{
"epoch": 0.06240126382306477,
"grad_norm": 10.473991872514576,
"learning_rate": 2.0526315789473687e-06,
"loss": 3.2413203716278076,
"step": 79
},
{
"epoch": 0.0631911532385466,
"grad_norm": 11.476517559025107,
"learning_rate": 2.078947368421053e-06,
"loss": 3.541959524154663,
"step": 80
},
{
"epoch": 0.06398104265402843,
"grad_norm": 10.327083091837444,
"learning_rate": 2.105263157894737e-06,
"loss": 2.6203503608703613,
"step": 81
},
{
"epoch": 0.06477093206951026,
"grad_norm": 8.585567685933643,
"learning_rate": 2.1315789473684212e-06,
"loss": 3.0848870277404785,
"step": 82
},
{
"epoch": 0.06556082148499211,
"grad_norm": 13.504858685023217,
"learning_rate": 2.1578947368421054e-06,
"loss": 2.950331687927246,
"step": 83
},
{
"epoch": 0.06635071090047394,
"grad_norm": 12.928368968515905,
"learning_rate": 2.1842105263157896e-06,
"loss": 3.3477673530578613,
"step": 84
},
{
"epoch": 0.06714060031595577,
"grad_norm": 34.5955948056376,
"learning_rate": 2.2105263157894738e-06,
"loss": 3.6285767555236816,
"step": 85
},
{
"epoch": 0.0679304897314376,
"grad_norm": 21.770696431108018,
"learning_rate": 2.236842105263158e-06,
"loss": 2.9111273288726807,
"step": 86
},
{
"epoch": 0.06872037914691943,
"grad_norm": 10.760518044408816,
"learning_rate": 2.2631578947368426e-06,
"loss": 3.0999932289123535,
"step": 87
},
{
"epoch": 0.06951026856240126,
"grad_norm": 14.808381283086948,
"learning_rate": 2.2894736842105263e-06,
"loss": 2.990964412689209,
"step": 88
},
{
"epoch": 0.0703001579778831,
"grad_norm": 10.068849907277835,
"learning_rate": 2.3157894736842105e-06,
"loss": 3.190941095352173,
"step": 89
},
{
"epoch": 0.07109004739336493,
"grad_norm": 13.214233489035319,
"learning_rate": 2.342105263157895e-06,
"loss": 3.2512447834014893,
"step": 90
},
{
"epoch": 0.07187993680884676,
"grad_norm": 9.43746117081832,
"learning_rate": 2.368421052631579e-06,
"loss": 2.6215569972991943,
"step": 91
},
{
"epoch": 0.07266982622432859,
"grad_norm": 6.790489861990664,
"learning_rate": 2.3947368421052635e-06,
"loss": 2.6833393573760986,
"step": 92
},
{
"epoch": 0.07345971563981042,
"grad_norm": 10.046189419343667,
"learning_rate": 2.4210526315789477e-06,
"loss": 3.4258813858032227,
"step": 93
},
{
"epoch": 0.07424960505529225,
"grad_norm": 10.439120097914849,
"learning_rate": 2.447368421052632e-06,
"loss": 3.059093952178955,
"step": 94
},
{
"epoch": 0.0750394944707741,
"grad_norm": 15.352463342964036,
"learning_rate": 2.473684210526316e-06,
"loss": 3.1503472328186035,
"step": 95
},
{
"epoch": 0.07582938388625593,
"grad_norm": 10.372976525025615,
"learning_rate": 2.5e-06,
"loss": 3.1993770599365234,
"step": 96
},
{
"epoch": 0.07661927330173776,
"grad_norm": 9.496881055859197,
"learning_rate": 2.5263157894736844e-06,
"loss": 3.2442855834960938,
"step": 97
},
{
"epoch": 0.07740916271721959,
"grad_norm": 18.476512661393848,
"learning_rate": 2.552631578947369e-06,
"loss": 3.0400185585021973,
"step": 98
},
{
"epoch": 0.07819905213270142,
"grad_norm": 9.034794663205666,
"learning_rate": 2.578947368421053e-06,
"loss": 3.134880542755127,
"step": 99
},
{
"epoch": 0.07898894154818326,
"grad_norm": 9.677932164121641,
"learning_rate": 2.605263157894737e-06,
"loss": 3.2033190727233887,
"step": 100
},
{
"epoch": 0.07977883096366509,
"grad_norm": 11.430338365698852,
"learning_rate": 2.631578947368421e-06,
"loss": 2.273861885070801,
"step": 101
},
{
"epoch": 0.08056872037914692,
"grad_norm": 11.80829320514626,
"learning_rate": 2.6578947368421053e-06,
"loss": 2.6837069988250732,
"step": 102
},
{
"epoch": 0.08135860979462875,
"grad_norm": 11.605993978422056,
"learning_rate": 2.68421052631579e-06,
"loss": 3.128217935562134,
"step": 103
},
{
"epoch": 0.08214849921011058,
"grad_norm": 11.686211118151036,
"learning_rate": 2.710526315789474e-06,
"loss": 3.3786544799804688,
"step": 104
},
{
"epoch": 0.08293838862559241,
"grad_norm": 11.970591219069323,
"learning_rate": 2.7368421052631583e-06,
"loss": 3.7821977138519287,
"step": 105
},
{
"epoch": 0.08372827804107424,
"grad_norm": 8.789149918197234,
"learning_rate": 2.7631578947368424e-06,
"loss": 2.573795795440674,
"step": 106
},
{
"epoch": 0.08451816745655608,
"grad_norm": 12.34914621357682,
"learning_rate": 2.789473684210526e-06,
"loss": 3.3090810775756836,
"step": 107
},
{
"epoch": 0.08530805687203792,
"grad_norm": 13.080241716157126,
"learning_rate": 2.815789473684211e-06,
"loss": 2.717519998550415,
"step": 108
},
{
"epoch": 0.08609794628751975,
"grad_norm": 10.317983708712724,
"learning_rate": 2.842105263157895e-06,
"loss": 2.5964150428771973,
"step": 109
},
{
"epoch": 0.08688783570300158,
"grad_norm": 8.933831846542349,
"learning_rate": 2.868421052631579e-06,
"loss": 3.3069779872894287,
"step": 110
},
{
"epoch": 0.08767772511848342,
"grad_norm": 9.117041629342554,
"learning_rate": 2.8947368421052634e-06,
"loss": 2.860931396484375,
"step": 111
},
{
"epoch": 0.08846761453396525,
"grad_norm": 12.283222755341374,
"learning_rate": 2.921052631578948e-06,
"loss": 3.619509220123291,
"step": 112
},
{
"epoch": 0.08925750394944708,
"grad_norm": 9.277584709531098,
"learning_rate": 2.9473684210526317e-06,
"loss": 2.366508960723877,
"step": 113
},
{
"epoch": 0.09004739336492891,
"grad_norm": 7.894805274949022,
"learning_rate": 2.973684210526316e-06,
"loss": 3.2010068893432617,
"step": 114
},
{
"epoch": 0.09083728278041074,
"grad_norm": 7.815315629294605,
"learning_rate": 3e-06,
"loss": 3.1761326789855957,
"step": 115
},
{
"epoch": 0.09162717219589257,
"grad_norm": 12.929613818899176,
"learning_rate": 3.0263157894736843e-06,
"loss": 3.074592351913452,
"step": 116
},
{
"epoch": 0.0924170616113744,
"grad_norm": 10.211678960918519,
"learning_rate": 3.052631578947369e-06,
"loss": 2.9478042125701904,
"step": 117
},
{
"epoch": 0.09320695102685624,
"grad_norm": 14.765610562991277,
"learning_rate": 3.078947368421053e-06,
"loss": 2.716470718383789,
"step": 118
},
{
"epoch": 0.09399684044233807,
"grad_norm": 13.058271020031325,
"learning_rate": 3.1052631578947372e-06,
"loss": 2.669914722442627,
"step": 119
},
{
"epoch": 0.0947867298578199,
"grad_norm": 10.81941062693438,
"learning_rate": 3.131578947368421e-06,
"loss": 2.9119224548339844,
"step": 120
},
{
"epoch": 0.09557661927330174,
"grad_norm": 10.596711287978934,
"learning_rate": 3.157894736842105e-06,
"loss": 2.921963930130005,
"step": 121
},
{
"epoch": 0.09636650868878358,
"grad_norm": 15.574644209065054,
"learning_rate": 3.1842105263157898e-06,
"loss": 3.0262293815612793,
"step": 122
},
{
"epoch": 0.0971563981042654,
"grad_norm": 10.64778680746815,
"learning_rate": 3.210526315789474e-06,
"loss": 3.186441421508789,
"step": 123
},
{
"epoch": 0.09794628751974724,
"grad_norm": 14.63351232300644,
"learning_rate": 3.236842105263158e-06,
"loss": 3.451972246170044,
"step": 124
},
{
"epoch": 0.09873617693522907,
"grad_norm": 8.585310462971533,
"learning_rate": 3.2631578947368423e-06,
"loss": 2.5213232040405273,
"step": 125
},
{
"epoch": 0.0995260663507109,
"grad_norm": 7.39672659605034,
"learning_rate": 3.289473684210527e-06,
"loss": 2.5365099906921387,
"step": 126
},
{
"epoch": 0.10031595576619273,
"grad_norm": 11.157374472691357,
"learning_rate": 3.3157894736842107e-06,
"loss": 2.8651609420776367,
"step": 127
},
{
"epoch": 0.10110584518167456,
"grad_norm": 10.596959610695333,
"learning_rate": 3.342105263157895e-06,
"loss": 3.034381866455078,
"step": 128
},
{
"epoch": 0.1018957345971564,
"grad_norm": 14.277741979207777,
"learning_rate": 3.368421052631579e-06,
"loss": 2.627697467803955,
"step": 129
},
{
"epoch": 0.10268562401263823,
"grad_norm": 7.246920014548371,
"learning_rate": 3.3947368421052636e-06,
"loss": 2.9030356407165527,
"step": 130
},
{
"epoch": 0.10347551342812006,
"grad_norm": 7.2011210077095775,
"learning_rate": 3.421052631578948e-06,
"loss": 2.7330222129821777,
"step": 131
},
{
"epoch": 0.10426540284360189,
"grad_norm": 9.54971680440488,
"learning_rate": 3.447368421052632e-06,
"loss": 2.8853511810302734,
"step": 132
},
{
"epoch": 0.10505529225908374,
"grad_norm": 18.526435046610747,
"learning_rate": 3.473684210526316e-06,
"loss": 3.570380449295044,
"step": 133
},
{
"epoch": 0.10584518167456557,
"grad_norm": 34.71437679140294,
"learning_rate": 3.5e-06,
"loss": 3.5759708881378174,
"step": 134
},
{
"epoch": 0.1066350710900474,
"grad_norm": 13.769311251161728,
"learning_rate": 3.5263157894736846e-06,
"loss": 2.9467697143554688,
"step": 135
},
{
"epoch": 0.10742496050552923,
"grad_norm": 8.108715220223203,
"learning_rate": 3.5526315789473687e-06,
"loss": 2.982203483581543,
"step": 136
},
{
"epoch": 0.10821484992101106,
"grad_norm": 9.835029796798992,
"learning_rate": 3.578947368421053e-06,
"loss": 2.9849791526794434,
"step": 137
},
{
"epoch": 0.10900473933649289,
"grad_norm": 10.17747887266375,
"learning_rate": 3.605263157894737e-06,
"loss": 2.5947208404541016,
"step": 138
},
{
"epoch": 0.10979462875197472,
"grad_norm": 30.956825233562924,
"learning_rate": 3.6315789473684217e-06,
"loss": 2.478144407272339,
"step": 139
},
{
"epoch": 0.11058451816745656,
"grad_norm": 14.433139764637794,
"learning_rate": 3.657894736842106e-06,
"loss": 3.3270864486694336,
"step": 140
},
{
"epoch": 0.11137440758293839,
"grad_norm": 9.114525691597905,
"learning_rate": 3.6842105263157896e-06,
"loss": 3.3300728797912598,
"step": 141
},
{
"epoch": 0.11216429699842022,
"grad_norm": 10.962185488198799,
"learning_rate": 3.710526315789474e-06,
"loss": 2.858724594116211,
"step": 142
},
{
"epoch": 0.11295418641390205,
"grad_norm": 9.002282231003035,
"learning_rate": 3.736842105263158e-06,
"loss": 3.2307381629943848,
"step": 143
},
{
"epoch": 0.11374407582938388,
"grad_norm": 9.727066408665603,
"learning_rate": 3.7631578947368426e-06,
"loss": 3.1068058013916016,
"step": 144
},
{
"epoch": 0.11453396524486571,
"grad_norm": 11.203620625302497,
"learning_rate": 3.789473684210527e-06,
"loss": 3.130736827850342,
"step": 145
},
{
"epoch": 0.11532385466034756,
"grad_norm": 9.522046772312253,
"learning_rate": 3.815789473684211e-06,
"loss": 3.020020008087158,
"step": 146
},
{
"epoch": 0.11611374407582939,
"grad_norm": 8.938687629412906,
"learning_rate": 3.842105263157895e-06,
"loss": 3.1917128562927246,
"step": 147
},
{
"epoch": 0.11690363349131122,
"grad_norm": 12.372597800255045,
"learning_rate": 3.868421052631579e-06,
"loss": 3.393472671508789,
"step": 148
},
{
"epoch": 0.11769352290679305,
"grad_norm": 11.917832534808403,
"learning_rate": 3.894736842105263e-06,
"loss": 2.8924148082733154,
"step": 149
},
{
"epoch": 0.11848341232227488,
"grad_norm": 8.376978563766828,
"learning_rate": 3.921052631578947e-06,
"loss": 3.18516206741333,
"step": 150
},
{
"epoch": 0.11927330173775672,
"grad_norm": 10.751606207100387,
"learning_rate": 3.947368421052632e-06,
"loss": 3.0006637573242188,
"step": 151
},
{
"epoch": 0.12006319115323855,
"grad_norm": 10.601401419025592,
"learning_rate": 3.9736842105263165e-06,
"loss": 2.7774577140808105,
"step": 152
},
{
"epoch": 0.12085308056872038,
"grad_norm": 16.707393086077566,
"learning_rate": 4.000000000000001e-06,
"loss": 3.4506897926330566,
"step": 153
},
{
"epoch": 0.12164296998420221,
"grad_norm": 11.964190828423138,
"learning_rate": 4.026315789473684e-06,
"loss": 2.7091317176818848,
"step": 154
},
{
"epoch": 0.12243285939968404,
"grad_norm": 24.527932349156284,
"learning_rate": 4.052631578947368e-06,
"loss": 3.345675468444824,
"step": 155
},
{
"epoch": 0.12322274881516587,
"grad_norm": 7.3190241803547895,
"learning_rate": 4.078947368421053e-06,
"loss": 2.6796462535858154,
"step": 156
},
{
"epoch": 0.1240126382306477,
"grad_norm": 11.87038661241619,
"learning_rate": 4.105263157894737e-06,
"loss": 2.6081080436706543,
"step": 157
},
{
"epoch": 0.12480252764612954,
"grad_norm": 11.72825788387354,
"learning_rate": 4.1315789473684216e-06,
"loss": 3.0451087951660156,
"step": 158
},
{
"epoch": 0.12559241706161137,
"grad_norm": 11.390036203544065,
"learning_rate": 4.157894736842106e-06,
"loss": 2.8156795501708984,
"step": 159
},
{
"epoch": 0.1263823064770932,
"grad_norm": 21.914947252886318,
"learning_rate": 4.18421052631579e-06,
"loss": 2.579136848449707,
"step": 160
},
{
"epoch": 0.12717219589257503,
"grad_norm": 10.243517013753197,
"learning_rate": 4.210526315789474e-06,
"loss": 2.9866762161254883,
"step": 161
},
{
"epoch": 0.12796208530805686,
"grad_norm": 14.2986733371629,
"learning_rate": 4.236842105263158e-06,
"loss": 2.317359685897827,
"step": 162
},
{
"epoch": 0.1287519747235387,
"grad_norm": 7.467677343055879,
"learning_rate": 4.2631578947368425e-06,
"loss": 1.8712537288665771,
"step": 163
},
{
"epoch": 0.12954186413902052,
"grad_norm": 13.673831526330856,
"learning_rate": 4.289473684210527e-06,
"loss": 3.0335092544555664,
"step": 164
},
{
"epoch": 0.13033175355450238,
"grad_norm": 16.02571007766451,
"learning_rate": 4.315789473684211e-06,
"loss": 3.632401943206787,
"step": 165
},
{
"epoch": 0.13112164296998421,
"grad_norm": 17.67212914050096,
"learning_rate": 4.342105263157895e-06,
"loss": 2.679199457168579,
"step": 166
},
{
"epoch": 0.13191153238546605,
"grad_norm": 19.192049393351457,
"learning_rate": 4.368421052631579e-06,
"loss": 2.2504091262817383,
"step": 167
},
{
"epoch": 0.13270142180094788,
"grad_norm": 15.388906369037276,
"learning_rate": 4.394736842105263e-06,
"loss": 2.8348331451416016,
"step": 168
},
{
"epoch": 0.1334913112164297,
"grad_norm": 16.961630671510154,
"learning_rate": 4.4210526315789476e-06,
"loss": 3.5437369346618652,
"step": 169
},
{
"epoch": 0.13428120063191154,
"grad_norm": 19.29309682197195,
"learning_rate": 4.447368421052632e-06,
"loss": 3.0312232971191406,
"step": 170
},
{
"epoch": 0.13507109004739337,
"grad_norm": 12.4885877307498,
"learning_rate": 4.473684210526316e-06,
"loss": 2.930471658706665,
"step": 171
},
{
"epoch": 0.1358609794628752,
"grad_norm": 9.308821835922053,
"learning_rate": 4.5e-06,
"loss": 2.809572696685791,
"step": 172
},
{
"epoch": 0.13665086887835703,
"grad_norm": 22.10410295713834,
"learning_rate": 4.526315789473685e-06,
"loss": 2.782520294189453,
"step": 173
},
{
"epoch": 0.13744075829383887,
"grad_norm": 9.424258451368976,
"learning_rate": 4.552631578947369e-06,
"loss": 3.311187267303467,
"step": 174
},
{
"epoch": 0.1382306477093207,
"grad_norm": 11.81370027596634,
"learning_rate": 4.578947368421053e-06,
"loss": 2.8037075996398926,
"step": 175
},
{
"epoch": 0.13902053712480253,
"grad_norm": 19.14018612073658,
"learning_rate": 4.605263157894737e-06,
"loss": 2.7607202529907227,
"step": 176
},
{
"epoch": 0.13981042654028436,
"grad_norm": 9.985114785424416,
"learning_rate": 4.631578947368421e-06,
"loss": 3.2012553215026855,
"step": 177
},
{
"epoch": 0.1406003159557662,
"grad_norm": 9.786086973550603,
"learning_rate": 4.657894736842106e-06,
"loss": 2.4347238540649414,
"step": 178
},
{
"epoch": 0.14139020537124802,
"grad_norm": 8.769407464343628,
"learning_rate": 4.68421052631579e-06,
"loss": 2.9895355701446533,
"step": 179
},
{
"epoch": 0.14218009478672985,
"grad_norm": 10.531181285243829,
"learning_rate": 4.710526315789474e-06,
"loss": 2.5453662872314453,
"step": 180
},
{
"epoch": 0.1429699842022117,
"grad_norm": 11.460636592646704,
"learning_rate": 4.736842105263158e-06,
"loss": 2.439375400543213,
"step": 181
},
{
"epoch": 0.14375987361769352,
"grad_norm": 11.905830261985201,
"learning_rate": 4.763157894736842e-06,
"loss": 4.2906694412231445,
"step": 182
},
{
"epoch": 0.14454976303317535,
"grad_norm": 16.583174963846417,
"learning_rate": 4.789473684210527e-06,
"loss": 3.135786533355713,
"step": 183
},
{
"epoch": 0.14533965244865718,
"grad_norm": 16.327103894696393,
"learning_rate": 4.815789473684211e-06,
"loss": 1.763169288635254,
"step": 184
},
{
"epoch": 0.146129541864139,
"grad_norm": 9.224173810150578,
"learning_rate": 4.842105263157895e-06,
"loss": 2.444568157196045,
"step": 185
},
{
"epoch": 0.14691943127962084,
"grad_norm": 13.627965372331012,
"learning_rate": 4.8684210526315795e-06,
"loss": 2.6934571266174316,
"step": 186
},
{
"epoch": 0.14770932069510267,
"grad_norm": 11.077507415528911,
"learning_rate": 4.894736842105264e-06,
"loss": 2.9741287231445312,
"step": 187
},
{
"epoch": 0.1484992101105845,
"grad_norm": 11.532032255217114,
"learning_rate": 4.921052631578948e-06,
"loss": 2.824622869491577,
"step": 188
},
{
"epoch": 0.14928909952606634,
"grad_norm": 18.679360225668912,
"learning_rate": 4.947368421052632e-06,
"loss": 3.0964691638946533,
"step": 189
},
{
"epoch": 0.1500789889415482,
"grad_norm": 9.620068127723037,
"learning_rate": 4.973684210526316e-06,
"loss": 2.981693744659424,
"step": 190
},
{
"epoch": 0.15086887835703003,
"grad_norm": 7.772300956867857,
"learning_rate": 5e-06,
"loss": 2.799048662185669,
"step": 191
},
{
"epoch": 0.15165876777251186,
"grad_norm": 12.414480771670288,
"learning_rate": 5.026315789473685e-06,
"loss": 3.1177570819854736,
"step": 192
},
{
"epoch": 0.1524486571879937,
"grad_norm": 12.641305628552601,
"learning_rate": 5.052631578947369e-06,
"loss": 2.37302827835083,
"step": 193
},
{
"epoch": 0.15323854660347552,
"grad_norm": 11.55906392820543,
"learning_rate": 5.078947368421053e-06,
"loss": 2.5041732788085938,
"step": 194
},
{
"epoch": 0.15402843601895735,
"grad_norm": 7.7791945091120285,
"learning_rate": 5.105263157894738e-06,
"loss": 2.874569892883301,
"step": 195
},
{
"epoch": 0.15481832543443919,
"grad_norm": 10.474120784603407,
"learning_rate": 5.131578947368422e-06,
"loss": 2.8549094200134277,
"step": 196
},
{
"epoch": 0.15560821484992102,
"grad_norm": 7.993346143459158,
"learning_rate": 5.157894736842106e-06,
"loss": 2.993251085281372,
"step": 197
},
{
"epoch": 0.15639810426540285,
"grad_norm": 20.941550789609742,
"learning_rate": 5.18421052631579e-06,
"loss": 4.168525695800781,
"step": 198
},
{
"epoch": 0.15718799368088468,
"grad_norm": 12.528721144731515,
"learning_rate": 5.210526315789474e-06,
"loss": 2.4273793697357178,
"step": 199
},
{
"epoch": 0.1579778830963665,
"grad_norm": 11.286529989353973,
"learning_rate": 5.236842105263158e-06,
"loss": 3.011016368865967,
"step": 200
},
{
"epoch": 0.15876777251184834,
"grad_norm": 17.406854394281535,
"learning_rate": 5.263157894736842e-06,
"loss": 2.8892858028411865,
"step": 201
},
{
"epoch": 0.15955766192733017,
"grad_norm": 12.78306492867607,
"learning_rate": 5.289473684210526e-06,
"loss": 2.4699714183807373,
"step": 202
},
{
"epoch": 0.160347551342812,
"grad_norm": 12.969828387945576,
"learning_rate": 5.315789473684211e-06,
"loss": 2.7423977851867676,
"step": 203
},
{
"epoch": 0.16113744075829384,
"grad_norm": 7.657893790006663,
"learning_rate": 5.342105263157895e-06,
"loss": 2.7218151092529297,
"step": 204
},
{
"epoch": 0.16192733017377567,
"grad_norm": 9.492276008453782,
"learning_rate": 5.36842105263158e-06,
"loss": 2.555281639099121,
"step": 205
},
{
"epoch": 0.1627172195892575,
"grad_norm": 13.463617124979155,
"learning_rate": 5.394736842105264e-06,
"loss": 3.284069538116455,
"step": 206
},
{
"epoch": 0.16350710900473933,
"grad_norm": 12.35995760266546,
"learning_rate": 5.421052631578948e-06,
"loss": 2.8107781410217285,
"step": 207
},
{
"epoch": 0.16429699842022116,
"grad_norm": 22.996410093914797,
"learning_rate": 5.447368421052632e-06,
"loss": 2.7325127124786377,
"step": 208
},
{
"epoch": 0.165086887835703,
"grad_norm": 16.585705791801974,
"learning_rate": 5.4736842105263165e-06,
"loss": 2.7490532398223877,
"step": 209
},
{
"epoch": 0.16587677725118483,
"grad_norm": 8.638152990987923,
"learning_rate": 5.500000000000001e-06,
"loss": 3.5097951889038086,
"step": 210
},
{
"epoch": 0.16666666666666666,
"grad_norm": 13.734843553912885,
"learning_rate": 5.526315789473685e-06,
"loss": 3.340397834777832,
"step": 211
},
{
"epoch": 0.1674565560821485,
"grad_norm": 32.571959419532426,
"learning_rate": 5.552631578947368e-06,
"loss": 3.0845370292663574,
"step": 212
},
{
"epoch": 0.16824644549763032,
"grad_norm": 12.411694267674704,
"learning_rate": 5.578947368421052e-06,
"loss": 3.3935816287994385,
"step": 213
},
{
"epoch": 0.16903633491311215,
"grad_norm": 8.554865869690868,
"learning_rate": 5.605263157894737e-06,
"loss": 2.5191092491149902,
"step": 214
},
{
"epoch": 0.16982622432859398,
"grad_norm": 13.713331969172893,
"learning_rate": 5.631578947368422e-06,
"loss": 2.265045166015625,
"step": 215
},
{
"epoch": 0.17061611374407584,
"grad_norm": 8.759424141686443,
"learning_rate": 5.657894736842106e-06,
"loss": 2.9024651050567627,
"step": 216
},
{
"epoch": 0.17140600315955767,
"grad_norm": 20.192932719198154,
"learning_rate": 5.68421052631579e-06,
"loss": 3.174898624420166,
"step": 217
},
{
"epoch": 0.1721958925750395,
"grad_norm": 9.300452899615632,
"learning_rate": 5.710526315789474e-06,
"loss": 2.203352212905884,
"step": 218
},
{
"epoch": 0.17298578199052134,
"grad_norm": 16.964918499584563,
"learning_rate": 5.736842105263158e-06,
"loss": 2.399977684020996,
"step": 219
},
{
"epoch": 0.17377567140600317,
"grad_norm": 20.569787010147447,
"learning_rate": 5.7631578947368425e-06,
"loss": 3.2228713035583496,
"step": 220
},
{
"epoch": 0.174565560821485,
"grad_norm": 10.59360929404439,
"learning_rate": 5.789473684210527e-06,
"loss": 2.613041639328003,
"step": 221
},
{
"epoch": 0.17535545023696683,
"grad_norm": 12.774907668976972,
"learning_rate": 5.815789473684212e-06,
"loss": 3.2169127464294434,
"step": 222
},
{
"epoch": 0.17614533965244866,
"grad_norm": 38.185104925418536,
"learning_rate": 5.842105263157896e-06,
"loss": 3.227830648422241,
"step": 223
},
{
"epoch": 0.1769352290679305,
"grad_norm": 15.337225837697051,
"learning_rate": 5.86842105263158e-06,
"loss": 2.9216299057006836,
"step": 224
},
{
"epoch": 0.17772511848341233,
"grad_norm": 19.652283271220284,
"learning_rate": 5.8947368421052634e-06,
"loss": 3.0096397399902344,
"step": 225
},
{
"epoch": 0.17851500789889416,
"grad_norm": 21.342277640312286,
"learning_rate": 5.921052631578948e-06,
"loss": 2.2839317321777344,
"step": 226
},
{
"epoch": 0.179304897314376,
"grad_norm": 18.794368318684786,
"learning_rate": 5.947368421052632e-06,
"loss": 3.2453808784484863,
"step": 227
},
{
"epoch": 0.18009478672985782,
"grad_norm": 20.372960047849478,
"learning_rate": 5.973684210526316e-06,
"loss": 3.73872447013855,
"step": 228
},
{
"epoch": 0.18088467614533965,
"grad_norm": 7.477591538110839,
"learning_rate": 6e-06,
"loss": 2.9210574626922607,
"step": 229
},
{
"epoch": 0.18167456556082148,
"grad_norm": 20.286752935347092,
"learning_rate": 6.026315789473684e-06,
"loss": 3.0264639854431152,
"step": 230
},
{
"epoch": 0.18246445497630331,
"grad_norm": 7.752616268806402,
"learning_rate": 6.0526315789473685e-06,
"loss": 2.5452468395233154,
"step": 231
},
{
"epoch": 0.18325434439178515,
"grad_norm": 14.021383263434027,
"learning_rate": 6.0789473684210535e-06,
"loss": 3.122269868850708,
"step": 232
},
{
"epoch": 0.18404423380726698,
"grad_norm": 31.61505834794581,
"learning_rate": 6.105263157894738e-06,
"loss": 2.652003765106201,
"step": 233
},
{
"epoch": 0.1848341232227488,
"grad_norm": 14.714502845759036,
"learning_rate": 6.131578947368422e-06,
"loss": 3.3281772136688232,
"step": 234
},
{
"epoch": 0.18562401263823064,
"grad_norm": 8.281361641079902,
"learning_rate": 6.157894736842106e-06,
"loss": 2.793631076812744,
"step": 235
},
{
"epoch": 0.18641390205371247,
"grad_norm": 18.19614005385017,
"learning_rate": 6.18421052631579e-06,
"loss": 2.75974178314209,
"step": 236
},
{
"epoch": 0.1872037914691943,
"grad_norm": 14.062302050239529,
"learning_rate": 6.2105263157894745e-06,
"loss": 3.2623343467712402,
"step": 237
},
{
"epoch": 0.18799368088467613,
"grad_norm": 11.273662081089574,
"learning_rate": 6.236842105263159e-06,
"loss": 3.017319440841675,
"step": 238
},
{
"epoch": 0.18878357030015797,
"grad_norm": 12.442581117618769,
"learning_rate": 6.263157894736842e-06,
"loss": 2.9280037879943848,
"step": 239
},
{
"epoch": 0.1895734597156398,
"grad_norm": 14.868405534686683,
"learning_rate": 6.289473684210526e-06,
"loss": 4.794824123382568,
"step": 240
},
{
"epoch": 0.19036334913112166,
"grad_norm": 18.342383362134704,
"learning_rate": 6.31578947368421e-06,
"loss": 2.6316001415252686,
"step": 241
},
{
"epoch": 0.1911532385466035,
"grad_norm": 12.903091182966538,
"learning_rate": 6.342105263157895e-06,
"loss": 3.190481185913086,
"step": 242
},
{
"epoch": 0.19194312796208532,
"grad_norm": 13.060648808489958,
"learning_rate": 6.3684210526315795e-06,
"loss": 2.072887420654297,
"step": 243
},
{
"epoch": 0.19273301737756715,
"grad_norm": 10.434484710036394,
"learning_rate": 6.394736842105264e-06,
"loss": 3.4056625366210938,
"step": 244
},
{
"epoch": 0.19352290679304898,
"grad_norm": 17.07700555394061,
"learning_rate": 6.421052631578948e-06,
"loss": 3.1885697841644287,
"step": 245
},
{
"epoch": 0.1943127962085308,
"grad_norm": 11.242377631499217,
"learning_rate": 6.447368421052632e-06,
"loss": 2.7972757816314697,
"step": 246
},
{
"epoch": 0.19510268562401265,
"grad_norm": 10.122508148985222,
"learning_rate": 6.473684210526316e-06,
"loss": 2.625680923461914,
"step": 247
},
{
"epoch": 0.19589257503949448,
"grad_norm": 15.194302817759452,
"learning_rate": 6.5000000000000004e-06,
"loss": 3.370196580886841,
"step": 248
},
{
"epoch": 0.1966824644549763,
"grad_norm": 9.367476542559452,
"learning_rate": 6.526315789473685e-06,
"loss": 3.2335524559020996,
"step": 249
},
{
"epoch": 0.19747235387045814,
"grad_norm": 13.638698209387572,
"learning_rate": 6.55263157894737e-06,
"loss": 3.060309410095215,
"step": 250
},
{
"epoch": 0.19826224328593997,
"grad_norm": 21.504696461723928,
"learning_rate": 6.578947368421054e-06,
"loss": 2.5595827102661133,
"step": 251
},
{
"epoch": 0.1990521327014218,
"grad_norm": 12.760844619202933,
"learning_rate": 6.605263157894738e-06,
"loss": 2.9294533729553223,
"step": 252
},
{
"epoch": 0.19984202211690363,
"grad_norm": 16.14327478944924,
"learning_rate": 6.631578947368421e-06,
"loss": 3.1590025424957275,
"step": 253
},
{
"epoch": 0.20063191153238547,
"grad_norm": 10.670002260991126,
"learning_rate": 6.6578947368421055e-06,
"loss": 2.868878126144409,
"step": 254
},
{
"epoch": 0.2014218009478673,
"grad_norm": 7.410086829781492,
"learning_rate": 6.68421052631579e-06,
"loss": 2.524019479751587,
"step": 255
},
{
"epoch": 0.20221169036334913,
"grad_norm": 9.676832864068576,
"learning_rate": 6.710526315789474e-06,
"loss": 2.7657065391540527,
"step": 256
},
{
"epoch": 0.20300157977883096,
"grad_norm": 15.950580447163818,
"learning_rate": 6.736842105263158e-06,
"loss": 3.229970932006836,
"step": 257
},
{
"epoch": 0.2037914691943128,
"grad_norm": 15.60324691981264,
"learning_rate": 6.763157894736842e-06,
"loss": 2.7795071601867676,
"step": 258
},
{
"epoch": 0.20458135860979462,
"grad_norm": 7.647680832081348,
"learning_rate": 6.789473684210527e-06,
"loss": 2.202699661254883,
"step": 259
},
{
"epoch": 0.20537124802527645,
"grad_norm": 11.113708735727473,
"learning_rate": 6.8157894736842115e-06,
"loss": 3.018834114074707,
"step": 260
},
{
"epoch": 0.20616113744075829,
"grad_norm": 19.140215467090133,
"learning_rate": 6.842105263157896e-06,
"loss": 2.560054063796997,
"step": 261
},
{
"epoch": 0.20695102685624012,
"grad_norm": 9.162828639443463,
"learning_rate": 6.86842105263158e-06,
"loss": 2.5356526374816895,
"step": 262
},
{
"epoch": 0.20774091627172195,
"grad_norm": 13.818767936109861,
"learning_rate": 6.894736842105264e-06,
"loss": 3.1693482398986816,
"step": 263
},
{
"epoch": 0.20853080568720378,
"grad_norm": 18.595765057532514,
"learning_rate": 6.921052631578948e-06,
"loss": 2.3949215412139893,
"step": 264
},
{
"epoch": 0.2093206951026856,
"grad_norm": 17.318744899310534,
"learning_rate": 6.947368421052632e-06,
"loss": 3.075978994369507,
"step": 265
},
{
"epoch": 0.21011058451816747,
"grad_norm": 24.655257168385077,
"learning_rate": 6.973684210526316e-06,
"loss": 3.427466869354248,
"step": 266
},
{
"epoch": 0.2109004739336493,
"grad_norm": 15.241527713725562,
"learning_rate": 7e-06,
"loss": 2.109011650085449,
"step": 267
},
{
"epoch": 0.21169036334913113,
"grad_norm": 8.501293757028371,
"learning_rate": 7.026315789473684e-06,
"loss": 3.112001419067383,
"step": 268
},
{
"epoch": 0.21248025276461296,
"grad_norm": 15.448418886330026,
"learning_rate": 7.052631578947369e-06,
"loss": 1.7198365926742554,
"step": 269
},
{
"epoch": 0.2132701421800948,
"grad_norm": 9.545915874496153,
"learning_rate": 7.078947368421053e-06,
"loss": 2.8806190490722656,
"step": 270
},
{
"epoch": 0.21406003159557663,
"grad_norm": 18.727295292899043,
"learning_rate": 7.1052631578947375e-06,
"loss": 3.1914749145507812,
"step": 271
},
{
"epoch": 0.21484992101105846,
"grad_norm": 19.609264443447998,
"learning_rate": 7.131578947368422e-06,
"loss": 3.506901741027832,
"step": 272
},
{
"epoch": 0.2156398104265403,
"grad_norm": 13.136262515857457,
"learning_rate": 7.157894736842106e-06,
"loss": 3.197396755218506,
"step": 273
},
{
"epoch": 0.21642969984202212,
"grad_norm": 23.821750570981514,
"learning_rate": 7.18421052631579e-06,
"loss": 3.0057897567749023,
"step": 274
},
{
"epoch": 0.21721958925750395,
"grad_norm": 12.168381845745005,
"learning_rate": 7.210526315789474e-06,
"loss": 3.459364652633667,
"step": 275
},
{
"epoch": 0.21800947867298578,
"grad_norm": 17.00612657277193,
"learning_rate": 7.236842105263158e-06,
"loss": 3.4900269508361816,
"step": 276
},
{
"epoch": 0.21879936808846762,
"grad_norm": 10.283758953220772,
"learning_rate": 7.263157894736843e-06,
"loss": 3.3802132606506348,
"step": 277
},
{
"epoch": 0.21958925750394945,
"grad_norm": 16.02360521858266,
"learning_rate": 7.289473684210528e-06,
"loss": 3.455819606781006,
"step": 278
},
{
"epoch": 0.22037914691943128,
"grad_norm": 11.274076685112723,
"learning_rate": 7.315789473684212e-06,
"loss": 2.847879409790039,
"step": 279
},
{
"epoch": 0.2211690363349131,
"grad_norm": 15.606199898777566,
"learning_rate": 7.342105263157895e-06,
"loss": 3.4169538021087646,
"step": 280
},
{
"epoch": 0.22195892575039494,
"grad_norm": 12.898430910622961,
"learning_rate": 7.368421052631579e-06,
"loss": 3.030590772628784,
"step": 281
},
{
"epoch": 0.22274881516587677,
"grad_norm": 15.500513401800355,
"learning_rate": 7.3947368421052635e-06,
"loss": 2.649216890335083,
"step": 282
},
{
"epoch": 0.2235387045813586,
"grad_norm": 18.378896929038152,
"learning_rate": 7.421052631578948e-06,
"loss": 3.274688720703125,
"step": 283
},
{
"epoch": 0.22432859399684044,
"grad_norm": 9.090747343971215,
"learning_rate": 7.447368421052632e-06,
"loss": 2.7677786350250244,
"step": 284
},
{
"epoch": 0.22511848341232227,
"grad_norm": 11.906482510855422,
"learning_rate": 7.473684210526316e-06,
"loss": 3.239955186843872,
"step": 285
},
{
"epoch": 0.2259083728278041,
"grad_norm": 11.474852988282166,
"learning_rate": 7.500000000000001e-06,
"loss": 3.009387969970703,
"step": 286
},
{
"epoch": 0.22669826224328593,
"grad_norm": 12.575433605163134,
"learning_rate": 7.526315789473685e-06,
"loss": 2.9016571044921875,
"step": 287
},
{
"epoch": 0.22748815165876776,
"grad_norm": 14.9975427549329,
"learning_rate": 7.552631578947369e-06,
"loss": 3.257054328918457,
"step": 288
},
{
"epoch": 0.2282780410742496,
"grad_norm": 26.71508060406798,
"learning_rate": 7.578947368421054e-06,
"loss": 2.75146222114563,
"step": 289
},
{
"epoch": 0.22906793048973143,
"grad_norm": 16.910026550475397,
"learning_rate": 7.605263157894738e-06,
"loss": 2.757063388824463,
"step": 290
},
{
"epoch": 0.22985781990521326,
"grad_norm": 9.680373357034156,
"learning_rate": 7.631578947368423e-06,
"loss": 2.7959532737731934,
"step": 291
},
{
"epoch": 0.23064770932069512,
"grad_norm": 12.18894296936391,
"learning_rate": 7.657894736842106e-06,
"loss": 2.466348171234131,
"step": 292
},
{
"epoch": 0.23143759873617695,
"grad_norm": 10.475566667489755,
"learning_rate": 7.68421052631579e-06,
"loss": 3.3009557723999023,
"step": 293
},
{
"epoch": 0.23222748815165878,
"grad_norm": 51.393448155359216,
"learning_rate": 7.710526315789474e-06,
"loss": 2.725738048553467,
"step": 294
},
{
"epoch": 0.2330173775671406,
"grad_norm": 14.358238125160575,
"learning_rate": 7.736842105263158e-06,
"loss": 2.783334493637085,
"step": 295
},
{
"epoch": 0.23380726698262244,
"grad_norm": 19.57667640214124,
"learning_rate": 7.763157894736843e-06,
"loss": 3.1590988636016846,
"step": 296
},
{
"epoch": 0.23459715639810427,
"grad_norm": 12.972851406285965,
"learning_rate": 7.789473684210526e-06,
"loss": 3.1896674633026123,
"step": 297
},
{
"epoch": 0.2353870458135861,
"grad_norm": 10.525192505195403,
"learning_rate": 7.815789473684211e-06,
"loss": 2.8256943225860596,
"step": 298
},
{
"epoch": 0.23617693522906794,
"grad_norm": 11.099287801098866,
"learning_rate": 7.842105263157895e-06,
"loss": 3.2492432594299316,
"step": 299
},
{
"epoch": 0.23696682464454977,
"grad_norm": 9.230404731264265,
"learning_rate": 7.86842105263158e-06,
"loss": 2.5488808155059814,
"step": 300
},
{
"epoch": 0.2377567140600316,
"grad_norm": 15.341199592416597,
"learning_rate": 7.894736842105265e-06,
"loss": 2.4112818241119385,
"step": 301
},
{
"epoch": 0.23854660347551343,
"grad_norm": 7.319063117780196,
"learning_rate": 7.921052631578948e-06,
"loss": 3.260639190673828,
"step": 302
},
{
"epoch": 0.23933649289099526,
"grad_norm": 11.851958212642176,
"learning_rate": 7.947368421052633e-06,
"loss": 3.049391269683838,
"step": 303
},
{
"epoch": 0.2401263823064771,
"grad_norm": 28.13791336194168,
"learning_rate": 7.973684210526316e-06,
"loss": 4.401567459106445,
"step": 304
},
{
"epoch": 0.24091627172195892,
"grad_norm": 9.429106394158737,
"learning_rate": 8.000000000000001e-06,
"loss": 2.9501960277557373,
"step": 305
},
{
"epoch": 0.24170616113744076,
"grad_norm": 7.158645831331756,
"learning_rate": 8.026315789473685e-06,
"loss": 2.5581390857696533,
"step": 306
},
{
"epoch": 0.2424960505529226,
"grad_norm": 12.768920646366887,
"learning_rate": 8.052631578947368e-06,
"loss": 3.1385931968688965,
"step": 307
},
{
"epoch": 0.24328593996840442,
"grad_norm": 12.913024370975956,
"learning_rate": 8.078947368421053e-06,
"loss": 2.7068610191345215,
"step": 308
},
{
"epoch": 0.24407582938388625,
"grad_norm": 10.536088937809055,
"learning_rate": 8.105263157894736e-06,
"loss": 2.828160524368286,
"step": 309
},
{
"epoch": 0.24486571879936808,
"grad_norm": 8.341704390314765,
"learning_rate": 8.131578947368421e-06,
"loss": 2.663266658782959,
"step": 310
},
{
"epoch": 0.2456556082148499,
"grad_norm": 11.240401421047006,
"learning_rate": 8.157894736842106e-06,
"loss": 3.2981982231140137,
"step": 311
},
{
"epoch": 0.24644549763033174,
"grad_norm": 11.187665628610933,
"learning_rate": 8.18421052631579e-06,
"loss": 2.5415968894958496,
"step": 312
},
{
"epoch": 0.24723538704581358,
"grad_norm": 11.94500524279879,
"learning_rate": 8.210526315789475e-06,
"loss": 2.7816240787506104,
"step": 313
},
{
"epoch": 0.2480252764612954,
"grad_norm": 6.355221325715025,
"learning_rate": 8.236842105263158e-06,
"loss": 2.686246395111084,
"step": 314
},
{
"epoch": 0.24881516587677724,
"grad_norm": 17.979652703894846,
"learning_rate": 8.263157894736843e-06,
"loss": 3.970867395401001,
"step": 315
},
{
"epoch": 0.24960505529225907,
"grad_norm": 11.300024279923099,
"learning_rate": 8.289473684210526e-06,
"loss": 2.6537764072418213,
"step": 316
},
{
"epoch": 0.25039494470774093,
"grad_norm": 14.279900232687705,
"learning_rate": 8.315789473684212e-06,
"loss": 2.353628396987915,
"step": 317
},
{
"epoch": 0.25118483412322273,
"grad_norm": 15.211272553278281,
"learning_rate": 8.342105263157897e-06,
"loss": 2.978910207748413,
"step": 318
},
{
"epoch": 0.2519747235387046,
"grad_norm": 10.672333315833844,
"learning_rate": 8.36842105263158e-06,
"loss": 3.0564768314361572,
"step": 319
},
{
"epoch": 0.2527646129541864,
"grad_norm": 13.177853507822194,
"learning_rate": 8.394736842105263e-06,
"loss": 3.3562185764312744,
"step": 320
},
{
"epoch": 0.25355450236966826,
"grad_norm": 13.208149326921594,
"learning_rate": 8.421052631578948e-06,
"loss": 2.7793097496032715,
"step": 321
},
{
"epoch": 0.25434439178515006,
"grad_norm": 7.862272858844339,
"learning_rate": 8.447368421052632e-06,
"loss": 2.6758689880371094,
"step": 322
},
{
"epoch": 0.2551342812006319,
"grad_norm": 8.599054067109956,
"learning_rate": 8.473684210526317e-06,
"loss": 2.71248722076416,
"step": 323
},
{
"epoch": 0.2559241706161137,
"grad_norm": 7.541371370024793,
"learning_rate": 8.5e-06,
"loss": 2.3310184478759766,
"step": 324
},
{
"epoch": 0.2567140600315956,
"grad_norm": 7.201546562450413,
"learning_rate": 8.526315789473685e-06,
"loss": 2.957345962524414,
"step": 325
},
{
"epoch": 0.2575039494470774,
"grad_norm": 12.601533616205508,
"learning_rate": 8.552631578947368e-06,
"loss": 3.0129475593566895,
"step": 326
},
{
"epoch": 0.25829383886255924,
"grad_norm": 11.854948826772384,
"learning_rate": 8.578947368421053e-06,
"loss": 2.7769176959991455,
"step": 327
},
{
"epoch": 0.25908372827804105,
"grad_norm": 10.669512492571942,
"learning_rate": 8.605263157894738e-06,
"loss": 2.550236940383911,
"step": 328
},
{
"epoch": 0.2598736176935229,
"grad_norm": 17.79545384444541,
"learning_rate": 8.631578947368422e-06,
"loss": 2.8490400314331055,
"step": 329
},
{
"epoch": 0.26066350710900477,
"grad_norm": 14.8135974727651,
"learning_rate": 8.657894736842107e-06,
"loss": 3.2657470703125,
"step": 330
},
{
"epoch": 0.26145339652448657,
"grad_norm": 6.186576343564378,
"learning_rate": 8.68421052631579e-06,
"loss": 2.884676456451416,
"step": 331
},
{
"epoch": 0.26224328593996843,
"grad_norm": 10.116320453707134,
"learning_rate": 8.710526315789475e-06,
"loss": 2.7804079055786133,
"step": 332
},
{
"epoch": 0.26303317535545023,
"grad_norm": 9.185373101458339,
"learning_rate": 8.736842105263158e-06,
"loss": 3.2659783363342285,
"step": 333
},
{
"epoch": 0.2638230647709321,
"grad_norm": 12.059802052206885,
"learning_rate": 8.763157894736842e-06,
"loss": 2.868699073791504,
"step": 334
},
{
"epoch": 0.2646129541864139,
"grad_norm": 9.397740276905777,
"learning_rate": 8.789473684210527e-06,
"loss": 2.720752716064453,
"step": 335
},
{
"epoch": 0.26540284360189575,
"grad_norm": 15.209731085255642,
"learning_rate": 8.81578947368421e-06,
"loss": 2.566018581390381,
"step": 336
},
{
"epoch": 0.26619273301737756,
"grad_norm": 9.966999191876049,
"learning_rate": 8.842105263157895e-06,
"loss": 3.5632197856903076,
"step": 337
},
{
"epoch": 0.2669826224328594,
"grad_norm": 13.910376272854188,
"learning_rate": 8.86842105263158e-06,
"loss": 2.7050907611846924,
"step": 338
},
{
"epoch": 0.2677725118483412,
"grad_norm": 16.554248620157857,
"learning_rate": 8.894736842105264e-06,
"loss": 2.687314987182617,
"step": 339
},
{
"epoch": 0.2685624012638231,
"grad_norm": 8.06865971019336,
"learning_rate": 8.921052631578949e-06,
"loss": 2.154885768890381,
"step": 340
},
{
"epoch": 0.2693522906793049,
"grad_norm": 11.43463991295312,
"learning_rate": 8.947368421052632e-06,
"loss": 3.1463260650634766,
"step": 341
},
{
"epoch": 0.27014218009478674,
"grad_norm": 8.94663093119559,
"learning_rate": 8.973684210526317e-06,
"loss": 2.7585976123809814,
"step": 342
},
{
"epoch": 0.27093206951026855,
"grad_norm": 12.607303575239936,
"learning_rate": 9e-06,
"loss": 2.725893974304199,
"step": 343
},
{
"epoch": 0.2717219589257504,
"grad_norm": 40.64523343780517,
"learning_rate": 9.026315789473685e-06,
"loss": 4.318365097045898,
"step": 344
},
{
"epoch": 0.2725118483412322,
"grad_norm": 13.253916224420975,
"learning_rate": 9.05263157894737e-06,
"loss": 3.0328493118286133,
"step": 345
},
{
"epoch": 0.27330173775671407,
"grad_norm": 10.67307265402947,
"learning_rate": 9.078947368421054e-06,
"loss": 3.1137566566467285,
"step": 346
},
{
"epoch": 0.2740916271721959,
"grad_norm": 21.521088237334375,
"learning_rate": 9.105263157894739e-06,
"loss": 2.418055295944214,
"step": 347
},
{
"epoch": 0.27488151658767773,
"grad_norm": 11.029078243075645,
"learning_rate": 9.131578947368422e-06,
"loss": 2.902218818664551,
"step": 348
},
{
"epoch": 0.27567140600315954,
"grad_norm": 9.383500534244467,
"learning_rate": 9.157894736842105e-06,
"loss": 3.282095432281494,
"step": 349
},
{
"epoch": 0.2764612954186414,
"grad_norm": 12.131725163736556,
"learning_rate": 9.18421052631579e-06,
"loss": 2.7594404220581055,
"step": 350
},
{
"epoch": 0.2772511848341232,
"grad_norm": 8.647890182750494,
"learning_rate": 9.210526315789474e-06,
"loss": 2.8441665172576904,
"step": 351
},
{
"epoch": 0.27804107424960506,
"grad_norm": 24.531984631457014,
"learning_rate": 9.236842105263159e-06,
"loss": 2.7135703563690186,
"step": 352
},
{
"epoch": 0.27883096366508686,
"grad_norm": 18.852163365198667,
"learning_rate": 9.263157894736842e-06,
"loss": 2.604950428009033,
"step": 353
},
{
"epoch": 0.2796208530805687,
"grad_norm": 8.864599334587579,
"learning_rate": 9.289473684210527e-06,
"loss": 2.4473190307617188,
"step": 354
},
{
"epoch": 0.2804107424960506,
"grad_norm": 12.76373613296727,
"learning_rate": 9.315789473684212e-06,
"loss": 2.7692112922668457,
"step": 355
},
{
"epoch": 0.2812006319115324,
"grad_norm": 13.999019935363314,
"learning_rate": 9.342105263157895e-06,
"loss": 2.699820041656494,
"step": 356
},
{
"epoch": 0.28199052132701424,
"grad_norm": 12.316515560670386,
"learning_rate": 9.36842105263158e-06,
"loss": 2.9352567195892334,
"step": 357
},
{
"epoch": 0.28278041074249605,
"grad_norm": 12.51002958910899,
"learning_rate": 9.394736842105264e-06,
"loss": 3.0598864555358887,
"step": 358
},
{
"epoch": 0.2835703001579779,
"grad_norm": 9.85811587507726,
"learning_rate": 9.421052631578949e-06,
"loss": 2.760265588760376,
"step": 359
},
{
"epoch": 0.2843601895734597,
"grad_norm": 6.097027548328658,
"learning_rate": 9.447368421052632e-06,
"loss": 2.914360761642456,
"step": 360
},
{
"epoch": 0.28515007898894157,
"grad_norm": 7.443161559538115,
"learning_rate": 9.473684210526315e-06,
"loss": 2.4574689865112305,
"step": 361
},
{
"epoch": 0.2859399684044234,
"grad_norm": 11.482751191563155,
"learning_rate": 9.5e-06,
"loss": 2.4862399101257324,
"step": 362
},
{
"epoch": 0.28672985781990523,
"grad_norm": 15.424578264173912,
"learning_rate": 9.526315789473684e-06,
"loss": 3.046543598175049,
"step": 363
},
{
"epoch": 0.28751974723538704,
"grad_norm": 9.948975453360674,
"learning_rate": 9.552631578947369e-06,
"loss": 2.4152588844299316,
"step": 364
},
{
"epoch": 0.2883096366508689,
"grad_norm": 13.617591686696079,
"learning_rate": 9.578947368421054e-06,
"loss": 2.736586332321167,
"step": 365
},
{
"epoch": 0.2890995260663507,
"grad_norm": 15.14504958752738,
"learning_rate": 9.605263157894737e-06,
"loss": 2.4989566802978516,
"step": 366
},
{
"epoch": 0.28988941548183256,
"grad_norm": 10.799521972148789,
"learning_rate": 9.631578947368422e-06,
"loss": 3.044905424118042,
"step": 367
},
{
"epoch": 0.29067930489731436,
"grad_norm": 12.761463042249291,
"learning_rate": 9.657894736842106e-06,
"loss": 3.2041683197021484,
"step": 368
},
{
"epoch": 0.2914691943127962,
"grad_norm": 12.283682401493921,
"learning_rate": 9.68421052631579e-06,
"loss": 2.799464225769043,
"step": 369
},
{
"epoch": 0.292259083728278,
"grad_norm": 8.71212269177479,
"learning_rate": 9.710526315789474e-06,
"loss": 2.7430920600891113,
"step": 370
},
{
"epoch": 0.2930489731437599,
"grad_norm": 9.849085667279017,
"learning_rate": 9.736842105263159e-06,
"loss": 2.5595669746398926,
"step": 371
},
{
"epoch": 0.2938388625592417,
"grad_norm": 10.009176803122426,
"learning_rate": 9.763157894736844e-06,
"loss": 2.3133273124694824,
"step": 372
},
{
"epoch": 0.29462875197472355,
"grad_norm": 12.273926407605984,
"learning_rate": 9.789473684210527e-06,
"loss": 3.021390438079834,
"step": 373
},
{
"epoch": 0.29541864139020535,
"grad_norm": 11.93736332674034,
"learning_rate": 9.815789473684212e-06,
"loss": 2.2160844802856445,
"step": 374
},
{
"epoch": 0.2962085308056872,
"grad_norm": 12.560377306481286,
"learning_rate": 9.842105263157896e-06,
"loss": 3.1752209663391113,
"step": 375
},
{
"epoch": 0.296998420221169,
"grad_norm": 11.075691497999232,
"learning_rate": 9.868421052631579e-06,
"loss": 3.2403969764709473,
"step": 376
},
{
"epoch": 0.29778830963665087,
"grad_norm": 9.127329916365918,
"learning_rate": 9.894736842105264e-06,
"loss": 2.920243501663208,
"step": 377
},
{
"epoch": 0.2985781990521327,
"grad_norm": 20.657276819664517,
"learning_rate": 9.921052631578947e-06,
"loss": 2.298133134841919,
"step": 378
},
{
"epoch": 0.29936808846761453,
"grad_norm": 11.374882419716124,
"learning_rate": 9.947368421052632e-06,
"loss": 3.370616912841797,
"step": 379
},
{
"epoch": 0.3001579778830964,
"grad_norm": 62.541878662922215,
"learning_rate": 9.973684210526316e-06,
"loss": 2.8304061889648438,
"step": 380
},
{
"epoch": 0.3009478672985782,
"grad_norm": 12.325577555726225,
"learning_rate": 1e-05,
"loss": 2.761497974395752,
"step": 381
},
{
"epoch": 0.30173775671406006,
"grad_norm": 12.217449721945043,
"learning_rate": 9.999997887991768e-06,
"loss": 2.8636984825134277,
"step": 382
},
{
"epoch": 0.30252764612954186,
"grad_norm": 14.74250753809572,
"learning_rate": 9.999991551968855e-06,
"loss": 3.0699851512908936,
"step": 383
},
{
"epoch": 0.3033175355450237,
"grad_norm": 10.26093399476213,
"learning_rate": 9.999980991936614e-06,
"loss": 3.05659818649292,
"step": 384
},
{
"epoch": 0.3041074249605055,
"grad_norm": 9.938077108356492,
"learning_rate": 9.999966207903965e-06,
"loss": 2.55549955368042,
"step": 385
},
{
"epoch": 0.3048973143759874,
"grad_norm": 15.044911851031989,
"learning_rate": 9.999947199883402e-06,
"loss": 3.1818037033081055,
"step": 386
},
{
"epoch": 0.3056872037914692,
"grad_norm": 10.290661955924978,
"learning_rate": 9.999923967890976e-06,
"loss": 2.8169567584991455,
"step": 387
},
{
"epoch": 0.30647709320695105,
"grad_norm": 10.322447186873362,
"learning_rate": 9.999896511946318e-06,
"loss": 3.116442918777466,
"step": 388
},
{
"epoch": 0.30726698262243285,
"grad_norm": 11.292349498728926,
"learning_rate": 9.999864832072623e-06,
"loss": 3.246173858642578,
"step": 389
},
{
"epoch": 0.3080568720379147,
"grad_norm": 15.478127685526808,
"learning_rate": 9.999828928296653e-06,
"loss": 2.8075480461120605,
"step": 390
},
{
"epoch": 0.3088467614533965,
"grad_norm": 11.742069475711396,
"learning_rate": 9.999788800648741e-06,
"loss": 2.6594908237457275,
"step": 391
},
{
"epoch": 0.30963665086887837,
"grad_norm": 19.69321922275349,
"learning_rate": 9.999744449162785e-06,
"loss": 3.7354040145874023,
"step": 392
},
{
"epoch": 0.3104265402843602,
"grad_norm": 29.415130395985084,
"learning_rate": 9.999695873876255e-06,
"loss": 3.1815226078033447,
"step": 393
},
{
"epoch": 0.31121642969984203,
"grad_norm": 9.941742752266487,
"learning_rate": 9.999643074830185e-06,
"loss": 2.3940351009368896,
"step": 394
},
{
"epoch": 0.31200631911532384,
"grad_norm": 9.210678237061227,
"learning_rate": 9.999586052069184e-06,
"loss": 2.998382568359375,
"step": 395
},
{
"epoch": 0.3127962085308057,
"grad_norm": 10.901057718421931,
"learning_rate": 9.99952480564142e-06,
"loss": 3.067406177520752,
"step": 396
},
{
"epoch": 0.3135860979462875,
"grad_norm": 9.78128771641425,
"learning_rate": 9.999459335598639e-06,
"loss": 2.52431058883667,
"step": 397
},
{
"epoch": 0.31437598736176936,
"grad_norm": 14.851243022076401,
"learning_rate": 9.999389641996148e-06,
"loss": 3.1244137287139893,
"step": 398
},
{
"epoch": 0.31516587677725116,
"grad_norm": 14.329659712124773,
"learning_rate": 9.999315724892824e-06,
"loss": 2.4937219619750977,
"step": 399
},
{
"epoch": 0.315955766192733,
"grad_norm": 20.409817077031228,
"learning_rate": 9.999237584351112e-06,
"loss": 3.1036581993103027,
"step": 400
},
{
"epoch": 0.3167456556082148,
"grad_norm": 14.805880939701334,
"learning_rate": 9.999155220437027e-06,
"loss": 3.108419418334961,
"step": 401
},
{
"epoch": 0.3175355450236967,
"grad_norm": 14.542870401516955,
"learning_rate": 9.99906863322015e-06,
"loss": 2.667811632156372,
"step": 402
},
{
"epoch": 0.3183254344391785,
"grad_norm": 12.987665849474283,
"learning_rate": 9.99897782277363e-06,
"loss": 2.661196708679199,
"step": 403
},
{
"epoch": 0.31911532385466035,
"grad_norm": 6.198659599172178,
"learning_rate": 9.998882789174182e-06,
"loss": 2.734131336212158,
"step": 404
},
{
"epoch": 0.31990521327014215,
"grad_norm": 9.512514147413063,
"learning_rate": 9.998783532502094e-06,
"loss": 3.1050682067871094,
"step": 405
},
{
"epoch": 0.320695102685624,
"grad_norm": 12.163533280198134,
"learning_rate": 9.998680052841217e-06,
"loss": 3.1977107524871826,
"step": 406
},
{
"epoch": 0.32148499210110587,
"grad_norm": 14.380824912116953,
"learning_rate": 9.99857235027897e-06,
"loss": 2.267914056777954,
"step": 407
},
{
"epoch": 0.3222748815165877,
"grad_norm": 20.641763401030374,
"learning_rate": 9.998460424906343e-06,
"loss": 3.1761436462402344,
"step": 408
},
{
"epoch": 0.32306477093206953,
"grad_norm": 13.912380925992013,
"learning_rate": 9.998344276817888e-06,
"loss": 2.7362403869628906,
"step": 409
},
{
"epoch": 0.32385466034755134,
"grad_norm": 19.47995219931616,
"learning_rate": 9.998223906111728e-06,
"loss": 2.5798957347869873,
"step": 410
},
{
"epoch": 0.3246445497630332,
"grad_norm": 9.554629411150461,
"learning_rate": 9.998099312889553e-06,
"loss": 3.0284502506256104,
"step": 411
},
{
"epoch": 0.325434439178515,
"grad_norm": 7.846029378625489,
"learning_rate": 9.997970497256619e-06,
"loss": 3.201026201248169,
"step": 412
},
{
"epoch": 0.32622432859399686,
"grad_norm": 14.900346297835531,
"learning_rate": 9.997837459321753e-06,
"loss": 2.7072229385375977,
"step": 413
},
{
"epoch": 0.32701421800947866,
"grad_norm": 13.87299677542047,
"learning_rate": 9.997700199197342e-06,
"loss": 3.2912824153900146,
"step": 414
},
{
"epoch": 0.3278041074249605,
"grad_norm": 9.946081688834646,
"learning_rate": 9.997558716999345e-06,
"loss": 2.7330331802368164,
"step": 415
},
{
"epoch": 0.3285939968404423,
"grad_norm": 11.42849151295169,
"learning_rate": 9.997413012847288e-06,
"loss": 2.7119922637939453,
"step": 416
},
{
"epoch": 0.3293838862559242,
"grad_norm": 7.619660611663469,
"learning_rate": 9.997263086864261e-06,
"loss": 2.9227335453033447,
"step": 417
},
{
"epoch": 0.330173775671406,
"grad_norm": 8.897091541230195,
"learning_rate": 9.997108939176923e-06,
"loss": 2.9391653537750244,
"step": 418
},
{
"epoch": 0.33096366508688785,
"grad_norm": 12.48321978059039,
"learning_rate": 9.996950569915496e-06,
"loss": 2.7242140769958496,
"step": 419
},
{
"epoch": 0.33175355450236965,
"grad_norm": 7.856468794186075,
"learning_rate": 9.996787979213774e-06,
"loss": 2.9598379135131836,
"step": 420
},
{
"epoch": 0.3325434439178515,
"grad_norm": 6.881298773384626,
"learning_rate": 9.996621167209112e-06,
"loss": 2.7800540924072266,
"step": 421
},
{
"epoch": 0.3333333333333333,
"grad_norm": 8.554885683292087,
"learning_rate": 9.996450134042435e-06,
"loss": 3.363186836242676,
"step": 422
},
{
"epoch": 0.3341232227488152,
"grad_norm": 17.067634709237346,
"learning_rate": 9.99627487985823e-06,
"loss": 2.5413265228271484,
"step": 423
},
{
"epoch": 0.334913112164297,
"grad_norm": 7.715891787574891,
"learning_rate": 9.996095404804556e-06,
"loss": 2.6663031578063965,
"step": 424
},
{
"epoch": 0.33570300157977884,
"grad_norm": 12.33434757520458,
"learning_rate": 9.99591170903303e-06,
"loss": 3.798855781555176,
"step": 425
},
{
"epoch": 0.33649289099526064,
"grad_norm": 12.051508519336322,
"learning_rate": 9.995723792698841e-06,
"loss": 2.8377747535705566,
"step": 426
},
{
"epoch": 0.3372827804107425,
"grad_norm": 15.639973760945276,
"learning_rate": 9.995531655960742e-06,
"loss": 2.916019916534424,
"step": 427
},
{
"epoch": 0.3380726698262243,
"grad_norm": 19.40003707121016,
"learning_rate": 9.995335298981051e-06,
"loss": 3.0988028049468994,
"step": 428
},
{
"epoch": 0.33886255924170616,
"grad_norm": 8.979641220891013,
"learning_rate": 9.995134721925647e-06,
"loss": 3.1492342948913574,
"step": 429
},
{
"epoch": 0.33965244865718797,
"grad_norm": 21.875396313167574,
"learning_rate": 9.994929924963982e-06,
"loss": 2.8157858848571777,
"step": 430
},
{
"epoch": 0.3404423380726698,
"grad_norm": 10.410663939939454,
"learning_rate": 9.99472090826907e-06,
"loss": 2.898981809616089,
"step": 431
},
{
"epoch": 0.3412322274881517,
"grad_norm": 7.870464638075447,
"learning_rate": 9.994507672017483e-06,
"loss": 2.815958023071289,
"step": 432
},
{
"epoch": 0.3420221169036335,
"grad_norm": 10.074834498515466,
"learning_rate": 9.99429021638937e-06,
"loss": 2.8587734699249268,
"step": 433
},
{
"epoch": 0.34281200631911535,
"grad_norm": 9.479047537805325,
"learning_rate": 9.994068541568439e-06,
"loss": 2.758235216140747,
"step": 434
},
{
"epoch": 0.34360189573459715,
"grad_norm": 11.805213339073592,
"learning_rate": 9.993842647741955e-06,
"loss": 2.754237174987793,
"step": 435
},
{
"epoch": 0.344391785150079,
"grad_norm": 28.89113952525437,
"learning_rate": 9.993612535100759e-06,
"loss": 2.8837943077087402,
"step": 436
},
{
"epoch": 0.3451816745655608,
"grad_norm": 11.902001596459298,
"learning_rate": 9.993378203839248e-06,
"loss": 2.9341726303100586,
"step": 437
},
{
"epoch": 0.3459715639810427,
"grad_norm": 41.81687674626728,
"learning_rate": 9.99313965415539e-06,
"loss": 2.7734274864196777,
"step": 438
},
{
"epoch": 0.3467614533965245,
"grad_norm": 24.406866447867205,
"learning_rate": 9.992896886250708e-06,
"loss": 3.654956817626953,
"step": 439
},
{
"epoch": 0.34755134281200634,
"grad_norm": 9.751754855929297,
"learning_rate": 9.992649900330295e-06,
"loss": 2.4842309951782227,
"step": 440
},
{
"epoch": 0.34834123222748814,
"grad_norm": 9.90576591780518,
"learning_rate": 9.992398696602805e-06,
"loss": 2.8286516666412354,
"step": 441
},
{
"epoch": 0.34913112164297,
"grad_norm": 15.419437091625719,
"learning_rate": 9.992143275280458e-06,
"loss": 2.875483989715576,
"step": 442
},
{
"epoch": 0.3499210110584518,
"grad_norm": 19.78870286772306,
"learning_rate": 9.991883636579031e-06,
"loss": 2.9084625244140625,
"step": 443
},
{
"epoch": 0.35071090047393366,
"grad_norm": 12.586490948950289,
"learning_rate": 9.99161978071787e-06,
"loss": 3.2318763732910156,
"step": 444
},
{
"epoch": 0.35150078988941547,
"grad_norm": 14.996840973011029,
"learning_rate": 9.991351707919881e-06,
"loss": 2.615654468536377,
"step": 445
},
{
"epoch": 0.3522906793048973,
"grad_norm": 18.69675800078161,
"learning_rate": 9.991079418411534e-06,
"loss": 2.594451427459717,
"step": 446
},
{
"epoch": 0.35308056872037913,
"grad_norm": 12.66669407216749,
"learning_rate": 9.990802912422857e-06,
"loss": 2.9762067794799805,
"step": 447
},
{
"epoch": 0.353870458135861,
"grad_norm": 6.37331509167863,
"learning_rate": 9.990522190187446e-06,
"loss": 2.9721593856811523,
"step": 448
},
{
"epoch": 0.3546603475513428,
"grad_norm": 12.731115361238068,
"learning_rate": 9.990237251942455e-06,
"loss": 3.0602238178253174,
"step": 449
},
{
"epoch": 0.35545023696682465,
"grad_norm": 10.595337761744732,
"learning_rate": 9.9899480979286e-06,
"loss": 2.5146780014038086,
"step": 450
},
{
"epoch": 0.35624012638230645,
"grad_norm": 9.131974465956034,
"learning_rate": 9.98965472839016e-06,
"loss": 2.864349842071533,
"step": 451
},
{
"epoch": 0.3570300157977883,
"grad_norm": 9.313334991197689,
"learning_rate": 9.989357143574977e-06,
"loss": 3.1569554805755615,
"step": 452
},
{
"epoch": 0.3578199052132701,
"grad_norm": 9.952415070876715,
"learning_rate": 9.989055343734446e-06,
"loss": 2.114063262939453,
"step": 453
},
{
"epoch": 0.358609794628752,
"grad_norm": 8.442614576050127,
"learning_rate": 9.988749329123532e-06,
"loss": 2.6696736812591553,
"step": 454
},
{
"epoch": 0.3593996840442338,
"grad_norm": 12.468144651927648,
"learning_rate": 9.988439100000758e-06,
"loss": 2.951082468032837,
"step": 455
},
{
"epoch": 0.36018957345971564,
"grad_norm": 13.301749174192217,
"learning_rate": 9.988124656628205e-06,
"loss": 2.8643898963928223,
"step": 456
},
{
"epoch": 0.3609794628751975,
"grad_norm": 51.815990840364165,
"learning_rate": 9.987805999271517e-06,
"loss": 2.082789421081543,
"step": 457
},
{
"epoch": 0.3617693522906793,
"grad_norm": 14.907621177805696,
"learning_rate": 9.987483128199896e-06,
"loss": 2.7254719734191895,
"step": 458
},
{
"epoch": 0.36255924170616116,
"grad_norm": 11.168139294706775,
"learning_rate": 9.987156043686103e-06,
"loss": 2.920536994934082,
"step": 459
},
{
"epoch": 0.36334913112164297,
"grad_norm": 14.696725014468187,
"learning_rate": 9.986824746006463e-06,
"loss": 2.5195441246032715,
"step": 460
},
{
"epoch": 0.3641390205371248,
"grad_norm": 13.759499455594858,
"learning_rate": 9.986489235440858e-06,
"loss": 3.0576400756835938,
"step": 461
},
{
"epoch": 0.36492890995260663,
"grad_norm": 24.22268015695568,
"learning_rate": 9.986149512272723e-06,
"loss": 2.798210382461548,
"step": 462
},
{
"epoch": 0.3657187993680885,
"grad_norm": 7.164355232959107,
"learning_rate": 9.985805576789061e-06,
"loss": 3.2072739601135254,
"step": 463
},
{
"epoch": 0.3665086887835703,
"grad_norm": 21.200711435446056,
"learning_rate": 9.985457429280431e-06,
"loss": 2.2786130905151367,
"step": 464
},
{
"epoch": 0.36729857819905215,
"grad_norm": 10.245055778605597,
"learning_rate": 9.985105070040948e-06,
"loss": 2.6123218536376953,
"step": 465
},
{
"epoch": 0.36808846761453395,
"grad_norm": 11.013361642571633,
"learning_rate": 9.984748499368285e-06,
"loss": 2.8954989910125732,
"step": 466
},
{
"epoch": 0.3688783570300158,
"grad_norm": 11.61804983156601,
"learning_rate": 9.984387717563675e-06,
"loss": 2.308267116546631,
"step": 467
},
{
"epoch": 0.3696682464454976,
"grad_norm": 19.221327600533158,
"learning_rate": 9.984022724931908e-06,
"loss": 3.475597381591797,
"step": 468
},
{
"epoch": 0.3704581358609795,
"grad_norm": 11.854231560542186,
"learning_rate": 9.98365352178133e-06,
"loss": 3.1217355728149414,
"step": 469
},
{
"epoch": 0.3712480252764613,
"grad_norm": 6.403968704450684,
"learning_rate": 9.983280108423846e-06,
"loss": 3.116569995880127,
"step": 470
},
{
"epoch": 0.37203791469194314,
"grad_norm": 9.683074091826104,
"learning_rate": 9.982902485174917e-06,
"loss": 2.748913526535034,
"step": 471
},
{
"epoch": 0.37282780410742494,
"grad_norm": 45.29403837946451,
"learning_rate": 9.98252065235356e-06,
"loss": 2.734304428100586,
"step": 472
},
{
"epoch": 0.3736176935229068,
"grad_norm": 8.629169803711216,
"learning_rate": 9.982134610282348e-06,
"loss": 2.9549429416656494,
"step": 473
},
{
"epoch": 0.3744075829383886,
"grad_norm": 11.378526346658687,
"learning_rate": 9.98174435928741e-06,
"loss": 3.1600706577301025,
"step": 474
},
{
"epoch": 0.37519747235387046,
"grad_norm": 13.877969372280013,
"learning_rate": 9.981349899698433e-06,
"loss": 3.0187835693359375,
"step": 475
},
{
"epoch": 0.37598736176935227,
"grad_norm": 16.082987857555008,
"learning_rate": 9.98095123184866e-06,
"loss": 2.525953769683838,
"step": 476
},
{
"epoch": 0.3767772511848341,
"grad_norm": 15.97178916944899,
"learning_rate": 9.980548356074882e-06,
"loss": 2.4600391387939453,
"step": 477
},
{
"epoch": 0.37756714060031593,
"grad_norm": 11.222135044819899,
"learning_rate": 9.980141272717451e-06,
"loss": 3.0577778816223145,
"step": 478
},
{
"epoch": 0.3783570300157978,
"grad_norm": 10.38398589216634,
"learning_rate": 9.979729982120274e-06,
"loss": 2.5604796409606934,
"step": 479
},
{
"epoch": 0.3791469194312796,
"grad_norm": 28.714703496966944,
"learning_rate": 9.979314484630812e-06,
"loss": 2.344428300857544,
"step": 480
},
{
"epoch": 0.37993680884676145,
"grad_norm": 8.421863202990965,
"learning_rate": 9.978894780600072e-06,
"loss": 2.434558391571045,
"step": 481
},
{
"epoch": 0.3807266982622433,
"grad_norm": 8.802465948474383,
"learning_rate": 9.978470870382631e-06,
"loss": 2.9265592098236084,
"step": 482
},
{
"epoch": 0.3815165876777251,
"grad_norm": 16.360909810195515,
"learning_rate": 9.9780427543366e-06,
"loss": 2.2389657497406006,
"step": 483
},
{
"epoch": 0.382306477093207,
"grad_norm": 12.714530836347008,
"learning_rate": 9.977610432823661e-06,
"loss": 2.518057346343994,
"step": 484
},
{
"epoch": 0.3830963665086888,
"grad_norm": 12.593833443055948,
"learning_rate": 9.977173906209035e-06,
"loss": 2.7258565425872803,
"step": 485
},
{
"epoch": 0.38388625592417064,
"grad_norm": 12.935614043323996,
"learning_rate": 9.976733174861504e-06,
"loss": 2.57004451751709,
"step": 486
},
{
"epoch": 0.38467614533965244,
"grad_norm": 6.953226645670275,
"learning_rate": 9.9762882391534e-06,
"loss": 2.808042526245117,
"step": 487
},
{
"epoch": 0.3854660347551343,
"grad_norm": 7.174411200974892,
"learning_rate": 9.975839099460603e-06,
"loss": 2.4963083267211914,
"step": 488
},
{
"epoch": 0.3862559241706161,
"grad_norm": 8.613913754322418,
"learning_rate": 9.97538575616255e-06,
"loss": 2.676271438598633,
"step": 489
},
{
"epoch": 0.38704581358609796,
"grad_norm": 11.876597542420834,
"learning_rate": 9.974928209642225e-06,
"loss": 2.7614307403564453,
"step": 490
},
{
"epoch": 0.38783570300157977,
"grad_norm": 11.093132079984253,
"learning_rate": 9.974466460286168e-06,
"loss": 2.624708414077759,
"step": 491
},
{
"epoch": 0.3886255924170616,
"grad_norm": 10.27538264079467,
"learning_rate": 9.974000508484464e-06,
"loss": 2.5740928649902344,
"step": 492
},
{
"epoch": 0.38941548183254343,
"grad_norm": 12.05113290396417,
"learning_rate": 9.97353035463075e-06,
"loss": 3.132807731628418,
"step": 493
},
{
"epoch": 0.3902053712480253,
"grad_norm": 10.616071459580557,
"learning_rate": 9.973055999122217e-06,
"loss": 3.1886236667633057,
"step": 494
},
{
"epoch": 0.3909952606635071,
"grad_norm": 7.166433394997447,
"learning_rate": 9.972577442359596e-06,
"loss": 2.458066463470459,
"step": 495
},
{
"epoch": 0.39178515007898895,
"grad_norm": 18.273152599081556,
"learning_rate": 9.97209468474718e-06,
"loss": 2.686516761779785,
"step": 496
},
{
"epoch": 0.39257503949447076,
"grad_norm": 18.046414595846677,
"learning_rate": 9.9716077266928e-06,
"loss": 3.268564224243164,
"step": 497
},
{
"epoch": 0.3933649289099526,
"grad_norm": 20.306060211042794,
"learning_rate": 9.971116568607843e-06,
"loss": 2.7214527130126953,
"step": 498
},
{
"epoch": 0.3941548183254344,
"grad_norm": 12.50174696099143,
"learning_rate": 9.970621210907236e-06,
"loss": 2.9584507942199707,
"step": 499
},
{
"epoch": 0.3949447077409163,
"grad_norm": 12.497953323135684,
"learning_rate": 9.970121654009464e-06,
"loss": 2.7275800704956055,
"step": 500
},
{
"epoch": 0.3957345971563981,
"grad_norm": 8.96830588462741,
"learning_rate": 9.969617898336552e-06,
"loss": 2.4311466217041016,
"step": 501
},
{
"epoch": 0.39652448657187994,
"grad_norm": 13.616711842115256,
"learning_rate": 9.969109944314075e-06,
"loss": 2.9500246047973633,
"step": 502
},
{
"epoch": 0.39731437598736175,
"grad_norm": 17.251138991228274,
"learning_rate": 9.968597792371151e-06,
"loss": 3.416146755218506,
"step": 503
},
{
"epoch": 0.3981042654028436,
"grad_norm": 19.00639226186363,
"learning_rate": 9.968081442940454e-06,
"loss": 3.451007604598999,
"step": 504
},
{
"epoch": 0.3988941548183254,
"grad_norm": 7.711636407435598,
"learning_rate": 9.967560896458192e-06,
"loss": 2.7228689193725586,
"step": 505
},
{
"epoch": 0.39968404423380727,
"grad_norm": 12.583460827994319,
"learning_rate": 9.967036153364127e-06,
"loss": 2.8506970405578613,
"step": 506
},
{
"epoch": 0.4004739336492891,
"grad_norm": 12.221260607836053,
"learning_rate": 9.966507214101565e-06,
"loss": 2.9692885875701904,
"step": 507
},
{
"epoch": 0.40126382306477093,
"grad_norm": 12.905892162067822,
"learning_rate": 9.965974079117351e-06,
"loss": 3.444052219390869,
"step": 508
},
{
"epoch": 0.4020537124802528,
"grad_norm": 11.169864619254174,
"learning_rate": 9.965436748861883e-06,
"loss": 3.00361967086792,
"step": 509
},
{
"epoch": 0.4028436018957346,
"grad_norm": 18.923855282243036,
"learning_rate": 9.9648952237891e-06,
"loss": 2.179131507873535,
"step": 510
},
{
"epoch": 0.40363349131121645,
"grad_norm": 17.18325282035086,
"learning_rate": 9.964349504356481e-06,
"loss": 2.724170446395874,
"step": 511
},
{
"epoch": 0.40442338072669826,
"grad_norm": 9.348305791417955,
"learning_rate": 9.963799591025054e-06,
"loss": 2.658226490020752,
"step": 512
},
{
"epoch": 0.4052132701421801,
"grad_norm": 14.084370825683834,
"learning_rate": 9.963245484259384e-06,
"loss": 3.301179885864258,
"step": 513
},
{
"epoch": 0.4060031595576619,
"grad_norm": 9.131570579938595,
"learning_rate": 9.96268718452759e-06,
"loss": 2.7031455039978027,
"step": 514
},
{
"epoch": 0.4067930489731438,
"grad_norm": 7.293530938243252,
"learning_rate": 9.962124692301315e-06,
"loss": 2.310668468475342,
"step": 515
},
{
"epoch": 0.4075829383886256,
"grad_norm": 28.464034341123952,
"learning_rate": 9.961558008055764e-06,
"loss": 2.6063344478607178,
"step": 516
},
{
"epoch": 0.40837282780410744,
"grad_norm": 7.628841921327853,
"learning_rate": 9.960987132269668e-06,
"loss": 2.0414226055145264,
"step": 517
},
{
"epoch": 0.40916271721958924,
"grad_norm": 14.44432111132087,
"learning_rate": 9.960412065425308e-06,
"loss": 2.770200252532959,
"step": 518
},
{
"epoch": 0.4099526066350711,
"grad_norm": 11.901956808416232,
"learning_rate": 9.959832808008498e-06,
"loss": 2.8997509479522705,
"step": 519
},
{
"epoch": 0.4107424960505529,
"grad_norm": 14.40790924911134,
"learning_rate": 9.959249360508598e-06,
"loss": 2.9758782386779785,
"step": 520
},
{
"epoch": 0.41153238546603477,
"grad_norm": 9.725811994478915,
"learning_rate": 9.95866172341851e-06,
"loss": 2.986323356628418,
"step": 521
},
{
"epoch": 0.41232227488151657,
"grad_norm": 16.22979712148631,
"learning_rate": 9.95806989723467e-06,
"loss": 2.5895464420318604,
"step": 522
},
{
"epoch": 0.41311216429699843,
"grad_norm": 13.923489867833995,
"learning_rate": 9.957473882457051e-06,
"loss": 2.687991142272949,
"step": 523
},
{
"epoch": 0.41390205371248023,
"grad_norm": 26.95251995610371,
"learning_rate": 9.956873679589173e-06,
"loss": 2.4715166091918945,
"step": 524
},
{
"epoch": 0.4146919431279621,
"grad_norm": 14.00634199685074,
"learning_rate": 9.956269289138088e-06,
"loss": 2.5624163150787354,
"step": 525
},
{
"epoch": 0.4154818325434439,
"grad_norm": 29.26485590244888,
"learning_rate": 9.955660711614386e-06,
"loss": 2.6949751377105713,
"step": 526
},
{
"epoch": 0.41627172195892576,
"grad_norm": 13.78447550231186,
"learning_rate": 9.955047947532194e-06,
"loss": 3.0492568016052246,
"step": 527
},
{
"epoch": 0.41706161137440756,
"grad_norm": 9.027477437625507,
"learning_rate": 9.954430997409181e-06,
"loss": 3.8118910789489746,
"step": 528
},
{
"epoch": 0.4178515007898894,
"grad_norm": 8.039786133247507,
"learning_rate": 9.953809861766547e-06,
"loss": 3.058897018432617,
"step": 529
},
{
"epoch": 0.4186413902053712,
"grad_norm": 16.995294036547257,
"learning_rate": 9.953184541129029e-06,
"loss": 3.157442808151245,
"step": 530
},
{
"epoch": 0.4194312796208531,
"grad_norm": 14.009340053986007,
"learning_rate": 9.952555036024898e-06,
"loss": 2.9258034229278564,
"step": 531
},
{
"epoch": 0.42022116903633494,
"grad_norm": 12.583256188813682,
"learning_rate": 9.951921346985966e-06,
"loss": 2.772176742553711,
"step": 532
},
{
"epoch": 0.42101105845181674,
"grad_norm": 14.778680678932332,
"learning_rate": 9.951283474547574e-06,
"loss": 3.1442911624908447,
"step": 533
},
{
"epoch": 0.4218009478672986,
"grad_norm": 10.191686358833875,
"learning_rate": 9.950641419248595e-06,
"loss": 2.6074397563934326,
"step": 534
},
{
"epoch": 0.4225908372827804,
"grad_norm": 13.596508027495249,
"learning_rate": 9.949995181631444e-06,
"loss": 2.861325740814209,
"step": 535
},
{
"epoch": 0.42338072669826227,
"grad_norm": 16.87570821527581,
"learning_rate": 9.949344762242064e-06,
"loss": 2.9847991466522217,
"step": 536
},
{
"epoch": 0.42417061611374407,
"grad_norm": 15.098978095560872,
"learning_rate": 9.94869016162993e-06,
"loss": 3.360105037689209,
"step": 537
},
{
"epoch": 0.42496050552922593,
"grad_norm": 11.916401679971019,
"learning_rate": 9.948031380348051e-06,
"loss": 2.6311533451080322,
"step": 538
},
{
"epoch": 0.42575039494470773,
"grad_norm": 15.393668192114527,
"learning_rate": 9.94736841895297e-06,
"loss": 2.572305202484131,
"step": 539
},
{
"epoch": 0.4265402843601896,
"grad_norm": 21.88619737730292,
"learning_rate": 9.946701278004755e-06,
"loss": 2.6645431518554688,
"step": 540
},
{
"epoch": 0.4273301737756714,
"grad_norm": 14.623832761629139,
"learning_rate": 9.946029958067012e-06,
"loss": 2.8375582695007324,
"step": 541
},
{
"epoch": 0.42812006319115326,
"grad_norm": 11.959588905376497,
"learning_rate": 9.945354459706873e-06,
"loss": 2.8177828788757324,
"step": 542
},
{
"epoch": 0.42890995260663506,
"grad_norm": 11.801706214734535,
"learning_rate": 9.944674783495e-06,
"loss": 3.4021530151367188,
"step": 543
},
{
"epoch": 0.4296998420221169,
"grad_norm": 8.145218059279367,
"learning_rate": 9.94399093000559e-06,
"loss": 2.4974822998046875,
"step": 544
},
{
"epoch": 0.4304897314375987,
"grad_norm": 9.708825005813761,
"learning_rate": 9.94330289981636e-06,
"loss": 2.775845527648926,
"step": 545
},
{
"epoch": 0.4312796208530806,
"grad_norm": 22.963925015075688,
"learning_rate": 9.942610693508564e-06,
"loss": 2.5559940338134766,
"step": 546
},
{
"epoch": 0.4320695102685624,
"grad_norm": 9.353349856037912,
"learning_rate": 9.941914311666976e-06,
"loss": 2.7999205589294434,
"step": 547
},
{
"epoch": 0.43285939968404424,
"grad_norm": 13.107963046441109,
"learning_rate": 9.941213754879904e-06,
"loss": 2.6367478370666504,
"step": 548
},
{
"epoch": 0.43364928909952605,
"grad_norm": 9.146316137469308,
"learning_rate": 9.940509023739181e-06,
"loss": 2.6994175910949707,
"step": 549
},
{
"epoch": 0.4344391785150079,
"grad_norm": 6.883523518701926,
"learning_rate": 9.939800118840167e-06,
"loss": 2.807130813598633,
"step": 550
},
{
"epoch": 0.4352290679304897,
"grad_norm": 9.612172104441717,
"learning_rate": 9.939087040781743e-06,
"loss": 2.729193687438965,
"step": 551
},
{
"epoch": 0.43601895734597157,
"grad_norm": 24.67008747020927,
"learning_rate": 9.938369790166325e-06,
"loss": 2.557534694671631,
"step": 552
},
{
"epoch": 0.4368088467614534,
"grad_norm": 8.054347498191312,
"learning_rate": 9.937648367599845e-06,
"loss": 2.8205268383026123,
"step": 553
},
{
"epoch": 0.43759873617693523,
"grad_norm": 14.422963472461976,
"learning_rate": 9.936922773691764e-06,
"loss": 2.7715141773223877,
"step": 554
},
{
"epoch": 0.43838862559241704,
"grad_norm": 7.484053892132038,
"learning_rate": 9.93619300905507e-06,
"loss": 2.254258155822754,
"step": 555
},
{
"epoch": 0.4391785150078989,
"grad_norm": 11.183683079441906,
"learning_rate": 9.935459074306261e-06,
"loss": 2.682985544204712,
"step": 556
},
{
"epoch": 0.4399684044233807,
"grad_norm": 9.16127826816926,
"learning_rate": 9.934720970065379e-06,
"loss": 2.539468288421631,
"step": 557
},
{
"epoch": 0.44075829383886256,
"grad_norm": 7.141623413765421,
"learning_rate": 9.93397869695597e-06,
"loss": 2.5426435470581055,
"step": 558
},
{
"epoch": 0.4415481832543444,
"grad_norm": 10.23954487534604,
"learning_rate": 9.93323225560511e-06,
"loss": 3.2652475833892822,
"step": 559
},
{
"epoch": 0.4423380726698262,
"grad_norm": 16.167362326761815,
"learning_rate": 9.932481646643395e-06,
"loss": 2.5560061931610107,
"step": 560
},
{
"epoch": 0.4431279620853081,
"grad_norm": 24.50508286675025,
"learning_rate": 9.931726870704943e-06,
"loss": 2.4910902976989746,
"step": 561
},
{
"epoch": 0.4439178515007899,
"grad_norm": 13.272899493408493,
"learning_rate": 9.930967928427389e-06,
"loss": 3.1928012371063232,
"step": 562
},
{
"epoch": 0.44470774091627174,
"grad_norm": 11.029176665335946,
"learning_rate": 9.930204820451892e-06,
"loss": 2.045280933380127,
"step": 563
},
{
"epoch": 0.44549763033175355,
"grad_norm": 19.40400206586639,
"learning_rate": 9.92943754742313e-06,
"loss": 2.734166145324707,
"step": 564
},
{
"epoch": 0.4462875197472354,
"grad_norm": 7.706073509198339,
"learning_rate": 9.928666109989294e-06,
"loss": 2.8022024631500244,
"step": 565
},
{
"epoch": 0.4470774091627172,
"grad_norm": 14.62203101954205,
"learning_rate": 9.927890508802096e-06,
"loss": 2.7379016876220703,
"step": 566
},
{
"epoch": 0.44786729857819907,
"grad_norm": 17.181389624231375,
"learning_rate": 9.92711074451677e-06,
"loss": 2.999567985534668,
"step": 567
},
{
"epoch": 0.4486571879936809,
"grad_norm": 14.992846352483394,
"learning_rate": 9.926326817792065e-06,
"loss": 2.635314464569092,
"step": 568
},
{
"epoch": 0.44944707740916273,
"grad_norm": 7.6279407925730816,
"learning_rate": 9.925538729290239e-06,
"loss": 2.3740317821502686,
"step": 569
},
{
"epoch": 0.45023696682464454,
"grad_norm": 15.752498964379894,
"learning_rate": 9.924746479677075e-06,
"loss": 2.9476394653320312,
"step": 570
},
{
"epoch": 0.4510268562401264,
"grad_norm": 7.93326356210501,
"learning_rate": 9.923950069621868e-06,
"loss": 3.3303630352020264,
"step": 571
},
{
"epoch": 0.4518167456556082,
"grad_norm": 29.90614936882604,
"learning_rate": 9.923149499797429e-06,
"loss": 3.194509267807007,
"step": 572
},
{
"epoch": 0.45260663507109006,
"grad_norm": 13.371064507421321,
"learning_rate": 9.92234477088008e-06,
"loss": 2.94869327545166,
"step": 573
},
{
"epoch": 0.45339652448657186,
"grad_norm": 15.897418580785546,
"learning_rate": 9.921535883549658e-06,
"loss": 2.7056546211242676,
"step": 574
},
{
"epoch": 0.4541864139020537,
"grad_norm": 26.124918845816804,
"learning_rate": 9.920722838489515e-06,
"loss": 3.060375452041626,
"step": 575
},
{
"epoch": 0.4549763033175355,
"grad_norm": 18.882545145062025,
"learning_rate": 9.919905636386516e-06,
"loss": 3.0005345344543457,
"step": 576
},
{
"epoch": 0.4557661927330174,
"grad_norm": 13.37328310128464,
"learning_rate": 9.919084277931033e-06,
"loss": 2.5772323608398438,
"step": 577
},
{
"epoch": 0.4565560821484992,
"grad_norm": 17.390743534176387,
"learning_rate": 9.918258763816954e-06,
"loss": 3.439105749130249,
"step": 578
},
{
"epoch": 0.45734597156398105,
"grad_norm": 13.795578608418804,
"learning_rate": 9.917429094741676e-06,
"loss": 2.9797146320343018,
"step": 579
},
{
"epoch": 0.45813586097946285,
"grad_norm": 14.581579263670818,
"learning_rate": 9.916595271406104e-06,
"loss": 3.3291659355163574,
"step": 580
},
{
"epoch": 0.4589257503949447,
"grad_norm": 23.055562889090734,
"learning_rate": 9.915757294514658e-06,
"loss": 3.4787819385528564,
"step": 581
},
{
"epoch": 0.4597156398104265,
"grad_norm": 11.454956025296493,
"learning_rate": 9.91491516477526e-06,
"loss": 2.543201446533203,
"step": 582
},
{
"epoch": 0.46050552922590837,
"grad_norm": 27.44348974944536,
"learning_rate": 9.91406888289935e-06,
"loss": 3.8477420806884766,
"step": 583
},
{
"epoch": 0.46129541864139023,
"grad_norm": 13.748326155704126,
"learning_rate": 9.913218449601862e-06,
"loss": 3.204080581665039,
"step": 584
},
{
"epoch": 0.46208530805687204,
"grad_norm": 14.852830560317845,
"learning_rate": 9.912363865601252e-06,
"loss": 2.0701780319213867,
"step": 585
},
{
"epoch": 0.4628751974723539,
"grad_norm": 9.286607736299267,
"learning_rate": 9.911505131619467e-06,
"loss": 2.616168975830078,
"step": 586
},
{
"epoch": 0.4636650868878357,
"grad_norm": 27.1312162902194,
"learning_rate": 9.910642248381978e-06,
"loss": 3.5931811332702637,
"step": 587
},
{
"epoch": 0.46445497630331756,
"grad_norm": 32.41215406453979,
"learning_rate": 9.909775216617746e-06,
"loss": 2.6403136253356934,
"step": 588
},
{
"epoch": 0.46524486571879936,
"grad_norm": 16.462651257709005,
"learning_rate": 9.908904037059242e-06,
"loss": 1.806509017944336,
"step": 589
},
{
"epoch": 0.4660347551342812,
"grad_norm": 8.054217161807381,
"learning_rate": 9.908028710442443e-06,
"loss": 2.955305337905884,
"step": 590
},
{
"epoch": 0.466824644549763,
"grad_norm": 13.079572202568693,
"learning_rate": 9.907149237506825e-06,
"loss": 3.071561098098755,
"step": 591
},
{
"epoch": 0.4676145339652449,
"grad_norm": 17.039788808514484,
"learning_rate": 9.906265618995375e-06,
"loss": 3.0196356773376465,
"step": 592
},
{
"epoch": 0.4684044233807267,
"grad_norm": 13.364691523127064,
"learning_rate": 9.905377855654574e-06,
"loss": 2.70352840423584,
"step": 593
},
{
"epoch": 0.46919431279620855,
"grad_norm": 10.365123705943594,
"learning_rate": 9.904485948234406e-06,
"loss": 2.782586097717285,
"step": 594
},
{
"epoch": 0.46998420221169035,
"grad_norm": 17.914445810601254,
"learning_rate": 9.903589897488358e-06,
"loss": 3.854835271835327,
"step": 595
},
{
"epoch": 0.4707740916271722,
"grad_norm": 14.706414319504669,
"learning_rate": 9.902689704173418e-06,
"loss": 3.0983946323394775,
"step": 596
},
{
"epoch": 0.471563981042654,
"grad_norm": 11.041528398876812,
"learning_rate": 9.901785369050073e-06,
"loss": 3.148883819580078,
"step": 597
},
{
"epoch": 0.47235387045813587,
"grad_norm": 12.764570521775047,
"learning_rate": 9.900876892882303e-06,
"loss": 3.0621113777160645,
"step": 598
},
{
"epoch": 0.4731437598736177,
"grad_norm": 12.917586958199212,
"learning_rate": 9.899964276437596e-06,
"loss": 2.7622828483581543,
"step": 599
},
{
"epoch": 0.47393364928909953,
"grad_norm": 11.372886554785106,
"learning_rate": 9.899047520486935e-06,
"loss": 2.379685878753662,
"step": 600
},
{
"epoch": 0.47472353870458134,
"grad_norm": 8.25416016120882,
"learning_rate": 9.898126625804796e-06,
"loss": 2.2554409503936768,
"step": 601
},
{
"epoch": 0.4755134281200632,
"grad_norm": 13.238723315824029,
"learning_rate": 9.897201593169153e-06,
"loss": 2.7117209434509277,
"step": 602
},
{
"epoch": 0.476303317535545,
"grad_norm": 11.785948359638766,
"learning_rate": 9.896272423361479e-06,
"loss": 2.219001531600952,
"step": 603
},
{
"epoch": 0.47709320695102686,
"grad_norm": 16.426783052438104,
"learning_rate": 9.895339117166737e-06,
"loss": 3.105238199234009,
"step": 604
},
{
"epoch": 0.47788309636650866,
"grad_norm": 9.983370358512682,
"learning_rate": 9.894401675373388e-06,
"loss": 2.7501213550567627,
"step": 605
},
{
"epoch": 0.4786729857819905,
"grad_norm": 11.613745339741977,
"learning_rate": 9.89346009877339e-06,
"loss": 3.157028913497925,
"step": 606
},
{
"epoch": 0.4794628751974723,
"grad_norm": 16.033920286391126,
"learning_rate": 9.892514388162183e-06,
"loss": 2.930591106414795,
"step": 607
},
{
"epoch": 0.4802527646129542,
"grad_norm": 26.306838660431477,
"learning_rate": 9.89156454433871e-06,
"loss": 2.365173816680908,
"step": 608
},
{
"epoch": 0.48104265402843605,
"grad_norm": 8.551782014277038,
"learning_rate": 9.890610568105401e-06,
"loss": 2.737978935241699,
"step": 609
},
{
"epoch": 0.48183254344391785,
"grad_norm": 6.191554963386605,
"learning_rate": 9.889652460268183e-06,
"loss": 1.0541880130767822,
"step": 610
},
{
"epoch": 0.4826224328593997,
"grad_norm": 10.343101459770976,
"learning_rate": 9.888690221636462e-06,
"loss": 3.482835054397583,
"step": 611
},
{
"epoch": 0.4834123222748815,
"grad_norm": 7.536217244705736,
"learning_rate": 9.887723853023144e-06,
"loss": 2.714404582977295,
"step": 612
},
{
"epoch": 0.48420221169036337,
"grad_norm": 11.909381848673933,
"learning_rate": 9.88675335524462e-06,
"loss": 2.518251895904541,
"step": 613
},
{
"epoch": 0.4849921011058452,
"grad_norm": 19.55484435219003,
"learning_rate": 9.885778729120771e-06,
"loss": 3.3546159267425537,
"step": 614
},
{
"epoch": 0.48578199052132703,
"grad_norm": 13.61585851649587,
"learning_rate": 9.884799975474961e-06,
"loss": 3.282747745513916,
"step": 615
},
{
"epoch": 0.48657187993680884,
"grad_norm": 7.245085293733777,
"learning_rate": 9.883817095134048e-06,
"loss": 2.7314577102661133,
"step": 616
},
{
"epoch": 0.4873617693522907,
"grad_norm": 9.753488601463083,
"learning_rate": 9.882830088928368e-06,
"loss": 2.8968541622161865,
"step": 617
},
{
"epoch": 0.4881516587677725,
"grad_norm": 10.570209792818952,
"learning_rate": 9.881838957691752e-06,
"loss": 2.766514539718628,
"step": 618
},
{
"epoch": 0.48894154818325436,
"grad_norm": 10.820407163130954,
"learning_rate": 9.880843702261506e-06,
"loss": 2.4016904830932617,
"step": 619
},
{
"epoch": 0.48973143759873616,
"grad_norm": 11.038883967400233,
"learning_rate": 9.87984432347843e-06,
"loss": 2.8720149993896484,
"step": 620
},
{
"epoch": 0.490521327014218,
"grad_norm": 11.813323160870127,
"learning_rate": 9.8788408221868e-06,
"loss": 2.9305214881896973,
"step": 621
},
{
"epoch": 0.4913112164296998,
"grad_norm": 11.366049566856011,
"learning_rate": 9.877833199234378e-06,
"loss": 2.9653875827789307,
"step": 622
},
{
"epoch": 0.4921011058451817,
"grad_norm": 12.438353507803086,
"learning_rate": 9.876821455472405e-06,
"loss": 2.3867058753967285,
"step": 623
},
{
"epoch": 0.4928909952606635,
"grad_norm": 8.498213125601112,
"learning_rate": 9.875805591755608e-06,
"loss": 3.2036352157592773,
"step": 624
},
{
"epoch": 0.49368088467614535,
"grad_norm": 16.30479309681846,
"learning_rate": 9.874785608942192e-06,
"loss": 3.305636167526245,
"step": 625
},
{
"epoch": 0.49447077409162715,
"grad_norm": 10.081140854214283,
"learning_rate": 9.87376150789384e-06,
"loss": 3.041412353515625,
"step": 626
},
{
"epoch": 0.495260663507109,
"grad_norm": 64.71900103986658,
"learning_rate": 9.872733289475717e-06,
"loss": 4.324435710906982,
"step": 627
},
{
"epoch": 0.4960505529225908,
"grad_norm": 11.539415904080359,
"learning_rate": 9.871700954556464e-06,
"loss": 3.1219332218170166,
"step": 628
},
{
"epoch": 0.4968404423380727,
"grad_norm": 16.008989800922443,
"learning_rate": 9.870664504008205e-06,
"loss": 2.5995893478393555,
"step": 629
},
{
"epoch": 0.4976303317535545,
"grad_norm": 11.383945681171209,
"learning_rate": 9.869623938706531e-06,
"loss": 2.9473705291748047,
"step": 630
},
{
"epoch": 0.49842022116903634,
"grad_norm": 14.730134044674989,
"learning_rate": 9.868579259530519e-06,
"loss": 3.243873357772827,
"step": 631
},
{
"epoch": 0.49921011058451814,
"grad_norm": 8.390549227817651,
"learning_rate": 9.867530467362718e-06,
"loss": 2.4504904747009277,
"step": 632
},
{
"epoch": 0.5,
"grad_norm": 8.424703770575638,
"learning_rate": 9.866477563089148e-06,
"loss": 2.6318535804748535,
"step": 633
},
{
"epoch": 0.5007898894154819,
"grad_norm": 9.38333524987037,
"learning_rate": 9.865420547599308e-06,
"loss": 2.7379918098449707,
"step": 634
},
{
"epoch": 0.5015797788309637,
"grad_norm": 9.704149060081368,
"learning_rate": 9.864359421786168e-06,
"loss": 2.304293632507324,
"step": 635
},
{
"epoch": 0.5023696682464455,
"grad_norm": 24.08311236031377,
"learning_rate": 9.863294186546168e-06,
"loss": 2.332653522491455,
"step": 636
},
{
"epoch": 0.5031595576619273,
"grad_norm": 16.210594683251617,
"learning_rate": 9.862224842779225e-06,
"loss": 3.0899691581726074,
"step": 637
},
{
"epoch": 0.5039494470774092,
"grad_norm": 14.171269286760387,
"learning_rate": 9.861151391388726e-06,
"loss": 2.624315023422241,
"step": 638
},
{
"epoch": 0.504739336492891,
"grad_norm": 17.155749747348096,
"learning_rate": 9.86007383328152e-06,
"loss": 3.289152145385742,
"step": 639
},
{
"epoch": 0.5055292259083728,
"grad_norm": 8.077042869666613,
"learning_rate": 9.858992169367939e-06,
"loss": 2.8146300315856934,
"step": 640
},
{
"epoch": 0.5063191153238547,
"grad_norm": 14.900885791368967,
"learning_rate": 9.857906400561771e-06,
"loss": 3.192298173904419,
"step": 641
},
{
"epoch": 0.5071090047393365,
"grad_norm": 9.42772244547937,
"learning_rate": 9.856816527780279e-06,
"loss": 2.958979845046997,
"step": 642
},
{
"epoch": 0.5078988941548184,
"grad_norm": 19.45725727165277,
"learning_rate": 9.855722551944192e-06,
"loss": 2.5792651176452637,
"step": 643
},
{
"epoch": 0.5086887835703001,
"grad_norm": 18.71191352944223,
"learning_rate": 9.854624473977702e-06,
"loss": 2.2687480449676514,
"step": 644
},
{
"epoch": 0.509478672985782,
"grad_norm": 5.973081965855647,
"learning_rate": 9.85352229480847e-06,
"loss": 2.5880727767944336,
"step": 645
},
{
"epoch": 0.5102685624012638,
"grad_norm": 7.964099363507532,
"learning_rate": 9.852416015367622e-06,
"loss": 3.0370497703552246,
"step": 646
},
{
"epoch": 0.5110584518167457,
"grad_norm": 6.554920362859979,
"learning_rate": 9.851305636589745e-06,
"loss": 2.9287662506103516,
"step": 647
},
{
"epoch": 0.5118483412322274,
"grad_norm": 8.324682375342665,
"learning_rate": 9.85019115941289e-06,
"loss": 2.7224721908569336,
"step": 648
},
{
"epoch": 0.5126382306477093,
"grad_norm": 15.108696475255462,
"learning_rate": 9.849072584778572e-06,
"loss": 3.304979085922241,
"step": 649
},
{
"epoch": 0.5134281200631912,
"grad_norm": 13.13698436229945,
"learning_rate": 9.847949913631767e-06,
"loss": 1.872714877128601,
"step": 650
},
{
"epoch": 0.514218009478673,
"grad_norm": 14.549850333742206,
"learning_rate": 9.84682314692091e-06,
"loss": 3.086406707763672,
"step": 651
},
{
"epoch": 0.5150078988941548,
"grad_norm": 14.573017384190178,
"learning_rate": 9.845692285597898e-06,
"loss": 3.119309186935425,
"step": 652
},
{
"epoch": 0.5157977883096366,
"grad_norm": 14.70328848938873,
"learning_rate": 9.844557330618087e-06,
"loss": 3.3144378662109375,
"step": 653
},
{
"epoch": 0.5165876777251185,
"grad_norm": 13.577075929710624,
"learning_rate": 9.843418282940291e-06,
"loss": 3.093888282775879,
"step": 654
},
{
"epoch": 0.5173775671406003,
"grad_norm": 16.48466621944885,
"learning_rate": 9.842275143526779e-06,
"loss": 3.2132608890533447,
"step": 655
},
{
"epoch": 0.5181674565560821,
"grad_norm": 21.816699436754334,
"learning_rate": 9.841127913343281e-06,
"loss": 2.8770318031311035,
"step": 656
},
{
"epoch": 0.518957345971564,
"grad_norm": 18.527709516001913,
"learning_rate": 9.83997659335898e-06,
"loss": 3.0780622959136963,
"step": 657
},
{
"epoch": 0.5197472353870458,
"grad_norm": 12.06728106207623,
"learning_rate": 9.838821184546513e-06,
"loss": 2.7250850200653076,
"step": 658
},
{
"epoch": 0.5205371248025277,
"grad_norm": 24.07545499285517,
"learning_rate": 9.837661687881976e-06,
"loss": 2.6896378993988037,
"step": 659
},
{
"epoch": 0.5213270142180095,
"grad_norm": 14.021969692199526,
"learning_rate": 9.836498104344916e-06,
"loss": 2.549968719482422,
"step": 660
},
{
"epoch": 0.5221169036334913,
"grad_norm": 10.925462746063516,
"learning_rate": 9.835330434918329e-06,
"loss": 2.809274673461914,
"step": 661
},
{
"epoch": 0.5229067930489731,
"grad_norm": 13.630283976038868,
"learning_rate": 9.83415868058867e-06,
"loss": 3.4539241790771484,
"step": 662
},
{
"epoch": 0.523696682464455,
"grad_norm": 12.004271103191083,
"learning_rate": 9.832982842345838e-06,
"loss": 3.2775259017944336,
"step": 663
},
{
"epoch": 0.5244865718799369,
"grad_norm": 12.05995942930735,
"learning_rate": 9.831802921183184e-06,
"loss": 2.543905735015869,
"step": 664
},
{
"epoch": 0.5252764612954186,
"grad_norm": 8.986701835621098,
"learning_rate": 9.830618918097514e-06,
"loss": 2.7053022384643555,
"step": 665
},
{
"epoch": 0.5260663507109005,
"grad_norm": 9.733936760368861,
"learning_rate": 9.829430834089072e-06,
"loss": 2.9009079933166504,
"step": 666
},
{
"epoch": 0.5268562401263823,
"grad_norm": 9.705816192138302,
"learning_rate": 9.82823867016156e-06,
"loss": 2.320451259613037,
"step": 667
},
{
"epoch": 0.5276461295418642,
"grad_norm": 15.366368478511447,
"learning_rate": 9.82704242732212e-06,
"loss": 3.4952645301818848,
"step": 668
},
{
"epoch": 0.5284360189573459,
"grad_norm": 15.006524588925533,
"learning_rate": 9.825842106581343e-06,
"loss": 2.6732113361358643,
"step": 669
},
{
"epoch": 0.5292259083728278,
"grad_norm": 9.72530845064569,
"learning_rate": 9.824637708953262e-06,
"loss": 2.7073092460632324,
"step": 670
},
{
"epoch": 0.5300157977883097,
"grad_norm": 57.6393618147052,
"learning_rate": 9.823429235455357e-06,
"loss": 2.821194887161255,
"step": 671
},
{
"epoch": 0.5308056872037915,
"grad_norm": 15.587439690063317,
"learning_rate": 9.822216687108549e-06,
"loss": 3.2857871055603027,
"step": 672
},
{
"epoch": 0.5315955766192733,
"grad_norm": 5.803845483605398,
"learning_rate": 9.821000064937205e-06,
"loss": 2.699526309967041,
"step": 673
},
{
"epoch": 0.5323854660347551,
"grad_norm": 8.386330014964896,
"learning_rate": 9.81977936996913e-06,
"loss": 2.623192071914673,
"step": 674
},
{
"epoch": 0.533175355450237,
"grad_norm": 11.975531675577631,
"learning_rate": 9.818554603235574e-06,
"loss": 2.8475778102874756,
"step": 675
},
{
"epoch": 0.5339652448657188,
"grad_norm": 8.008863647269184,
"learning_rate": 9.81732576577122e-06,
"loss": 2.2658133506774902,
"step": 676
},
{
"epoch": 0.5347551342812006,
"grad_norm": 22.741549269978968,
"learning_rate": 9.816092858614197e-06,
"loss": 3.2006266117095947,
"step": 677
},
{
"epoch": 0.5355450236966824,
"grad_norm": 11.748680083922181,
"learning_rate": 9.814855882806068e-06,
"loss": 2.7071900367736816,
"step": 678
},
{
"epoch": 0.5363349131121643,
"grad_norm": 15.879587406556047,
"learning_rate": 9.813614839391831e-06,
"loss": 3.224722385406494,
"step": 679
},
{
"epoch": 0.5371248025276462,
"grad_norm": 9.796469130582556,
"learning_rate": 9.812369729419928e-06,
"loss": 3.0697150230407715,
"step": 680
},
{
"epoch": 0.5379146919431279,
"grad_norm": 14.580990160396306,
"learning_rate": 9.811120553942232e-06,
"loss": 3.0138320922851562,
"step": 681
},
{
"epoch": 0.5387045813586098,
"grad_norm": 8.267187675857448,
"learning_rate": 9.809867314014047e-06,
"loss": 2.831322193145752,
"step": 682
},
{
"epoch": 0.5394944707740916,
"grad_norm": 7.636617838042198,
"learning_rate": 9.808610010694118e-06,
"loss": 3.239677906036377,
"step": 683
},
{
"epoch": 0.5402843601895735,
"grad_norm": 15.391979519107375,
"learning_rate": 9.807348645044617e-06,
"loss": 2.747056245803833,
"step": 684
},
{
"epoch": 0.5410742496050553,
"grad_norm": 8.961485412404242,
"learning_rate": 9.806083218131148e-06,
"loss": 2.910431385040283,
"step": 685
},
{
"epoch": 0.5418641390205371,
"grad_norm": 14.197136726081288,
"learning_rate": 9.804813731022753e-06,
"loss": 3.1174066066741943,
"step": 686
},
{
"epoch": 0.542654028436019,
"grad_norm": 17.168792272156345,
"learning_rate": 9.803540184791894e-06,
"loss": 3.201021671295166,
"step": 687
},
{
"epoch": 0.5434439178515008,
"grad_norm": 16.249151769463698,
"learning_rate": 9.80226258051447e-06,
"loss": 3.176429510116577,
"step": 688
},
{
"epoch": 0.5442338072669827,
"grad_norm": 13.891045745287295,
"learning_rate": 9.800980919269803e-06,
"loss": 2.7803795337677,
"step": 689
},
{
"epoch": 0.5450236966824644,
"grad_norm": 10.877077331774473,
"learning_rate": 9.799695202140647e-06,
"loss": 2.882291555404663,
"step": 690
},
{
"epoch": 0.5458135860979463,
"grad_norm": 9.592760171635328,
"learning_rate": 9.798405430213177e-06,
"loss": 2.8458828926086426,
"step": 691
},
{
"epoch": 0.5466034755134281,
"grad_norm": 7.884425647446356,
"learning_rate": 9.797111604577e-06,
"loss": 2.5656301975250244,
"step": 692
},
{
"epoch": 0.54739336492891,
"grad_norm": 10.716521460356288,
"learning_rate": 9.795813726325142e-06,
"loss": 2.761523485183716,
"step": 693
},
{
"epoch": 0.5481832543443917,
"grad_norm": 11.360091917539375,
"learning_rate": 9.794511796554055e-06,
"loss": 2.7499184608459473,
"step": 694
},
{
"epoch": 0.5489731437598736,
"grad_norm": 23.055753815263646,
"learning_rate": 9.793205816363616e-06,
"loss": 2.517162799835205,
"step": 695
},
{
"epoch": 0.5497630331753555,
"grad_norm": 7.78885839561945,
"learning_rate": 9.791895786857118e-06,
"loss": 2.723165273666382,
"step": 696
},
{
"epoch": 0.5505529225908373,
"grad_norm": 17.967883170098016,
"learning_rate": 9.79058170914128e-06,
"loss": 2.6987109184265137,
"step": 697
},
{
"epoch": 0.5513428120063191,
"grad_norm": 11.709595647250415,
"learning_rate": 9.789263584326238e-06,
"loss": 2.288817882537842,
"step": 698
},
{
"epoch": 0.5521327014218009,
"grad_norm": 7.605046056867549,
"learning_rate": 9.78794141352555e-06,
"loss": 2.7125701904296875,
"step": 699
},
{
"epoch": 0.5529225908372828,
"grad_norm": 10.47514502255061,
"learning_rate": 9.786615197856188e-06,
"loss": 2.7359495162963867,
"step": 700
},
{
"epoch": 0.5537124802527646,
"grad_norm": 14.91063318894891,
"learning_rate": 9.785284938438545e-06,
"loss": 3.3785290718078613,
"step": 701
},
{
"epoch": 0.5545023696682464,
"grad_norm": 8.501053428871433,
"learning_rate": 9.783950636396429e-06,
"loss": 2.2032179832458496,
"step": 702
},
{
"epoch": 0.5552922590837283,
"grad_norm": 8.382387828104997,
"learning_rate": 9.78261229285706e-06,
"loss": 2.5567541122436523,
"step": 703
},
{
"epoch": 0.5560821484992101,
"grad_norm": 6.833003481377768,
"learning_rate": 9.781269908951079e-06,
"loss": 2.9519448280334473,
"step": 704
},
{
"epoch": 0.556872037914692,
"grad_norm": 10.767129048941534,
"learning_rate": 9.779923485812534e-06,
"loss": 2.9880781173706055,
"step": 705
},
{
"epoch": 0.5576619273301737,
"grad_norm": 17.977394830366933,
"learning_rate": 9.778573024578886e-06,
"loss": 2.571939706802368,
"step": 706
},
{
"epoch": 0.5584518167456556,
"grad_norm": 8.839985291634292,
"learning_rate": 9.777218526391013e-06,
"loss": 2.6147358417510986,
"step": 707
},
{
"epoch": 0.5592417061611374,
"grad_norm": 16.290139197222373,
"learning_rate": 9.775859992393198e-06,
"loss": 2.765791654586792,
"step": 708
},
{
"epoch": 0.5600315955766193,
"grad_norm": 9.812939254834504,
"learning_rate": 9.774497423733134e-06,
"loss": 2.9214420318603516,
"step": 709
},
{
"epoch": 0.5608214849921012,
"grad_norm": 10.32637589864547,
"learning_rate": 9.773130821561923e-06,
"loss": 2.793147563934326,
"step": 710
},
{
"epoch": 0.5616113744075829,
"grad_norm": 8.09381965198076,
"learning_rate": 9.771760187034076e-06,
"loss": 3.287661552429199,
"step": 711
},
{
"epoch": 0.5624012638230648,
"grad_norm": 35.57847572787897,
"learning_rate": 9.770385521307511e-06,
"loss": 2.639596462249756,
"step": 712
},
{
"epoch": 0.5631911532385466,
"grad_norm": 10.298454720332643,
"learning_rate": 9.769006825543547e-06,
"loss": 2.0149660110473633,
"step": 713
},
{
"epoch": 0.5639810426540285,
"grad_norm": 9.21174002933949,
"learning_rate": 9.767624100906915e-06,
"loss": 2.675302743911743,
"step": 714
},
{
"epoch": 0.5647709320695102,
"grad_norm": 7.382352205544447,
"learning_rate": 9.766237348565741e-06,
"loss": 1.6485764980316162,
"step": 715
},
{
"epoch": 0.5655608214849921,
"grad_norm": 10.607210159075297,
"learning_rate": 9.76484656969156e-06,
"loss": 2.4519925117492676,
"step": 716
},
{
"epoch": 0.566350710900474,
"grad_norm": 10.50163998316087,
"learning_rate": 9.763451765459307e-06,
"loss": 2.3074722290039062,
"step": 717
},
{
"epoch": 0.5671406003159558,
"grad_norm": 10.624503499088616,
"learning_rate": 9.762052937047318e-06,
"loss": 2.695051670074463,
"step": 718
},
{
"epoch": 0.5679304897314376,
"grad_norm": 15.616632678838926,
"learning_rate": 9.760650085637322e-06,
"loss": 3.360673666000366,
"step": 719
},
{
"epoch": 0.5687203791469194,
"grad_norm": 8.375766262844625,
"learning_rate": 9.75924321241446e-06,
"loss": 2.7661333084106445,
"step": 720
},
{
"epoch": 0.5695102685624013,
"grad_norm": 16.894936615307905,
"learning_rate": 9.75783231856726e-06,
"loss": 2.6800551414489746,
"step": 721
},
{
"epoch": 0.5703001579778831,
"grad_norm": 9.306984501448646,
"learning_rate": 9.756417405287649e-06,
"loss": 3.1795547008514404,
"step": 722
},
{
"epoch": 0.5710900473933649,
"grad_norm": 9.638215383453232,
"learning_rate": 9.754998473770952e-06,
"loss": 3.0728039741516113,
"step": 723
},
{
"epoch": 0.5718799368088467,
"grad_norm": 18.971875030373713,
"learning_rate": 9.753575525215885e-06,
"loss": 3.101027250289917,
"step": 724
},
{
"epoch": 0.5726698262243286,
"grad_norm": 9.703129422537145,
"learning_rate": 9.752148560824562e-06,
"loss": 2.3094897270202637,
"step": 725
},
{
"epoch": 0.5734597156398105,
"grad_norm": 15.734256139808767,
"learning_rate": 9.750717581802486e-06,
"loss": 2.910053253173828,
"step": 726
},
{
"epoch": 0.5742496050552922,
"grad_norm": 16.302208461415216,
"learning_rate": 9.749282589358553e-06,
"loss": 3.1184496879577637,
"step": 727
},
{
"epoch": 0.5750394944707741,
"grad_norm": 12.9735057677299,
"learning_rate": 9.747843584705047e-06,
"loss": 2.873502731323242,
"step": 728
},
{
"epoch": 0.5758293838862559,
"grad_norm": 22.638733634704586,
"learning_rate": 9.746400569057648e-06,
"loss": 2.4251301288604736,
"step": 729
},
{
"epoch": 0.5766192733017378,
"grad_norm": 7.163678590109422,
"learning_rate": 9.744953543635417e-06,
"loss": 2.799077033996582,
"step": 730
},
{
"epoch": 0.5774091627172195,
"grad_norm": 15.90505006968332,
"learning_rate": 9.74350250966081e-06,
"loss": 3.6660590171813965,
"step": 731
},
{
"epoch": 0.5781990521327014,
"grad_norm": 7.61368151252301,
"learning_rate": 9.742047468359661e-06,
"loss": 3.0964913368225098,
"step": 732
},
{
"epoch": 0.5789889415481833,
"grad_norm": 7.331962193745934,
"learning_rate": 9.740588420961194e-06,
"loss": 2.7701282501220703,
"step": 733
},
{
"epoch": 0.5797788309636651,
"grad_norm": 10.939975135494327,
"learning_rate": 9.739125368698019e-06,
"loss": 2.936520576477051,
"step": 734
},
{
"epoch": 0.580568720379147,
"grad_norm": 11.923772537267181,
"learning_rate": 9.737658312806128e-06,
"loss": 2.714221477508545,
"step": 735
},
{
"epoch": 0.5813586097946287,
"grad_norm": 8.343105964786538,
"learning_rate": 9.73618725452489e-06,
"loss": 2.6335134506225586,
"step": 736
},
{
"epoch": 0.5821484992101106,
"grad_norm": 11.578813272698921,
"learning_rate": 9.734712195097068e-06,
"loss": 3.0548324584960938,
"step": 737
},
{
"epoch": 0.5829383886255924,
"grad_norm": 8.178172803096258,
"learning_rate": 9.733233135768789e-06,
"loss": 2.455691337585449,
"step": 738
},
{
"epoch": 0.5837282780410743,
"grad_norm": 8.208384722056353,
"learning_rate": 9.731750077789572e-06,
"loss": 2.85522198677063,
"step": 739
},
{
"epoch": 0.584518167456556,
"grad_norm": 14.357432181582325,
"learning_rate": 9.730263022412307e-06,
"loss": 2.7620186805725098,
"step": 740
},
{
"epoch": 0.5853080568720379,
"grad_norm": 19.304824374340818,
"learning_rate": 9.728771970893262e-06,
"loss": 2.078908920288086,
"step": 741
},
{
"epoch": 0.5860979462875198,
"grad_norm": 8.634363379681329,
"learning_rate": 9.727276924492088e-06,
"loss": 2.5789947509765625,
"step": 742
},
{
"epoch": 0.5868878357030016,
"grad_norm": 13.987495203502483,
"learning_rate": 9.725777884471798e-06,
"loss": 2.6600892543792725,
"step": 743
},
{
"epoch": 0.5876777251184834,
"grad_norm": 21.907120600463696,
"learning_rate": 9.724274852098792e-06,
"loss": 3.122257709503174,
"step": 744
},
{
"epoch": 0.5884676145339652,
"grad_norm": 12.86185834923555,
"learning_rate": 9.722767828642831e-06,
"loss": 2.9660885334014893,
"step": 745
},
{
"epoch": 0.5892575039494471,
"grad_norm": 15.825928885727478,
"learning_rate": 9.721256815377059e-06,
"loss": 2.9355366230010986,
"step": 746
},
{
"epoch": 0.590047393364929,
"grad_norm": 17.506752397926537,
"learning_rate": 9.719741813577982e-06,
"loss": 2.7380142211914062,
"step": 747
},
{
"epoch": 0.5908372827804107,
"grad_norm": 13.905446136498991,
"learning_rate": 9.718222824525476e-06,
"loss": 2.624443292617798,
"step": 748
},
{
"epoch": 0.5916271721958926,
"grad_norm": 10.950226102749792,
"learning_rate": 9.716699849502794e-06,
"loss": 3.353207588195801,
"step": 749
},
{
"epoch": 0.5924170616113744,
"grad_norm": 10.01361583202192,
"learning_rate": 9.715172889796546e-06,
"loss": 2.4462380409240723,
"step": 750
},
{
"epoch": 0.5932069510268563,
"grad_norm": 10.868695455875331,
"learning_rate": 9.713641946696713e-06,
"loss": 2.470088005065918,
"step": 751
},
{
"epoch": 0.593996840442338,
"grad_norm": 12.24308594192275,
"learning_rate": 9.712107021496641e-06,
"loss": 2.746387004852295,
"step": 752
},
{
"epoch": 0.5947867298578199,
"grad_norm": 10.293898753873405,
"learning_rate": 9.710568115493041e-06,
"loss": 2.893784284591675,
"step": 753
},
{
"epoch": 0.5955766192733017,
"grad_norm": 8.791452259605602,
"learning_rate": 9.709025229985986e-06,
"loss": 3.845496654510498,
"step": 754
},
{
"epoch": 0.5963665086887836,
"grad_norm": 7.952120961759487,
"learning_rate": 9.707478366278911e-06,
"loss": 3.286113739013672,
"step": 755
},
{
"epoch": 0.5971563981042654,
"grad_norm": 11.052387334163443,
"learning_rate": 9.705927525678608e-06,
"loss": 2.54490327835083,
"step": 756
},
{
"epoch": 0.5979462875197472,
"grad_norm": 28.59255694909707,
"learning_rate": 9.704372709495237e-06,
"loss": 2.303287982940674,
"step": 757
},
{
"epoch": 0.5987361769352291,
"grad_norm": 12.717484973265515,
"learning_rate": 9.702813919042308e-06,
"loss": 2.5255141258239746,
"step": 758
},
{
"epoch": 0.5995260663507109,
"grad_norm": 10.644626496040491,
"learning_rate": 9.701251155636696e-06,
"loss": 2.6174449920654297,
"step": 759
},
{
"epoch": 0.6003159557661928,
"grad_norm": 10.420680692777719,
"learning_rate": 9.699684420598622e-06,
"loss": 3.0751430988311768,
"step": 760
},
{
"epoch": 0.6011058451816745,
"grad_norm": 17.7407887464059,
"learning_rate": 9.698113715251678e-06,
"loss": 3.1690831184387207,
"step": 761
},
{
"epoch": 0.6018957345971564,
"grad_norm": 6.8133884021840165,
"learning_rate": 9.696539040922794e-06,
"loss": 2.992917060852051,
"step": 762
},
{
"epoch": 0.6026856240126383,
"grad_norm": 14.288096164471353,
"learning_rate": 9.694960398942264e-06,
"loss": 2.862287998199463,
"step": 763
},
{
"epoch": 0.6034755134281201,
"grad_norm": 8.475183410580591,
"learning_rate": 9.693377790643728e-06,
"loss": 2.5695481300354004,
"step": 764
},
{
"epoch": 0.6042654028436019,
"grad_norm": 12.006246518434427,
"learning_rate": 9.69179121736418e-06,
"loss": 2.456130266189575,
"step": 765
},
{
"epoch": 0.6050552922590837,
"grad_norm": 10.187805065838019,
"learning_rate": 9.69020068044396e-06,
"loss": 2.798001766204834,
"step": 766
},
{
"epoch": 0.6058451816745656,
"grad_norm": 12.030587021942077,
"learning_rate": 9.68860618122676e-06,
"loss": 3.1254353523254395,
"step": 767
},
{
"epoch": 0.6066350710900474,
"grad_norm": 8.960697365970546,
"learning_rate": 9.68700772105962e-06,
"loss": 2.784362316131592,
"step": 768
},
{
"epoch": 0.6074249605055292,
"grad_norm": 12.104781783849731,
"learning_rate": 9.685405301292924e-06,
"loss": 2.487422466278076,
"step": 769
},
{
"epoch": 0.608214849921011,
"grad_norm": 11.735082585197036,
"learning_rate": 9.683798923280398e-06,
"loss": 2.749908208847046,
"step": 770
},
{
"epoch": 0.6090047393364929,
"grad_norm": 8.39933378207941,
"learning_rate": 9.68218858837912e-06,
"loss": 2.5542333126068115,
"step": 771
},
{
"epoch": 0.6097946287519748,
"grad_norm": 13.203001273703372,
"learning_rate": 9.680574297949503e-06,
"loss": 2.381009578704834,
"step": 772
},
{
"epoch": 0.6105845181674565,
"grad_norm": 10.227737175443485,
"learning_rate": 9.678956053355306e-06,
"loss": 2.797962188720703,
"step": 773
},
{
"epoch": 0.6113744075829384,
"grad_norm": 19.644909570407606,
"learning_rate": 9.677333855963627e-06,
"loss": 2.3713326454162598,
"step": 774
},
{
"epoch": 0.6121642969984202,
"grad_norm": 10.802978377889708,
"learning_rate": 9.675707707144906e-06,
"loss": 3.7946083545684814,
"step": 775
},
{
"epoch": 0.6129541864139021,
"grad_norm": 9.800404823560639,
"learning_rate": 9.674077608272916e-06,
"loss": 2.2313640117645264,
"step": 776
},
{
"epoch": 0.6137440758293838,
"grad_norm": 10.197415931944706,
"learning_rate": 9.67244356072477e-06,
"loss": 2.564563035964966,
"step": 777
},
{
"epoch": 0.6145339652448657,
"grad_norm": 13.443842890508776,
"learning_rate": 9.670805565880919e-06,
"loss": 2.851914405822754,
"step": 778
},
{
"epoch": 0.6153238546603476,
"grad_norm": 19.85673680273832,
"learning_rate": 9.669163625125143e-06,
"loss": 2.2651009559631348,
"step": 779
},
{
"epoch": 0.6161137440758294,
"grad_norm": 12.884481490463953,
"learning_rate": 9.667517739844563e-06,
"loss": 2.5570054054260254,
"step": 780
},
{
"epoch": 0.6169036334913112,
"grad_norm": 17.453234670736126,
"learning_rate": 9.665867911429625e-06,
"loss": 2.5283799171447754,
"step": 781
},
{
"epoch": 0.617693522906793,
"grad_norm": 18.936216590713382,
"learning_rate": 9.664214141274111e-06,
"loss": 3.2821831703186035,
"step": 782
},
{
"epoch": 0.6184834123222749,
"grad_norm": 12.29731371387329,
"learning_rate": 9.662556430775132e-06,
"loss": 3.238713264465332,
"step": 783
},
{
"epoch": 0.6192733017377567,
"grad_norm": 13.021393099946007,
"learning_rate": 9.660894781333126e-06,
"loss": 2.938838005065918,
"step": 784
},
{
"epoch": 0.6200631911532386,
"grad_norm": 6.931741915013856,
"learning_rate": 9.65922919435186e-06,
"loss": 2.7250375747680664,
"step": 785
},
{
"epoch": 0.6208530805687204,
"grad_norm": 12.605958216464781,
"learning_rate": 9.657559671238428e-06,
"loss": 2.9273529052734375,
"step": 786
},
{
"epoch": 0.6216429699842022,
"grad_norm": 24.241779337105008,
"learning_rate": 9.65588621340325e-06,
"loss": 2.6192431449890137,
"step": 787
},
{
"epoch": 0.6224328593996841,
"grad_norm": 15.774873829312497,
"learning_rate": 9.654208822260064e-06,
"loss": 2.6683297157287598,
"step": 788
},
{
"epoch": 0.6232227488151659,
"grad_norm": 13.55562535680502,
"learning_rate": 9.65252749922594e-06,
"loss": 3.453798294067383,
"step": 789
},
{
"epoch": 0.6240126382306477,
"grad_norm": 10.437339665014067,
"learning_rate": 9.650842245721265e-06,
"loss": 2.660048007965088,
"step": 790
},
{
"epoch": 0.6248025276461295,
"grad_norm": 7.373849740564372,
"learning_rate": 9.649153063169747e-06,
"loss": 3.181802272796631,
"step": 791
},
{
"epoch": 0.6255924170616114,
"grad_norm": 10.97034956730053,
"learning_rate": 9.647459952998409e-06,
"loss": 2.794236183166504,
"step": 792
},
{
"epoch": 0.6263823064770933,
"grad_norm": 13.289064458186381,
"learning_rate": 9.6457629166376e-06,
"loss": 2.934234142303467,
"step": 793
},
{
"epoch": 0.627172195892575,
"grad_norm": 6.66173575936569,
"learning_rate": 9.644061955520981e-06,
"loss": 2.53916072845459,
"step": 794
},
{
"epoch": 0.6279620853080569,
"grad_norm": 8.21096858166868,
"learning_rate": 9.642357071085527e-06,
"loss": 3.4347705841064453,
"step": 795
},
{
"epoch": 0.6287519747235387,
"grad_norm": 10.267840042109265,
"learning_rate": 9.640648264771532e-06,
"loss": 2.589984655380249,
"step": 796
},
{
"epoch": 0.6295418641390206,
"grad_norm": 23.740776784719007,
"learning_rate": 9.638935538022605e-06,
"loss": 2.2766027450561523,
"step": 797
},
{
"epoch": 0.6303317535545023,
"grad_norm": 10.175389822962396,
"learning_rate": 9.637218892285656e-06,
"loss": 2.140416383743286,
"step": 798
},
{
"epoch": 0.6311216429699842,
"grad_norm": 10.006330545127017,
"learning_rate": 9.635498329010918e-06,
"loss": 2.6404151916503906,
"step": 799
},
{
"epoch": 0.631911532385466,
"grad_norm": 15.860393911321083,
"learning_rate": 9.633773849651926e-06,
"loss": 3.4515304565429688,
"step": 800
},
{
"epoch": 0.6327014218009479,
"grad_norm": 10.447911887203587,
"learning_rate": 9.632045455665528e-06,
"loss": 2.6439762115478516,
"step": 801
},
{
"epoch": 0.6334913112164297,
"grad_norm": 9.938315980267259,
"learning_rate": 9.630313148511876e-06,
"loss": 2.633496046066284,
"step": 802
},
{
"epoch": 0.6342812006319115,
"grad_norm": 14.127497750541355,
"learning_rate": 9.628576929654427e-06,
"loss": 3.0772128105163574,
"step": 803
},
{
"epoch": 0.6350710900473934,
"grad_norm": 9.475057560148757,
"learning_rate": 9.626836800559948e-06,
"loss": 2.8075199127197266,
"step": 804
},
{
"epoch": 0.6358609794628752,
"grad_norm": 15.2657530938932,
"learning_rate": 9.625092762698502e-06,
"loss": 2.2376973628997803,
"step": 805
},
{
"epoch": 0.636650868878357,
"grad_norm": 13.5038856684049,
"learning_rate": 9.623344817543462e-06,
"loss": 3.09859037399292,
"step": 806
},
{
"epoch": 0.6374407582938388,
"grad_norm": 15.533786385781745,
"learning_rate": 9.621592966571493e-06,
"loss": 2.4869344234466553,
"step": 807
},
{
"epoch": 0.6382306477093207,
"grad_norm": 13.72530582074668,
"learning_rate": 9.619837211262569e-06,
"loss": 3.0674853324890137,
"step": 808
},
{
"epoch": 0.6390205371248026,
"grad_norm": 23.557899308984517,
"learning_rate": 9.618077553099954e-06,
"loss": 3.3668880462646484,
"step": 809
},
{
"epoch": 0.6398104265402843,
"grad_norm": 18.763852919675887,
"learning_rate": 9.616313993570215e-06,
"loss": 2.933554172515869,
"step": 810
},
{
"epoch": 0.6406003159557662,
"grad_norm": 8.365194309767189,
"learning_rate": 9.614546534163214e-06,
"loss": 2.367485523223877,
"step": 811
},
{
"epoch": 0.641390205371248,
"grad_norm": 8.781343171527238,
"learning_rate": 9.612775176372104e-06,
"loss": 2.1504476070404053,
"step": 812
},
{
"epoch": 0.6421800947867299,
"grad_norm": 14.453301784955004,
"learning_rate": 9.610999921693335e-06,
"loss": 3.482938766479492,
"step": 813
},
{
"epoch": 0.6429699842022117,
"grad_norm": 11.750899386703743,
"learning_rate": 9.60922077162665e-06,
"loss": 2.383328437805176,
"step": 814
},
{
"epoch": 0.6437598736176935,
"grad_norm": 9.466726112052974,
"learning_rate": 9.607437727675077e-06,
"loss": 2.781550884246826,
"step": 815
},
{
"epoch": 0.6445497630331753,
"grad_norm": 12.278841985932612,
"learning_rate": 9.60565079134494e-06,
"loss": 2.6321635246276855,
"step": 816
},
{
"epoch": 0.6453396524486572,
"grad_norm": 16.454023020237027,
"learning_rate": 9.60385996414585e-06,
"loss": 3.094892978668213,
"step": 817
},
{
"epoch": 0.6461295418641391,
"grad_norm": 7.492293232454839,
"learning_rate": 9.6020652475907e-06,
"loss": 2.353990077972412,
"step": 818
},
{
"epoch": 0.6469194312796208,
"grad_norm": 7.938504133286718,
"learning_rate": 9.600266643195675e-06,
"loss": 2.719548225402832,
"step": 819
},
{
"epoch": 0.6477093206951027,
"grad_norm": 34.78257617710777,
"learning_rate": 9.598464152480241e-06,
"loss": 2.486771821975708,
"step": 820
},
{
"epoch": 0.6484992101105845,
"grad_norm": 7.695149398961293,
"learning_rate": 9.596657776967149e-06,
"loss": 2.359746217727661,
"step": 821
},
{
"epoch": 0.6492890995260664,
"grad_norm": 10.381251787843585,
"learning_rate": 9.594847518182428e-06,
"loss": 2.8774003982543945,
"step": 822
},
{
"epoch": 0.6500789889415481,
"grad_norm": 21.800739755225305,
"learning_rate": 9.593033377655396e-06,
"loss": 2.1589415073394775,
"step": 823
},
{
"epoch": 0.65086887835703,
"grad_norm": 16.66199847491689,
"learning_rate": 9.59121535691864e-06,
"loss": 3.30254864692688,
"step": 824
},
{
"epoch": 0.6516587677725119,
"grad_norm": 10.566975533918288,
"learning_rate": 9.589393457508032e-06,
"loss": 2.679553508758545,
"step": 825
},
{
"epoch": 0.6524486571879937,
"grad_norm": 19.121309989507864,
"learning_rate": 9.587567680962716e-06,
"loss": 3.172027111053467,
"step": 826
},
{
"epoch": 0.6532385466034755,
"grad_norm": 11.16230347524207,
"learning_rate": 9.58573802882512e-06,
"loss": 2.9337282180786133,
"step": 827
},
{
"epoch": 0.6540284360189573,
"grad_norm": 7.133910740967563,
"learning_rate": 9.583904502640936e-06,
"loss": 2.826122283935547,
"step": 828
},
{
"epoch": 0.6548183254344392,
"grad_norm": 14.35666338571649,
"learning_rate": 9.582067103959131e-06,
"loss": 3.0313868522644043,
"step": 829
},
{
"epoch": 0.655608214849921,
"grad_norm": 11.532262508822264,
"learning_rate": 9.58022583433195e-06,
"loss": 2.7884521484375,
"step": 830
},
{
"epoch": 0.6563981042654028,
"grad_norm": 6.874943848075909,
"learning_rate": 9.5783806953149e-06,
"loss": 2.96806001663208,
"step": 831
},
{
"epoch": 0.6571879936808847,
"grad_norm": 12.146810880091056,
"learning_rate": 9.576531688466762e-06,
"loss": 2.976937770843506,
"step": 832
},
{
"epoch": 0.6579778830963665,
"grad_norm": 7.921587511162627,
"learning_rate": 9.574678815349585e-06,
"loss": 2.6038804054260254,
"step": 833
},
{
"epoch": 0.6587677725118484,
"grad_norm": 17.969038197005215,
"learning_rate": 9.572822077528678e-06,
"loss": 3.1494526863098145,
"step": 834
},
{
"epoch": 0.6595576619273301,
"grad_norm": 8.31536269495529,
"learning_rate": 9.570961476572624e-06,
"loss": 2.9516241550445557,
"step": 835
},
{
"epoch": 0.660347551342812,
"grad_norm": 6.4993286688590715,
"learning_rate": 9.56909701405326e-06,
"loss": 2.8607451915740967,
"step": 836
},
{
"epoch": 0.6611374407582938,
"grad_norm": 7.552272245609234,
"learning_rate": 9.567228691545696e-06,
"loss": 2.5649495124816895,
"step": 837
},
{
"epoch": 0.6619273301737757,
"grad_norm": 14.48881948712848,
"learning_rate": 9.565356510628291e-06,
"loss": 2.513335943222046,
"step": 838
},
{
"epoch": 0.6627172195892576,
"grad_norm": 9.081903072300518,
"learning_rate": 9.563480472882673e-06,
"loss": 2.9949398040771484,
"step": 839
},
{
"epoch": 0.6635071090047393,
"grad_norm": 12.074800830284559,
"learning_rate": 9.561600579893723e-06,
"loss": 2.6771364212036133,
"step": 840
},
{
"epoch": 0.6642969984202212,
"grad_norm": 11.321384919112033,
"learning_rate": 9.559716833249583e-06,
"loss": 2.8205018043518066,
"step": 841
},
{
"epoch": 0.665086887835703,
"grad_norm": 9.775240458791433,
"learning_rate": 9.557829234541647e-06,
"loss": 2.9774630069732666,
"step": 842
},
{
"epoch": 0.6658767772511849,
"grad_norm": 11.521035463074744,
"learning_rate": 9.555937785364563e-06,
"loss": 2.579075574874878,
"step": 843
},
{
"epoch": 0.6666666666666666,
"grad_norm": 9.432485493960403,
"learning_rate": 9.554042487316237e-06,
"loss": 2.726024627685547,
"step": 844
},
{
"epoch": 0.6674565560821485,
"grad_norm": 14.134144139013555,
"learning_rate": 9.552143341997822e-06,
"loss": 2.8715529441833496,
"step": 845
},
{
"epoch": 0.6682464454976303,
"grad_norm": 8.902377494487911,
"learning_rate": 9.55024035101372e-06,
"loss": 2.533745527267456,
"step": 846
},
{
"epoch": 0.6690363349131122,
"grad_norm": 12.832395817425043,
"learning_rate": 9.548333515971587e-06,
"loss": 2.761075496673584,
"step": 847
},
{
"epoch": 0.669826224328594,
"grad_norm": 10.20576917282644,
"learning_rate": 9.546422838482322e-06,
"loss": 2.5824503898620605,
"step": 848
},
{
"epoch": 0.6706161137440758,
"grad_norm": 6.880173889228289,
"learning_rate": 9.54450832016007e-06,
"loss": 2.5947561264038086,
"step": 849
},
{
"epoch": 0.6714060031595577,
"grad_norm": 14.836403016663432,
"learning_rate": 9.542589962622225e-06,
"loss": 2.1935033798217773,
"step": 850
},
{
"epoch": 0.6721958925750395,
"grad_norm": 6.943720267173772,
"learning_rate": 9.540667767489421e-06,
"loss": 2.6050100326538086,
"step": 851
},
{
"epoch": 0.6729857819905213,
"grad_norm": 15.12169878975551,
"learning_rate": 9.538741736385534e-06,
"loss": 3.6529133319854736,
"step": 852
},
{
"epoch": 0.6737756714060031,
"grad_norm": 5.867087588241686,
"learning_rate": 9.536811870937684e-06,
"loss": 2.063253164291382,
"step": 853
},
{
"epoch": 0.674565560821485,
"grad_norm": 7.798417844400532,
"learning_rate": 9.534878172776224e-06,
"loss": 2.7908072471618652,
"step": 854
},
{
"epoch": 0.6753554502369669,
"grad_norm": 11.818473782559336,
"learning_rate": 9.532940643534751e-06,
"loss": 2.4319844245910645,
"step": 855
},
{
"epoch": 0.6761453396524486,
"grad_norm": 12.80740078253414,
"learning_rate": 9.530999284850095e-06,
"loss": 3.1545660495758057,
"step": 856
},
{
"epoch": 0.6769352290679305,
"grad_norm": 9.292936008984638,
"learning_rate": 9.529054098362322e-06,
"loss": 2.947558641433716,
"step": 857
},
{
"epoch": 0.6777251184834123,
"grad_norm": 16.62193321855355,
"learning_rate": 9.527105085714734e-06,
"loss": 2.610852003097534,
"step": 858
},
{
"epoch": 0.6785150078988942,
"grad_norm": 14.554767788526535,
"learning_rate": 9.525152248553862e-06,
"loss": 2.979235887527466,
"step": 859
},
{
"epoch": 0.6793048973143759,
"grad_norm": 11.910814876723402,
"learning_rate": 9.523195588529468e-06,
"loss": 2.6078577041625977,
"step": 860
},
{
"epoch": 0.6800947867298578,
"grad_norm": 8.286047769780788,
"learning_rate": 9.521235107294548e-06,
"loss": 2.068547010421753,
"step": 861
},
{
"epoch": 0.6808846761453397,
"grad_norm": 10.506290416192853,
"learning_rate": 9.51927080650532e-06,
"loss": 2.794530153274536,
"step": 862
},
{
"epoch": 0.6816745655608215,
"grad_norm": 11.391589488737578,
"learning_rate": 9.517302687821231e-06,
"loss": 2.5470008850097656,
"step": 863
},
{
"epoch": 0.6824644549763034,
"grad_norm": 7.668217055524585,
"learning_rate": 9.515330752904956e-06,
"loss": 2.6968884468078613,
"step": 864
},
{
"epoch": 0.6832543443917851,
"grad_norm": 8.551258441901858,
"learning_rate": 9.513355003422396e-06,
"loss": 2.8228256702423096,
"step": 865
},
{
"epoch": 0.684044233807267,
"grad_norm": 11.671477572882841,
"learning_rate": 9.511375441042663e-06,
"loss": 3.5630812644958496,
"step": 866
},
{
"epoch": 0.6848341232227488,
"grad_norm": 15.900567407044479,
"learning_rate": 9.5093920674381e-06,
"loss": 2.9535064697265625,
"step": 867
},
{
"epoch": 0.6856240126382307,
"grad_norm": 8.643390525668298,
"learning_rate": 9.507404884284273e-06,
"loss": 2.741084337234497,
"step": 868
},
{
"epoch": 0.6864139020537124,
"grad_norm": 10.692778433484234,
"learning_rate": 9.505413893259956e-06,
"loss": 3.3046531677246094,
"step": 869
},
{
"epoch": 0.6872037914691943,
"grad_norm": 16.535562300261393,
"learning_rate": 9.503419096047144e-06,
"loss": 3.2300820350646973,
"step": 870
},
{
"epoch": 0.6879936808846762,
"grad_norm": 24.170107530956294,
"learning_rate": 9.501420494331052e-06,
"loss": 2.438554048538208,
"step": 871
},
{
"epoch": 0.688783570300158,
"grad_norm": 8.690027829520277,
"learning_rate": 9.499418089800102e-06,
"loss": 2.4033608436584473,
"step": 872
},
{
"epoch": 0.6895734597156398,
"grad_norm": 9.282954177762111,
"learning_rate": 9.497411884145933e-06,
"loss": 2.7961714267730713,
"step": 873
},
{
"epoch": 0.6903633491311216,
"grad_norm": 8.534548336142196,
"learning_rate": 9.495401879063395e-06,
"loss": 2.739697217941284,
"step": 874
},
{
"epoch": 0.6911532385466035,
"grad_norm": 14.710104471833855,
"learning_rate": 9.493388076250546e-06,
"loss": 3.3953442573547363,
"step": 875
},
{
"epoch": 0.6919431279620853,
"grad_norm": 10.793976678843643,
"learning_rate": 9.491370477408655e-06,
"loss": 3.082679271697998,
"step": 876
},
{
"epoch": 0.6927330173775671,
"grad_norm": 11.424816104504234,
"learning_rate": 9.489349084242192e-06,
"loss": 2.755612850189209,
"step": 877
},
{
"epoch": 0.693522906793049,
"grad_norm": 7.915901709561951,
"learning_rate": 9.487323898458841e-06,
"loss": 2.766568660736084,
"step": 878
},
{
"epoch": 0.6943127962085308,
"grad_norm": 17.848385230595895,
"learning_rate": 9.485294921769484e-06,
"loss": 2.5398964881896973,
"step": 879
},
{
"epoch": 0.6951026856240127,
"grad_norm": 29.674496967139927,
"learning_rate": 9.483262155888207e-06,
"loss": 3.093564987182617,
"step": 880
},
{
"epoch": 0.6958925750394944,
"grad_norm": 14.90079966406828,
"learning_rate": 9.481225602532296e-06,
"loss": 2.9575257301330566,
"step": 881
},
{
"epoch": 0.6966824644549763,
"grad_norm": 26.431365807353334,
"learning_rate": 9.47918526342224e-06,
"loss": 3.3621833324432373,
"step": 882
},
{
"epoch": 0.6974723538704581,
"grad_norm": 18.17594227056942,
"learning_rate": 9.477141140281724e-06,
"loss": 2.7552647590637207,
"step": 883
},
{
"epoch": 0.69826224328594,
"grad_norm": 9.17468592273275,
"learning_rate": 9.475093234837629e-06,
"loss": 2.6432392597198486,
"step": 884
},
{
"epoch": 0.6990521327014217,
"grad_norm": 11.724571460894934,
"learning_rate": 9.473041548820034e-06,
"loss": 2.863342523574829,
"step": 885
},
{
"epoch": 0.6998420221169036,
"grad_norm": 13.647582206639745,
"learning_rate": 9.470986083962208e-06,
"loss": 3.1229562759399414,
"step": 886
},
{
"epoch": 0.7006319115323855,
"grad_norm": 10.904081018729473,
"learning_rate": 9.468926842000614e-06,
"loss": 2.8623602390289307,
"step": 887
},
{
"epoch": 0.7014218009478673,
"grad_norm": 14.77540518637624,
"learning_rate": 9.46686382467491e-06,
"loss": 2.9971213340759277,
"step": 888
},
{
"epoch": 0.7022116903633492,
"grad_norm": 17.160481522672626,
"learning_rate": 9.464797033727937e-06,
"loss": 2.8732876777648926,
"step": 889
},
{
"epoch": 0.7030015797788309,
"grad_norm": 7.662847037910413,
"learning_rate": 9.462726470905727e-06,
"loss": 2.4884605407714844,
"step": 890
},
{
"epoch": 0.7037914691943128,
"grad_norm": 15.496194298757855,
"learning_rate": 9.460652137957497e-06,
"loss": 2.5895493030548096,
"step": 891
},
{
"epoch": 0.7045813586097947,
"grad_norm": 9.210243074506902,
"learning_rate": 9.458574036635656e-06,
"loss": 3.070889472961426,
"step": 892
},
{
"epoch": 0.7053712480252765,
"grad_norm": 13.22477817543685,
"learning_rate": 9.456492168695783e-06,
"loss": 2.6131277084350586,
"step": 893
},
{
"epoch": 0.7061611374407583,
"grad_norm": 10.736756209485726,
"learning_rate": 9.454406535896653e-06,
"loss": 2.7342894077301025,
"step": 894
},
{
"epoch": 0.7069510268562401,
"grad_norm": 15.649909827229424,
"learning_rate": 9.452317140000213e-06,
"loss": 2.709885835647583,
"step": 895
},
{
"epoch": 0.707740916271722,
"grad_norm": 17.03495887885535,
"learning_rate": 9.45022398277159e-06,
"loss": 2.229793071746826,
"step": 896
},
{
"epoch": 0.7085308056872038,
"grad_norm": 18.286957303019204,
"learning_rate": 9.448127065979093e-06,
"loss": 2.3719115257263184,
"step": 897
},
{
"epoch": 0.7093206951026856,
"grad_norm": 9.674060014502675,
"learning_rate": 9.446026391394203e-06,
"loss": 3.1232872009277344,
"step": 898
},
{
"epoch": 0.7101105845181674,
"grad_norm": 8.294415989977118,
"learning_rate": 9.443921960791578e-06,
"loss": 2.2887797355651855,
"step": 899
},
{
"epoch": 0.7109004739336493,
"grad_norm": 10.7843258557463,
"learning_rate": 9.441813775949045e-06,
"loss": 2.947249174118042,
"step": 900
},
{
"epoch": 0.7116903633491312,
"grad_norm": 55.81560616750336,
"learning_rate": 9.439701838647607e-06,
"loss": 2.6564688682556152,
"step": 901
},
{
"epoch": 0.7124802527646129,
"grad_norm": 15.475637484457012,
"learning_rate": 9.437586150671438e-06,
"loss": 3.2652010917663574,
"step": 902
},
{
"epoch": 0.7132701421800948,
"grad_norm": 12.6346062418189,
"learning_rate": 9.435466713807875e-06,
"loss": 3.212409257888794,
"step": 903
},
{
"epoch": 0.7140600315955766,
"grad_norm": 12.653119224973862,
"learning_rate": 9.433343529847426e-06,
"loss": 2.8347318172454834,
"step": 904
},
{
"epoch": 0.7148499210110585,
"grad_norm": 8.174222426024595,
"learning_rate": 9.431216600583764e-06,
"loss": 2.7938289642333984,
"step": 905
},
{
"epoch": 0.7156398104265402,
"grad_norm": 10.498223049643716,
"learning_rate": 9.429085927813725e-06,
"loss": 2.8059895038604736,
"step": 906
},
{
"epoch": 0.7164296998420221,
"grad_norm": 11.610537771337127,
"learning_rate": 9.42695151333731e-06,
"loss": 2.670276403427124,
"step": 907
},
{
"epoch": 0.717219589257504,
"grad_norm": 15.926885320452712,
"learning_rate": 9.424813358957678e-06,
"loss": 2.8029661178588867,
"step": 908
},
{
"epoch": 0.7180094786729858,
"grad_norm": 12.370001760911942,
"learning_rate": 9.42267146648115e-06,
"loss": 2.344736099243164,
"step": 909
},
{
"epoch": 0.7187993680884676,
"grad_norm": 15.272394441293677,
"learning_rate": 9.420525837717205e-06,
"loss": 1.8855293989181519,
"step": 910
},
{
"epoch": 0.7195892575039494,
"grad_norm": 10.81989245618176,
"learning_rate": 9.418376474478474e-06,
"loss": 2.810041666030884,
"step": 911
},
{
"epoch": 0.7203791469194313,
"grad_norm": 17.67649470148584,
"learning_rate": 9.416223378580747e-06,
"loss": 2.526409864425659,
"step": 912
},
{
"epoch": 0.7211690363349131,
"grad_norm": 10.055598633901095,
"learning_rate": 9.414066551842969e-06,
"loss": 2.868654489517212,
"step": 913
},
{
"epoch": 0.721958925750395,
"grad_norm": 9.513800374151746,
"learning_rate": 9.41190599608723e-06,
"loss": 2.928063154220581,
"step": 914
},
{
"epoch": 0.7227488151658767,
"grad_norm": 13.871125687274514,
"learning_rate": 9.40974171313878e-06,
"loss": 2.4988300800323486,
"step": 915
},
{
"epoch": 0.7235387045813586,
"grad_norm": 11.896211423240262,
"learning_rate": 9.407573704826008e-06,
"loss": 1.8240364789962769,
"step": 916
},
{
"epoch": 0.7243285939968405,
"grad_norm": 9.077450079878284,
"learning_rate": 9.405401972980457e-06,
"loss": 2.8183727264404297,
"step": 917
},
{
"epoch": 0.7251184834123223,
"grad_norm": 13.826245036289404,
"learning_rate": 9.40322651943681e-06,
"loss": 2.8091042041778564,
"step": 918
},
{
"epoch": 0.7259083728278041,
"grad_norm": 7.372371185613573,
"learning_rate": 9.4010473460329e-06,
"loss": 2.093374252319336,
"step": 919
},
{
"epoch": 0.7266982622432859,
"grad_norm": 8.058209243668498,
"learning_rate": 9.398864454609702e-06,
"loss": 2.278440475463867,
"step": 920
},
{
"epoch": 0.7274881516587678,
"grad_norm": 12.147448734615645,
"learning_rate": 9.396677847011326e-06,
"loss": 2.403252601623535,
"step": 921
},
{
"epoch": 0.7282780410742496,
"grad_norm": 8.711345300212125,
"learning_rate": 9.394487525085027e-06,
"loss": 2.735347270965576,
"step": 922
},
{
"epoch": 0.7290679304897314,
"grad_norm": 8.80572208933033,
"learning_rate": 9.392293490681195e-06,
"loss": 2.715076446533203,
"step": 923
},
{
"epoch": 0.7298578199052133,
"grad_norm": 7.247758343473638,
"learning_rate": 9.390095745653359e-06,
"loss": 2.7396597862243652,
"step": 924
},
{
"epoch": 0.7306477093206951,
"grad_norm": 7.245885716473276,
"learning_rate": 9.38789429185818e-06,
"loss": 2.7173843383789062,
"step": 925
},
{
"epoch": 0.731437598736177,
"grad_norm": 19.0606393622214,
"learning_rate": 9.385689131155456e-06,
"loss": 3.2145304679870605,
"step": 926
},
{
"epoch": 0.7322274881516587,
"grad_norm": 14.229401707613647,
"learning_rate": 9.383480265408109e-06,
"loss": 2.976992130279541,
"step": 927
},
{
"epoch": 0.7330173775671406,
"grad_norm": 11.037803882230573,
"learning_rate": 9.3812676964822e-06,
"loss": 3.361060619354248,
"step": 928
},
{
"epoch": 0.7338072669826224,
"grad_norm": 23.852840951499623,
"learning_rate": 9.379051426246914e-06,
"loss": 3.223222255706787,
"step": 929
},
{
"epoch": 0.7345971563981043,
"grad_norm": 14.223529634226185,
"learning_rate": 9.376831456574561e-06,
"loss": 2.8687520027160645,
"step": 930
},
{
"epoch": 0.735387045813586,
"grad_norm": 7.156756071444025,
"learning_rate": 9.374607789340584e-06,
"loss": 2.873199701309204,
"step": 931
},
{
"epoch": 0.7361769352290679,
"grad_norm": 7.616209133800571,
"learning_rate": 9.37238042642354e-06,
"loss": 3.0081300735473633,
"step": 932
},
{
"epoch": 0.7369668246445498,
"grad_norm": 7.149090801992643,
"learning_rate": 9.370149369705112e-06,
"loss": 2.981412887573242,
"step": 933
},
{
"epoch": 0.7377567140600316,
"grad_norm": 15.58505127158866,
"learning_rate": 9.367914621070107e-06,
"loss": 2.8132896423339844,
"step": 934
},
{
"epoch": 0.7385466034755134,
"grad_norm": 13.465860481670774,
"learning_rate": 9.365676182406446e-06,
"loss": 3.4976980686187744,
"step": 935
},
{
"epoch": 0.7393364928909952,
"grad_norm": 11.899120310195212,
"learning_rate": 9.36343405560517e-06,
"loss": 2.8405492305755615,
"step": 936
},
{
"epoch": 0.7401263823064771,
"grad_norm": 11.739242983649948,
"learning_rate": 9.361188242560436e-06,
"loss": 2.7775559425354004,
"step": 937
},
{
"epoch": 0.740916271721959,
"grad_norm": 10.611580913268543,
"learning_rate": 9.358938745169512e-06,
"loss": 2.7165842056274414,
"step": 938
},
{
"epoch": 0.7417061611374408,
"grad_norm": 16.286711299762892,
"learning_rate": 9.356685565332783e-06,
"loss": 2.8377950191497803,
"step": 939
},
{
"epoch": 0.7424960505529226,
"grad_norm": 10.06467306357769,
"learning_rate": 9.354428704953743e-06,
"loss": 2.605860471725464,
"step": 940
},
{
"epoch": 0.7432859399684044,
"grad_norm": 9.106574090331993,
"learning_rate": 9.352168165938992e-06,
"loss": 3.039595127105713,
"step": 941
},
{
"epoch": 0.7440758293838863,
"grad_norm": 13.910557878586525,
"learning_rate": 9.349903950198243e-06,
"loss": 3.1908493041992188,
"step": 942
},
{
"epoch": 0.7448657187993681,
"grad_norm": 15.785181835421662,
"learning_rate": 9.347636059644313e-06,
"loss": 3.6178295612335205,
"step": 943
},
{
"epoch": 0.7456556082148499,
"grad_norm": 13.627805304388191,
"learning_rate": 9.345364496193124e-06,
"loss": 2.30802059173584,
"step": 944
},
{
"epoch": 0.7464454976303317,
"grad_norm": 13.113283530630058,
"learning_rate": 9.343089261763698e-06,
"loss": 2.7968385219573975,
"step": 945
},
{
"epoch": 0.7472353870458136,
"grad_norm": 54.4044476972888,
"learning_rate": 9.340810358278163e-06,
"loss": 2.887650728225708,
"step": 946
},
{
"epoch": 0.7480252764612955,
"grad_norm": 12.476106430670102,
"learning_rate": 9.338527787661743e-06,
"loss": 2.8030970096588135,
"step": 947
},
{
"epoch": 0.7488151658767772,
"grad_norm": 11.33742883881337,
"learning_rate": 9.336241551842759e-06,
"loss": 2.91349196434021,
"step": 948
},
{
"epoch": 0.7496050552922591,
"grad_norm": 10.310783405801457,
"learning_rate": 9.333951652752636e-06,
"loss": 2.592141628265381,
"step": 949
},
{
"epoch": 0.7503949447077409,
"grad_norm": 19.674293659067043,
"learning_rate": 9.331658092325884e-06,
"loss": 3.334771156311035,
"step": 950
},
{
"epoch": 0.7511848341232228,
"grad_norm": 9.403755720421595,
"learning_rate": 9.32936087250011e-06,
"loss": 2.669703960418701,
"step": 951
},
{
"epoch": 0.7519747235387045,
"grad_norm": 7.772849025991002,
"learning_rate": 9.327059995216017e-06,
"loss": 2.772550344467163,
"step": 952
},
{
"epoch": 0.7527646129541864,
"grad_norm": 12.063324109253381,
"learning_rate": 9.32475546241739e-06,
"loss": 3.1131880283355713,
"step": 953
},
{
"epoch": 0.7535545023696683,
"grad_norm": 8.755986042063324,
"learning_rate": 9.322447276051106e-06,
"loss": 2.6408510208129883,
"step": 954
},
{
"epoch": 0.7543443917851501,
"grad_norm": 16.519428236399992,
"learning_rate": 9.32013543806713e-06,
"loss": 2.889667510986328,
"step": 955
},
{
"epoch": 0.7551342812006319,
"grad_norm": 10.18189972859802,
"learning_rate": 9.31781995041851e-06,
"loss": 1.998913049697876,
"step": 956
},
{
"epoch": 0.7559241706161137,
"grad_norm": 12.945059813106072,
"learning_rate": 9.315500815061378e-06,
"loss": 2.572543144226074,
"step": 957
},
{
"epoch": 0.7567140600315956,
"grad_norm": 7.760044557288449,
"learning_rate": 9.313178033954946e-06,
"loss": 2.8043367862701416,
"step": 958
},
{
"epoch": 0.7575039494470774,
"grad_norm": 12.169397261499922,
"learning_rate": 9.310851609061507e-06,
"loss": 2.6561851501464844,
"step": 959
},
{
"epoch": 0.7582938388625592,
"grad_norm": 8.032984175481454,
"learning_rate": 9.308521542346434e-06,
"loss": 3.0927743911743164,
"step": 960
},
{
"epoch": 0.759083728278041,
"grad_norm": 11.370911942807972,
"learning_rate": 9.306187835778173e-06,
"loss": 3.112912178039551,
"step": 961
},
{
"epoch": 0.7598736176935229,
"grad_norm": 30.68759001827329,
"learning_rate": 9.30385049132825e-06,
"loss": 2.32753324508667,
"step": 962
},
{
"epoch": 0.7606635071090048,
"grad_norm": 19.20736554439743,
"learning_rate": 9.301509510971259e-06,
"loss": 2.4722962379455566,
"step": 963
},
{
"epoch": 0.7614533965244866,
"grad_norm": 7.896910852894032,
"learning_rate": 9.299164896684867e-06,
"loss": 2.8172154426574707,
"step": 964
},
{
"epoch": 0.7622432859399684,
"grad_norm": 8.285847211519757,
"learning_rate": 9.296816650449813e-06,
"loss": 3.217062473297119,
"step": 965
},
{
"epoch": 0.7630331753554502,
"grad_norm": 14.348208287781882,
"learning_rate": 9.294464774249905e-06,
"loss": 3.099119186401367,
"step": 966
},
{
"epoch": 0.7638230647709321,
"grad_norm": 17.781084869379775,
"learning_rate": 9.292109270072013e-06,
"loss": 3.2744314670562744,
"step": 967
},
{
"epoch": 0.764612954186414,
"grad_norm": 8.411579984707192,
"learning_rate": 9.289750139906075e-06,
"loss": 3.0986344814300537,
"step": 968
},
{
"epoch": 0.7654028436018957,
"grad_norm": 12.640585889095618,
"learning_rate": 9.287387385745094e-06,
"loss": 2.9320476055145264,
"step": 969
},
{
"epoch": 0.7661927330173776,
"grad_norm": 18.268696929650034,
"learning_rate": 9.28502100958513e-06,
"loss": 1.89057457447052,
"step": 970
},
{
"epoch": 0.7669826224328594,
"grad_norm": 8.318894507210741,
"learning_rate": 9.282651013425309e-06,
"loss": 2.681485891342163,
"step": 971
},
{
"epoch": 0.7677725118483413,
"grad_norm": 12.92970042225882,
"learning_rate": 9.280277399267808e-06,
"loss": 3.2145707607269287,
"step": 972
},
{
"epoch": 0.768562401263823,
"grad_norm": 8.005504028109495,
"learning_rate": 9.277900169117864e-06,
"loss": 2.4123406410217285,
"step": 973
},
{
"epoch": 0.7693522906793049,
"grad_norm": 12.062434756711202,
"learning_rate": 9.27551932498377e-06,
"loss": 2.878951072692871,
"step": 974
},
{
"epoch": 0.7701421800947867,
"grad_norm": 9.094718919698682,
"learning_rate": 9.273134868876872e-06,
"loss": 2.949256420135498,
"step": 975
},
{
"epoch": 0.7709320695102686,
"grad_norm": 21.141805602331043,
"learning_rate": 9.270746802811566e-06,
"loss": 2.42653226852417,
"step": 976
},
{
"epoch": 0.7717219589257504,
"grad_norm": 14.843894359981512,
"learning_rate": 9.268355128805298e-06,
"loss": 2.678997039794922,
"step": 977
},
{
"epoch": 0.7725118483412322,
"grad_norm": 12.746073187269445,
"learning_rate": 9.265959848878558e-06,
"loss": 2.422623634338379,
"step": 978
},
{
"epoch": 0.7733017377567141,
"grad_norm": 11.40428902421513,
"learning_rate": 9.263560965054894e-06,
"loss": 2.154381036758423,
"step": 979
},
{
"epoch": 0.7740916271721959,
"grad_norm": 20.956261055506054,
"learning_rate": 9.261158479360884e-06,
"loss": 2.777848720550537,
"step": 980
},
{
"epoch": 0.7748815165876777,
"grad_norm": 9.70811631729052,
"learning_rate": 9.25875239382616e-06,
"loss": 2.6050872802734375,
"step": 981
},
{
"epoch": 0.7756714060031595,
"grad_norm": 10.241699921518476,
"learning_rate": 9.25634271048339e-06,
"loss": 2.080990791320801,
"step": 982
},
{
"epoch": 0.7764612954186414,
"grad_norm": 11.768131588715727,
"learning_rate": 9.253929431368282e-06,
"loss": 3.0087733268737793,
"step": 983
},
{
"epoch": 0.7772511848341233,
"grad_norm": 12.464298821154987,
"learning_rate": 9.251512558519582e-06,
"loss": 2.852950096130371,
"step": 984
},
{
"epoch": 0.778041074249605,
"grad_norm": 11.838439498108045,
"learning_rate": 9.24909209397907e-06,
"loss": 3.7374510765075684,
"step": 985
},
{
"epoch": 0.7788309636650869,
"grad_norm": 15.277389692233058,
"learning_rate": 9.246668039791568e-06,
"loss": 2.7718963623046875,
"step": 986
},
{
"epoch": 0.7796208530805687,
"grad_norm": 6.6742475596758535,
"learning_rate": 9.244240398004922e-06,
"loss": 2.6798787117004395,
"step": 987
},
{
"epoch": 0.7804107424960506,
"grad_norm": 10.32795974684294,
"learning_rate": 9.241809170670008e-06,
"loss": 2.7594618797302246,
"step": 988
},
{
"epoch": 0.7812006319115324,
"grad_norm": 9.018400283983219,
"learning_rate": 9.239374359840742e-06,
"loss": 3.114830493927002,
"step": 989
},
{
"epoch": 0.7819905213270142,
"grad_norm": 16.724516344511926,
"learning_rate": 9.236935967574054e-06,
"loss": 3.228322982788086,
"step": 990
},
{
"epoch": 0.782780410742496,
"grad_norm": 23.82315306020477,
"learning_rate": 9.234493995929912e-06,
"loss": 3.004939079284668,
"step": 991
},
{
"epoch": 0.7835703001579779,
"grad_norm": 13.99018159956102,
"learning_rate": 9.232048446971296e-06,
"loss": 2.6142897605895996,
"step": 992
},
{
"epoch": 0.7843601895734598,
"grad_norm": 10.608540414298071,
"learning_rate": 9.229599322764215e-06,
"loss": 2.6615846157073975,
"step": 993
},
{
"epoch": 0.7851500789889415,
"grad_norm": 23.10494541404644,
"learning_rate": 9.227146625377699e-06,
"loss": 2.831402063369751,
"step": 994
},
{
"epoch": 0.7859399684044234,
"grad_norm": 9.659599628220574,
"learning_rate": 9.224690356883793e-06,
"loss": 3.2252285480499268,
"step": 995
},
{
"epoch": 0.7867298578199052,
"grad_norm": 7.285958976275862,
"learning_rate": 9.222230519357562e-06,
"loss": 2.635441780090332,
"step": 996
},
{
"epoch": 0.7875197472353871,
"grad_norm": 7.160771261905654,
"learning_rate": 9.219767114877086e-06,
"loss": 2.6156837940216064,
"step": 997
},
{
"epoch": 0.7883096366508688,
"grad_norm": 14.57854463656351,
"learning_rate": 9.217300145523453e-06,
"loss": 2.979773998260498,
"step": 998
},
{
"epoch": 0.7890995260663507,
"grad_norm": 10.862479398287723,
"learning_rate": 9.214829613380772e-06,
"loss": 2.811668872833252,
"step": 999
},
{
"epoch": 0.7898894154818326,
"grad_norm": 14.273686758443457,
"learning_rate": 9.212355520536153e-06,
"loss": 3.670020818710327,
"step": 1000
},
{
"epoch": 0.7906793048973144,
"grad_norm": 7.748159510449536,
"learning_rate": 9.209877869079719e-06,
"loss": 2.974087953567505,
"step": 1001
},
{
"epoch": 0.7914691943127962,
"grad_norm": 7.596423622962509,
"learning_rate": 9.207396661104599e-06,
"loss": 2.0368399620056152,
"step": 1002
},
{
"epoch": 0.792259083728278,
"grad_norm": 15.086522167851689,
"learning_rate": 9.204911898706925e-06,
"loss": 3.096889019012451,
"step": 1003
},
{
"epoch": 0.7930489731437599,
"grad_norm": 11.981971765032,
"learning_rate": 9.202423583985832e-06,
"loss": 2.6723742485046387,
"step": 1004
},
{
"epoch": 0.7938388625592417,
"grad_norm": 8.646415889552525,
"learning_rate": 9.199931719043456e-06,
"loss": 2.6578660011291504,
"step": 1005
},
{
"epoch": 0.7946287519747235,
"grad_norm": 14.68042449542436,
"learning_rate": 9.197436305984933e-06,
"loss": 2.629140853881836,
"step": 1006
},
{
"epoch": 0.7954186413902053,
"grad_norm": 9.552656598953957,
"learning_rate": 9.194937346918398e-06,
"loss": 2.732150077819824,
"step": 1007
},
{
"epoch": 0.7962085308056872,
"grad_norm": 8.954639954969533,
"learning_rate": 9.192434843954977e-06,
"loss": 2.240567445755005,
"step": 1008
},
{
"epoch": 0.7969984202211691,
"grad_norm": 7.418598530709051,
"learning_rate": 9.189928799208794e-06,
"loss": 2.6821372509002686,
"step": 1009
},
{
"epoch": 0.7977883096366508,
"grad_norm": 6.460842787120627,
"learning_rate": 9.187419214796967e-06,
"loss": 2.697354793548584,
"step": 1010
},
{
"epoch": 0.7985781990521327,
"grad_norm": 6.9324406657677695,
"learning_rate": 9.184906092839596e-06,
"loss": 1.7275753021240234,
"step": 1011
},
{
"epoch": 0.7993680884676145,
"grad_norm": 15.212694250890982,
"learning_rate": 9.182389435459776e-06,
"loss": 2.717050075531006,
"step": 1012
},
{
"epoch": 0.8001579778830964,
"grad_norm": 8.870533933713203,
"learning_rate": 9.17986924478359e-06,
"loss": 2.5672261714935303,
"step": 1013
},
{
"epoch": 0.8009478672985783,
"grad_norm": 12.582683742658494,
"learning_rate": 9.177345522940102e-06,
"loss": 2.8706305027008057,
"step": 1014
},
{
"epoch": 0.80173775671406,
"grad_norm": 9.481753489579146,
"learning_rate": 9.174818272061358e-06,
"loss": 2.880066156387329,
"step": 1015
},
{
"epoch": 0.8025276461295419,
"grad_norm": 7.842796420054131,
"learning_rate": 9.172287494282393e-06,
"loss": 1.724432349205017,
"step": 1016
},
{
"epoch": 0.8033175355450237,
"grad_norm": 13.467226411075792,
"learning_rate": 9.169753191741211e-06,
"loss": 3.1031036376953125,
"step": 1017
},
{
"epoch": 0.8041074249605056,
"grad_norm": 9.642248663487798,
"learning_rate": 9.167215366578804e-06,
"loss": 2.266718626022339,
"step": 1018
},
{
"epoch": 0.8048973143759873,
"grad_norm": 9.565467328925202,
"learning_rate": 9.16467402093913e-06,
"loss": 2.8157501220703125,
"step": 1019
},
{
"epoch": 0.8056872037914692,
"grad_norm": 15.209367854706489,
"learning_rate": 9.162129156969131e-06,
"loss": 2.440033197402954,
"step": 1020
},
{
"epoch": 0.806477093206951,
"grad_norm": 9.68569819521668,
"learning_rate": 9.159580776818715e-06,
"loss": 2.4779491424560547,
"step": 1021
},
{
"epoch": 0.8072669826224329,
"grad_norm": 12.736707657945674,
"learning_rate": 9.15702888264076e-06,
"loss": 2.2091784477233887,
"step": 1022
},
{
"epoch": 0.8080568720379147,
"grad_norm": 13.514115226011919,
"learning_rate": 9.154473476591114e-06,
"loss": 3.1801180839538574,
"step": 1023
},
{
"epoch": 0.8088467614533965,
"grad_norm": 20.122470837459804,
"learning_rate": 9.151914560828598e-06,
"loss": 3.0873842239379883,
"step": 1024
},
{
"epoch": 0.8096366508688784,
"grad_norm": 13.480557474542609,
"learning_rate": 9.149352137514987e-06,
"loss": 2.603421449661255,
"step": 1025
},
{
"epoch": 0.8104265402843602,
"grad_norm": 21.95371373023252,
"learning_rate": 9.146786208815026e-06,
"loss": 2.6986594200134277,
"step": 1026
},
{
"epoch": 0.811216429699842,
"grad_norm": 38.65128008605009,
"learning_rate": 9.144216776896422e-06,
"loss": 2.905870199203491,
"step": 1027
},
{
"epoch": 0.8120063191153238,
"grad_norm": 10.540992057323065,
"learning_rate": 9.141643843929837e-06,
"loss": 2.4717659950256348,
"step": 1028
},
{
"epoch": 0.8127962085308057,
"grad_norm": 15.24208436151514,
"learning_rate": 9.139067412088895e-06,
"loss": 2.477531909942627,
"step": 1029
},
{
"epoch": 0.8135860979462876,
"grad_norm": 9.30424965654663,
"learning_rate": 9.136487483550172e-06,
"loss": 3.016301393508911,
"step": 1030
},
{
"epoch": 0.8143759873617693,
"grad_norm": 22.185168693739104,
"learning_rate": 9.1339040604932e-06,
"loss": 2.662216901779175,
"step": 1031
},
{
"epoch": 0.8151658767772512,
"grad_norm": 29.979197475408252,
"learning_rate": 9.131317145100469e-06,
"loss": 2.694211959838867,
"step": 1032
},
{
"epoch": 0.815955766192733,
"grad_norm": 11.06100764534907,
"learning_rate": 9.128726739557408e-06,
"loss": 2.5702898502349854,
"step": 1033
},
{
"epoch": 0.8167456556082149,
"grad_norm": 7.880841439580581,
"learning_rate": 9.126132846052401e-06,
"loss": 2.8700671195983887,
"step": 1034
},
{
"epoch": 0.8175355450236966,
"grad_norm": 27.058763319079304,
"learning_rate": 9.123535466776778e-06,
"loss": 3.244725227355957,
"step": 1035
},
{
"epoch": 0.8183254344391785,
"grad_norm": 16.382446856894965,
"learning_rate": 9.120934603924816e-06,
"loss": 2.4301857948303223,
"step": 1036
},
{
"epoch": 0.8191153238546603,
"grad_norm": 16.26715685664894,
"learning_rate": 9.118330259693728e-06,
"loss": 4.2284698486328125,
"step": 1037
},
{
"epoch": 0.8199052132701422,
"grad_norm": 21.996363546185417,
"learning_rate": 9.115722436283676e-06,
"loss": 3.5227627754211426,
"step": 1038
},
{
"epoch": 0.8206951026856241,
"grad_norm": 20.19577186521635,
"learning_rate": 9.113111135897757e-06,
"loss": 3.057605743408203,
"step": 1039
},
{
"epoch": 0.8214849921011058,
"grad_norm": 13.600963392770467,
"learning_rate": 9.110496360742006e-06,
"loss": 3.0911357402801514,
"step": 1040
},
{
"epoch": 0.8222748815165877,
"grad_norm": 12.336475393471844,
"learning_rate": 9.107878113025393e-06,
"loss": 3.0051560401916504,
"step": 1041
},
{
"epoch": 0.8230647709320695,
"grad_norm": 10.984835326798942,
"learning_rate": 9.105256394959822e-06,
"loss": 2.4400599002838135,
"step": 1042
},
{
"epoch": 0.8238546603475514,
"grad_norm": 20.947414774581414,
"learning_rate": 9.102631208760131e-06,
"loss": 3.4805350303649902,
"step": 1043
},
{
"epoch": 0.8246445497630331,
"grad_norm": 14.360309838396736,
"learning_rate": 9.100002556644086e-06,
"loss": 2.588095188140869,
"step": 1044
},
{
"epoch": 0.825434439178515,
"grad_norm": 10.20829475484016,
"learning_rate": 9.097370440832378e-06,
"loss": 2.78764271736145,
"step": 1045
},
{
"epoch": 0.8262243285939969,
"grad_norm": 10.885822678518133,
"learning_rate": 9.094734863548629e-06,
"loss": 2.8764867782592773,
"step": 1046
},
{
"epoch": 0.8270142180094787,
"grad_norm": 8.714569458458401,
"learning_rate": 9.092095827019385e-06,
"loss": 2.7947893142700195,
"step": 1047
},
{
"epoch": 0.8278041074249605,
"grad_norm": 16.531291726752407,
"learning_rate": 9.08945333347411e-06,
"loss": 3.4402239322662354,
"step": 1048
},
{
"epoch": 0.8285939968404423,
"grad_norm": 11.649383119473496,
"learning_rate": 9.086807385145193e-06,
"loss": 2.5275776386260986,
"step": 1049
},
{
"epoch": 0.8293838862559242,
"grad_norm": 37.04934005274269,
"learning_rate": 9.084157984267939e-06,
"loss": 3.1136865615844727,
"step": 1050
},
{
"epoch": 0.830173775671406,
"grad_norm": 27.375499924589366,
"learning_rate": 9.08150513308057e-06,
"loss": 2.692000389099121,
"step": 1051
},
{
"epoch": 0.8309636650868878,
"grad_norm": 15.165913666522817,
"learning_rate": 9.078848833824226e-06,
"loss": 3.2129108905792236,
"step": 1052
},
{
"epoch": 0.8317535545023697,
"grad_norm": 16.540229701542952,
"learning_rate": 9.076189088742955e-06,
"loss": 3.135190963745117,
"step": 1053
},
{
"epoch": 0.8325434439178515,
"grad_norm": 11.775957201988549,
"learning_rate": 9.073525900083717e-06,
"loss": 2.9450531005859375,
"step": 1054
},
{
"epoch": 0.8333333333333334,
"grad_norm": 28.18681618707373,
"learning_rate": 9.070859270096385e-06,
"loss": 3.7651073932647705,
"step": 1055
},
{
"epoch": 0.8341232227488151,
"grad_norm": 11.37393230420539,
"learning_rate": 9.06818920103374e-06,
"loss": 2.674816846847534,
"step": 1056
},
{
"epoch": 0.834913112164297,
"grad_norm": 5.773096989056217,
"learning_rate": 9.065515695151459e-06,
"loss": 2.8101024627685547,
"step": 1057
},
{
"epoch": 0.8357030015797788,
"grad_norm": 9.404139830590696,
"learning_rate": 9.06283875470813e-06,
"loss": 2.883345603942871,
"step": 1058
},
{
"epoch": 0.8364928909952607,
"grad_norm": 10.053270788353137,
"learning_rate": 9.060158381965242e-06,
"loss": 3.0177576541900635,
"step": 1059
},
{
"epoch": 0.8372827804107424,
"grad_norm": 9.56854149440088,
"learning_rate": 9.057474579187184e-06,
"loss": 2.5740466117858887,
"step": 1060
},
{
"epoch": 0.8380726698262243,
"grad_norm": 25.48696327806149,
"learning_rate": 9.05478734864124e-06,
"loss": 2.5997400283813477,
"step": 1061
},
{
"epoch": 0.8388625592417062,
"grad_norm": 8.822888785903398,
"learning_rate": 9.052096692597594e-06,
"loss": 2.480900287628174,
"step": 1062
},
{
"epoch": 0.839652448657188,
"grad_norm": 12.233243827649204,
"learning_rate": 9.049402613329316e-06,
"loss": 2.372171401977539,
"step": 1063
},
{
"epoch": 0.8404423380726699,
"grad_norm": 10.92795851598965,
"learning_rate": 9.046705113112375e-06,
"loss": 3.3010194301605225,
"step": 1064
},
{
"epoch": 0.8412322274881516,
"grad_norm": 14.04201976986867,
"learning_rate": 9.04400419422563e-06,
"loss": 1.9576343297958374,
"step": 1065
},
{
"epoch": 0.8420221169036335,
"grad_norm": 11.652105566428002,
"learning_rate": 9.041299858950824e-06,
"loss": 2.61598539352417,
"step": 1066
},
{
"epoch": 0.8428120063191153,
"grad_norm": 8.98594247433151,
"learning_rate": 9.03859210957259e-06,
"loss": 2.4816157817840576,
"step": 1067
},
{
"epoch": 0.8436018957345972,
"grad_norm": 15.298066202780824,
"learning_rate": 9.035880948378443e-06,
"loss": 2.8336338996887207,
"step": 1068
},
{
"epoch": 0.844391785150079,
"grad_norm": 10.357852449251151,
"learning_rate": 9.03316637765878e-06,
"loss": 2.5921220779418945,
"step": 1069
},
{
"epoch": 0.8451816745655608,
"grad_norm": 8.480809498263813,
"learning_rate": 9.030448399706881e-06,
"loss": 1.7483251094818115,
"step": 1070
},
{
"epoch": 0.8459715639810427,
"grad_norm": 27.723578315488755,
"learning_rate": 9.0277270168189e-06,
"loss": 2.880528688430786,
"step": 1071
},
{
"epoch": 0.8467614533965245,
"grad_norm": 6.917974588538861,
"learning_rate": 9.025002231293874e-06,
"loss": 2.048827648162842,
"step": 1072
},
{
"epoch": 0.8475513428120063,
"grad_norm": 12.11627041441912,
"learning_rate": 9.022274045433706e-06,
"loss": 2.47039794921875,
"step": 1073
},
{
"epoch": 0.8483412322274881,
"grad_norm": 44.924170557742606,
"learning_rate": 9.019542461543181e-06,
"loss": 2.97735857963562,
"step": 1074
},
{
"epoch": 0.84913112164297,
"grad_norm": 11.486390013948144,
"learning_rate": 9.016807481929948e-06,
"loss": 3.1126694679260254,
"step": 1075
},
{
"epoch": 0.8499210110584519,
"grad_norm": 6.894815895299225,
"learning_rate": 9.014069108904526e-06,
"loss": 2.364047050476074,
"step": 1076
},
{
"epoch": 0.8507109004739336,
"grad_norm": 23.140984836947318,
"learning_rate": 9.011327344780306e-06,
"loss": 2.1283740997314453,
"step": 1077
},
{
"epoch": 0.8515007898894155,
"grad_norm": 10.70156784462954,
"learning_rate": 9.008582191873531e-06,
"loss": 2.7940797805786133,
"step": 1078
},
{
"epoch": 0.8522906793048973,
"grad_norm": 7.795231096778107,
"learning_rate": 9.005833652503323e-06,
"loss": 2.638899803161621,
"step": 1079
},
{
"epoch": 0.8530805687203792,
"grad_norm": 8.536488429058528,
"learning_rate": 9.003081728991654e-06,
"loss": 2.5842251777648926,
"step": 1080
},
{
"epoch": 0.8538704581358609,
"grad_norm": 12.351330943815283,
"learning_rate": 9.000326423663356e-06,
"loss": 2.5075812339782715,
"step": 1081
},
{
"epoch": 0.8546603475513428,
"grad_norm": 7.148232698465105,
"learning_rate": 8.997567738846126e-06,
"loss": 2.6017603874206543,
"step": 1082
},
{
"epoch": 0.8554502369668247,
"grad_norm": 7.110606099151946,
"learning_rate": 8.994805676870504e-06,
"loss": 2.1451048851013184,
"step": 1083
},
{
"epoch": 0.8562401263823065,
"grad_norm": 7.011285288703447,
"learning_rate": 8.992040240069892e-06,
"loss": 2.408576488494873,
"step": 1084
},
{
"epoch": 0.8570300157977883,
"grad_norm": 6.646872041848021,
"learning_rate": 8.98927143078054e-06,
"loss": 2.678819179534912,
"step": 1085
},
{
"epoch": 0.8578199052132701,
"grad_norm": 14.417901398540348,
"learning_rate": 8.986499251341545e-06,
"loss": 2.4109766483306885,
"step": 1086
},
{
"epoch": 0.858609794628752,
"grad_norm": 18.976709689582428,
"learning_rate": 8.983723704094856e-06,
"loss": 2.79660701751709,
"step": 1087
},
{
"epoch": 0.8593996840442338,
"grad_norm": 13.97392247737316,
"learning_rate": 8.980944791385262e-06,
"loss": 2.5753228664398193,
"step": 1088
},
{
"epoch": 0.8601895734597157,
"grad_norm": 10.646221312923302,
"learning_rate": 8.9781625155604e-06,
"loss": 3.145460367202759,
"step": 1089
},
{
"epoch": 0.8609794628751974,
"grad_norm": 12.125091970169855,
"learning_rate": 8.975376878970744e-06,
"loss": 3.016714334487915,
"step": 1090
},
{
"epoch": 0.8617693522906793,
"grad_norm": 10.247379269546157,
"learning_rate": 8.972587883969612e-06,
"loss": 3.0661256313323975,
"step": 1091
},
{
"epoch": 0.8625592417061612,
"grad_norm": 7.510993195440685,
"learning_rate": 8.969795532913152e-06,
"loss": 3.1030752658843994,
"step": 1092
},
{
"epoch": 0.863349131121643,
"grad_norm": 11.085936006300932,
"learning_rate": 8.966999828160355e-06,
"loss": 2.8964810371398926,
"step": 1093
},
{
"epoch": 0.8641390205371248,
"grad_norm": 10.635255649754786,
"learning_rate": 8.96420077207304e-06,
"loss": 2.5778634548187256,
"step": 1094
},
{
"epoch": 0.8649289099526066,
"grad_norm": 11.699323935123369,
"learning_rate": 8.961398367015857e-06,
"loss": 2.6623075008392334,
"step": 1095
},
{
"epoch": 0.8657187993680885,
"grad_norm": 17.05618719334767,
"learning_rate": 8.95859261535629e-06,
"loss": 3.076087474822998,
"step": 1096
},
{
"epoch": 0.8665086887835703,
"grad_norm": 9.989617355430017,
"learning_rate": 8.955783519464644e-06,
"loss": 2.579759120941162,
"step": 1097
},
{
"epoch": 0.8672985781990521,
"grad_norm": 13.983057400258621,
"learning_rate": 8.952971081714056e-06,
"loss": 2.9468941688537598,
"step": 1098
},
{
"epoch": 0.868088467614534,
"grad_norm": 11.807806126259434,
"learning_rate": 8.950155304480482e-06,
"loss": 2.851876974105835,
"step": 1099
},
{
"epoch": 0.8688783570300158,
"grad_norm": 9.121474991816212,
"learning_rate": 8.947336190142696e-06,
"loss": 2.568575382232666,
"step": 1100
},
{
"epoch": 0.8696682464454977,
"grad_norm": 25.714257060738078,
"learning_rate": 8.9445137410823e-06,
"loss": 4.189000129699707,
"step": 1101
},
{
"epoch": 0.8704581358609794,
"grad_norm": 9.883995956029263,
"learning_rate": 8.941687959683707e-06,
"loss": 2.751539707183838,
"step": 1102
},
{
"epoch": 0.8712480252764613,
"grad_norm": 25.188255479459396,
"learning_rate": 8.938858848334144e-06,
"loss": 3.325855255126953,
"step": 1103
},
{
"epoch": 0.8720379146919431,
"grad_norm": 13.67010382519282,
"learning_rate": 8.936026409423656e-06,
"loss": 2.7301278114318848,
"step": 1104
},
{
"epoch": 0.872827804107425,
"grad_norm": 11.48889119309666,
"learning_rate": 8.933190645345096e-06,
"loss": 2.7599031925201416,
"step": 1105
},
{
"epoch": 0.8736176935229067,
"grad_norm": 14.87256495310459,
"learning_rate": 8.930351558494128e-06,
"loss": 3.0885415077209473,
"step": 1106
},
{
"epoch": 0.8744075829383886,
"grad_norm": 10.449701555194096,
"learning_rate": 8.92750915126922e-06,
"loss": 2.876091718673706,
"step": 1107
},
{
"epoch": 0.8751974723538705,
"grad_norm": 14.84773136100038,
"learning_rate": 8.924663426071647e-06,
"loss": 3.0341625213623047,
"step": 1108
},
{
"epoch": 0.8759873617693523,
"grad_norm": 8.15947775366893,
"learning_rate": 8.921814385305489e-06,
"loss": 2.277728796005249,
"step": 1109
},
{
"epoch": 0.8767772511848341,
"grad_norm": 12.99890714367993,
"learning_rate": 8.918962031377622e-06,
"loss": 2.8230514526367188,
"step": 1110
},
{
"epoch": 0.8775671406003159,
"grad_norm": 9.545433136406718,
"learning_rate": 8.916106366697728e-06,
"loss": 2.9621667861938477,
"step": 1111
},
{
"epoch": 0.8783570300157978,
"grad_norm": 10.39494016316492,
"learning_rate": 8.913247393678278e-06,
"loss": 2.3225202560424805,
"step": 1112
},
{
"epoch": 0.8791469194312796,
"grad_norm": 8.192269609832815,
"learning_rate": 8.910385114734544e-06,
"loss": 2.7106945514678955,
"step": 1113
},
{
"epoch": 0.8799368088467614,
"grad_norm": 10.432502214764677,
"learning_rate": 8.907519532284589e-06,
"loss": 2.4334917068481445,
"step": 1114
},
{
"epoch": 0.8807266982622433,
"grad_norm": 12.431139105375072,
"learning_rate": 8.904650648749264e-06,
"loss": 2.7972915172576904,
"step": 1115
},
{
"epoch": 0.8815165876777251,
"grad_norm": 9.209050505947106,
"learning_rate": 8.901778466552215e-06,
"loss": 2.729956865310669,
"step": 1116
},
{
"epoch": 0.882306477093207,
"grad_norm": 34.636350949112945,
"learning_rate": 8.898902988119869e-06,
"loss": 4.157067775726318,
"step": 1117
},
{
"epoch": 0.8830963665086888,
"grad_norm": 10.426826060433754,
"learning_rate": 8.896024215881439e-06,
"loss": 2.5612125396728516,
"step": 1118
},
{
"epoch": 0.8838862559241706,
"grad_norm": 15.903953548700464,
"learning_rate": 8.89314215226892e-06,
"loss": 2.477797031402588,
"step": 1119
},
{
"epoch": 0.8846761453396524,
"grad_norm": 14.039357640579942,
"learning_rate": 8.890256799717092e-06,
"loss": 2.5441317558288574,
"step": 1120
},
{
"epoch": 0.8854660347551343,
"grad_norm": 12.158639837027732,
"learning_rate": 8.88736816066351e-06,
"loss": 2.358978748321533,
"step": 1121
},
{
"epoch": 0.8862559241706162,
"grad_norm": 23.052840320992853,
"learning_rate": 8.884476237548503e-06,
"loss": 2.7650527954101562,
"step": 1122
},
{
"epoch": 0.8870458135860979,
"grad_norm": 8.158346405347508,
"learning_rate": 8.88158103281518e-06,
"loss": 2.303537368774414,
"step": 1123
},
{
"epoch": 0.8878357030015798,
"grad_norm": 20.354107714614678,
"learning_rate": 8.87868254890942e-06,
"loss": 2.8699512481689453,
"step": 1124
},
{
"epoch": 0.8886255924170616,
"grad_norm": 12.005895316989173,
"learning_rate": 8.875780788279868e-06,
"loss": 2.5772287845611572,
"step": 1125
},
{
"epoch": 0.8894154818325435,
"grad_norm": 12.654751625675802,
"learning_rate": 8.872875753377943e-06,
"loss": 2.408010244369507,
"step": 1126
},
{
"epoch": 0.8902053712480252,
"grad_norm": 11.36505652800493,
"learning_rate": 8.86996744665783e-06,
"loss": 2.7588155269622803,
"step": 1127
},
{
"epoch": 0.8909952606635071,
"grad_norm": 10.687132877981576,
"learning_rate": 8.867055870576474e-06,
"loss": 2.394656181335449,
"step": 1128
},
{
"epoch": 0.891785150078989,
"grad_norm": 16.83799896924904,
"learning_rate": 8.864141027593585e-06,
"loss": 2.3354270458221436,
"step": 1129
},
{
"epoch": 0.8925750394944708,
"grad_norm": 13.157340664207496,
"learning_rate": 8.86122292017163e-06,
"loss": 3.017643690109253,
"step": 1130
},
{
"epoch": 0.8933649289099526,
"grad_norm": 11.447127195057677,
"learning_rate": 8.858301550775836e-06,
"loss": 2.929154396057129,
"step": 1131
},
{
"epoch": 0.8941548183254344,
"grad_norm": 9.601561380597447,
"learning_rate": 8.855376921874188e-06,
"loss": 2.9352359771728516,
"step": 1132
},
{
"epoch": 0.8949447077409163,
"grad_norm": 12.643220814871029,
"learning_rate": 8.85244903593742e-06,
"loss": 2.5534372329711914,
"step": 1133
},
{
"epoch": 0.8957345971563981,
"grad_norm": 13.712786164898073,
"learning_rate": 8.849517895439022e-06,
"loss": 2.969341516494751,
"step": 1134
},
{
"epoch": 0.8965244865718799,
"grad_norm": 27.475418365680294,
"learning_rate": 8.846583502855229e-06,
"loss": 2.770808696746826,
"step": 1135
},
{
"epoch": 0.8973143759873617,
"grad_norm": 19.579432202817973,
"learning_rate": 8.843645860665024e-06,
"loss": 2.384641408920288,
"step": 1136
},
{
"epoch": 0.8981042654028436,
"grad_norm": 19.78607010018951,
"learning_rate": 8.84070497135014e-06,
"loss": 2.5281355381011963,
"step": 1137
},
{
"epoch": 0.8988941548183255,
"grad_norm": 14.094462449831422,
"learning_rate": 8.83776083739505e-06,
"loss": 2.215435028076172,
"step": 1138
},
{
"epoch": 0.8996840442338072,
"grad_norm": 17.933215742543844,
"learning_rate": 8.834813461286965e-06,
"loss": 2.291853904724121,
"step": 1139
},
{
"epoch": 0.9004739336492891,
"grad_norm": 8.843018958262709,
"learning_rate": 8.831862845515842e-06,
"loss": 2.638589382171631,
"step": 1140
},
{
"epoch": 0.9012638230647709,
"grad_norm": 9.967486938214911,
"learning_rate": 8.828908992574366e-06,
"loss": 2.7979438304901123,
"step": 1141
},
{
"epoch": 0.9020537124802528,
"grad_norm": 13.195252873575834,
"learning_rate": 8.825951904957967e-06,
"loss": 3.2847375869750977,
"step": 1142
},
{
"epoch": 0.9028436018957346,
"grad_norm": 8.059436353539473,
"learning_rate": 8.822991585164799e-06,
"loss": 3.350722312927246,
"step": 1143
},
{
"epoch": 0.9036334913112164,
"grad_norm": 8.411890662561238,
"learning_rate": 8.82002803569575e-06,
"loss": 2.7170395851135254,
"step": 1144
},
{
"epoch": 0.9044233807266983,
"grad_norm": 6.173660264218937,
"learning_rate": 8.81706125905444e-06,
"loss": 2.3877620697021484,
"step": 1145
},
{
"epoch": 0.9052132701421801,
"grad_norm": 7.345392187325678,
"learning_rate": 8.814091257747206e-06,
"loss": 2.355260133743286,
"step": 1146
},
{
"epoch": 0.906003159557662,
"grad_norm": 18.230032497512713,
"learning_rate": 8.811118034283118e-06,
"loss": 2.5785627365112305,
"step": 1147
},
{
"epoch": 0.9067930489731437,
"grad_norm": 69.44999711709127,
"learning_rate": 8.808141591173966e-06,
"loss": 2.761636734008789,
"step": 1148
},
{
"epoch": 0.9075829383886256,
"grad_norm": 12.183701745113998,
"learning_rate": 8.805161930934256e-06,
"loss": 2.5530524253845215,
"step": 1149
},
{
"epoch": 0.9083728278041074,
"grad_norm": 7.936573557522932,
"learning_rate": 8.802179056081217e-06,
"loss": 2.907682418823242,
"step": 1150
},
{
"epoch": 0.9091627172195893,
"grad_norm": 10.173205195310263,
"learning_rate": 8.799192969134792e-06,
"loss": 2.7638745307922363,
"step": 1151
},
{
"epoch": 0.909952606635071,
"grad_norm": 11.128588471862935,
"learning_rate": 8.796203672617634e-06,
"loss": 2.4181337356567383,
"step": 1152
},
{
"epoch": 0.9107424960505529,
"grad_norm": 9.655844045925859,
"learning_rate": 8.793211169055114e-06,
"loss": 2.773463249206543,
"step": 1153
},
{
"epoch": 0.9115323854660348,
"grad_norm": 11.03401768931024,
"learning_rate": 8.790215460975307e-06,
"loss": 2.9100022315979004,
"step": 1154
},
{
"epoch": 0.9123222748815166,
"grad_norm": 7.123266399146448,
"learning_rate": 8.787216550908997e-06,
"loss": 2.4781482219696045,
"step": 1155
},
{
"epoch": 0.9131121642969984,
"grad_norm": 17.622221819502133,
"learning_rate": 8.784214441389675e-06,
"loss": 2.452256202697754,
"step": 1156
},
{
"epoch": 0.9139020537124802,
"grad_norm": 13.113971999138425,
"learning_rate": 8.78120913495353e-06,
"loss": 2.896636486053467,
"step": 1157
},
{
"epoch": 0.9146919431279621,
"grad_norm": 9.000975431480043,
"learning_rate": 8.778200634139456e-06,
"loss": 2.7890336513519287,
"step": 1158
},
{
"epoch": 0.915481832543444,
"grad_norm": 7.481715343714205,
"learning_rate": 8.775188941489046e-06,
"loss": 2.496453046798706,
"step": 1159
},
{
"epoch": 0.9162717219589257,
"grad_norm": 10.923526680550703,
"learning_rate": 8.772174059546587e-06,
"loss": 2.843217372894287,
"step": 1160
},
{
"epoch": 0.9170616113744076,
"grad_norm": 11.10478528656359,
"learning_rate": 8.76915599085906e-06,
"loss": 2.435269355773926,
"step": 1161
},
{
"epoch": 0.9178515007898894,
"grad_norm": 6.772268942857849,
"learning_rate": 8.76613473797614e-06,
"loss": 2.2593870162963867,
"step": 1162
},
{
"epoch": 0.9186413902053713,
"grad_norm": 9.493749507534623,
"learning_rate": 8.76311030345019e-06,
"loss": 2.758202075958252,
"step": 1163
},
{
"epoch": 0.919431279620853,
"grad_norm": 14.911631831896822,
"learning_rate": 8.760082689836267e-06,
"loss": 3.124772071838379,
"step": 1164
},
{
"epoch": 0.9202211690363349,
"grad_norm": 12.853328106271473,
"learning_rate": 8.757051899692104e-06,
"loss": 2.7403624057769775,
"step": 1165
},
{
"epoch": 0.9210110584518167,
"grad_norm": 16.513030408815556,
"learning_rate": 8.754017935578124e-06,
"loss": 2.6747732162475586,
"step": 1166
},
{
"epoch": 0.9218009478672986,
"grad_norm": 19.976818001028782,
"learning_rate": 8.75098080005743e-06,
"loss": 3.0514602661132812,
"step": 1167
},
{
"epoch": 0.9225908372827805,
"grad_norm": 15.44765720208878,
"learning_rate": 8.747940495695804e-06,
"loss": 3.660196304321289,
"step": 1168
},
{
"epoch": 0.9233807266982622,
"grad_norm": 46.251367241411415,
"learning_rate": 8.744897025061704e-06,
"loss": 3.5469841957092285,
"step": 1169
},
{
"epoch": 0.9241706161137441,
"grad_norm": 10.692755449897618,
"learning_rate": 8.741850390726268e-06,
"loss": 3.707672595977783,
"step": 1170
},
{
"epoch": 0.9249605055292259,
"grad_norm": 17.772477199253103,
"learning_rate": 8.7388005952633e-06,
"loss": 2.4242305755615234,
"step": 1171
},
{
"epoch": 0.9257503949447078,
"grad_norm": 18.70690435962618,
"learning_rate": 8.735747641249276e-06,
"loss": 2.7367851734161377,
"step": 1172
},
{
"epoch": 0.9265402843601895,
"grad_norm": 14.123094504926451,
"learning_rate": 8.732691531263344e-06,
"loss": 2.673311471939087,
"step": 1173
},
{
"epoch": 0.9273301737756714,
"grad_norm": 6.632016401084768,
"learning_rate": 8.729632267887313e-06,
"loss": 2.4245405197143555,
"step": 1174
},
{
"epoch": 0.9281200631911533,
"grad_norm": 12.470479655533174,
"learning_rate": 8.726569853705662e-06,
"loss": 2.9810891151428223,
"step": 1175
},
{
"epoch": 0.9289099526066351,
"grad_norm": 10.985987841138897,
"learning_rate": 8.723504291305526e-06,
"loss": 2.6249923706054688,
"step": 1176
},
{
"epoch": 0.9296998420221169,
"grad_norm": 10.933662910087982,
"learning_rate": 8.720435583276706e-06,
"loss": 2.765192985534668,
"step": 1177
},
{
"epoch": 0.9304897314375987,
"grad_norm": 11.327209932917212,
"learning_rate": 8.71736373221165e-06,
"loss": 2.9828245639801025,
"step": 1178
},
{
"epoch": 0.9312796208530806,
"grad_norm": 13.902491029618451,
"learning_rate": 8.714288740705475e-06,
"loss": 2.574052572250366,
"step": 1179
},
{
"epoch": 0.9320695102685624,
"grad_norm": 7.981484058770302,
"learning_rate": 8.711210611355938e-06,
"loss": 3.03520131111145,
"step": 1180
},
{
"epoch": 0.9328593996840442,
"grad_norm": 7.784551362187513,
"learning_rate": 8.708129346763457e-06,
"loss": 2.7354891300201416,
"step": 1181
},
{
"epoch": 0.933649289099526,
"grad_norm": 8.702455827026565,
"learning_rate": 8.705044949531092e-06,
"loss": 2.977090358734131,
"step": 1182
},
{
"epoch": 0.9344391785150079,
"grad_norm": 10.8130926994093,
"learning_rate": 8.701957422264555e-06,
"loss": 2.7915494441986084,
"step": 1183
},
{
"epoch": 0.9352290679304898,
"grad_norm": 8.086616773290197,
"learning_rate": 8.698866767572196e-06,
"loss": 2.89163875579834,
"step": 1184
},
{
"epoch": 0.9360189573459715,
"grad_norm": 13.946757867057391,
"learning_rate": 8.695772988065011e-06,
"loss": 2.9507193565368652,
"step": 1185
},
{
"epoch": 0.9368088467614534,
"grad_norm": 12.841477106561502,
"learning_rate": 8.692676086356637e-06,
"loss": 2.862083673477173,
"step": 1186
},
{
"epoch": 0.9375987361769352,
"grad_norm": 14.373739928948876,
"learning_rate": 8.689576065063343e-06,
"loss": 2.9636409282684326,
"step": 1187
},
{
"epoch": 0.9383886255924171,
"grad_norm": 11.002313560997857,
"learning_rate": 8.686472926804041e-06,
"loss": 2.7443132400512695,
"step": 1188
},
{
"epoch": 0.9391785150078988,
"grad_norm": 18.302833638551288,
"learning_rate": 8.683366674200271e-06,
"loss": 2.8994405269622803,
"step": 1189
},
{
"epoch": 0.9399684044233807,
"grad_norm": 20.60950939701956,
"learning_rate": 8.680257309876205e-06,
"loss": 2.9267029762268066,
"step": 1190
},
{
"epoch": 0.9407582938388626,
"grad_norm": 8.17531589588896,
"learning_rate": 8.677144836458645e-06,
"loss": 2.567711591720581,
"step": 1191
},
{
"epoch": 0.9415481832543444,
"grad_norm": 18.719866253308787,
"learning_rate": 8.674029256577016e-06,
"loss": 3.6237592697143555,
"step": 1192
},
{
"epoch": 0.9423380726698263,
"grad_norm": 5.9652452107958736,
"learning_rate": 8.670910572863376e-06,
"loss": 2.7660140991210938,
"step": 1193
},
{
"epoch": 0.943127962085308,
"grad_norm": 13.08925807761372,
"learning_rate": 8.667788787952395e-06,
"loss": 2.949338674545288,
"step": 1194
},
{
"epoch": 0.9439178515007899,
"grad_norm": 8.927437335126726,
"learning_rate": 8.664663904481367e-06,
"loss": 2.4335386753082275,
"step": 1195
},
{
"epoch": 0.9447077409162717,
"grad_norm": 13.919010389840851,
"learning_rate": 8.661535925090205e-06,
"loss": 2.838738441467285,
"step": 1196
},
{
"epoch": 0.9454976303317536,
"grad_norm": 11.103546933243214,
"learning_rate": 8.658404852421436e-06,
"loss": 2.6665523052215576,
"step": 1197
},
{
"epoch": 0.9462875197472354,
"grad_norm": 8.53073315483788,
"learning_rate": 8.655270689120201e-06,
"loss": 2.4120519161224365,
"step": 1198
},
{
"epoch": 0.9470774091627172,
"grad_norm": 8.850565917179576,
"learning_rate": 8.652133437834251e-06,
"loss": 2.8354744911193848,
"step": 1199
},
{
"epoch": 0.9478672985781991,
"grad_norm": 11.217378073646433,
"learning_rate": 8.648993101213944e-06,
"loss": 2.960960626602173,
"step": 1200
},
{
"epoch": 0.9486571879936809,
"grad_norm": 10.898057483385264,
"learning_rate": 8.645849681912253e-06,
"loss": 2.597686767578125,
"step": 1201
},
{
"epoch": 0.9494470774091627,
"grad_norm": 6.67140736518561,
"learning_rate": 8.642703182584743e-06,
"loss": 2.3367583751678467,
"step": 1202
},
{
"epoch": 0.9502369668246445,
"grad_norm": 9.820716233427401,
"learning_rate": 8.639553605889588e-06,
"loss": 2.967700481414795,
"step": 1203
},
{
"epoch": 0.9510268562401264,
"grad_norm": 7.868965032787582,
"learning_rate": 8.636400954487563e-06,
"loss": 3.142509937286377,
"step": 1204
},
{
"epoch": 0.9518167456556083,
"grad_norm": 9.187837043838497,
"learning_rate": 8.633245231042038e-06,
"loss": 2.2670068740844727,
"step": 1205
},
{
"epoch": 0.95260663507109,
"grad_norm": 9.246511337142001,
"learning_rate": 8.630086438218976e-06,
"loss": 2.476592540740967,
"step": 1206
},
{
"epoch": 0.9533965244865719,
"grad_norm": 11.415361373055639,
"learning_rate": 8.626924578686937e-06,
"loss": 3.100846767425537,
"step": 1207
},
{
"epoch": 0.9541864139020537,
"grad_norm": 21.60356512140913,
"learning_rate": 8.623759655117072e-06,
"loss": 2.767620801925659,
"step": 1208
},
{
"epoch": 0.9549763033175356,
"grad_norm": 12.131835094302646,
"learning_rate": 8.620591670183116e-06,
"loss": 2.683656692504883,
"step": 1209
},
{
"epoch": 0.9557661927330173,
"grad_norm": 14.171005073002354,
"learning_rate": 8.617420626561394e-06,
"loss": 1.9756850004196167,
"step": 1210
},
{
"epoch": 0.9565560821484992,
"grad_norm": 18.6056036289689,
"learning_rate": 8.614246526930816e-06,
"loss": 2.502706527709961,
"step": 1211
},
{
"epoch": 0.957345971563981,
"grad_norm": 9.743897509731939,
"learning_rate": 8.61106937397287e-06,
"loss": 2.774007797241211,
"step": 1212
},
{
"epoch": 0.9581358609794629,
"grad_norm": 8.660394055988597,
"learning_rate": 8.607889170371627e-06,
"loss": 3.046370506286621,
"step": 1213
},
{
"epoch": 0.9589257503949447,
"grad_norm": 15.639249371215595,
"learning_rate": 8.604705918813729e-06,
"loss": 2.9602065086364746,
"step": 1214
},
{
"epoch": 0.9597156398104265,
"grad_norm": 18.947552868983692,
"learning_rate": 8.601519621988402e-06,
"loss": 2.3588128089904785,
"step": 1215
},
{
"epoch": 0.9605055292259084,
"grad_norm": 8.36234759685658,
"learning_rate": 8.59833028258744e-06,
"loss": 2.4646201133728027,
"step": 1216
},
{
"epoch": 0.9612954186413902,
"grad_norm": 12.494361272602035,
"learning_rate": 8.595137903305205e-06,
"loss": 2.6268255710601807,
"step": 1217
},
{
"epoch": 0.9620853080568721,
"grad_norm": 24.950930936425536,
"learning_rate": 8.591942486838629e-06,
"loss": 2.661611318588257,
"step": 1218
},
{
"epoch": 0.9628751974723538,
"grad_norm": 19.123936642513613,
"learning_rate": 8.588744035887214e-06,
"loss": 2.785714626312256,
"step": 1219
},
{
"epoch": 0.9636650868878357,
"grad_norm": 11.666101454211423,
"learning_rate": 8.585542553153017e-06,
"loss": 3.2729837894439697,
"step": 1220
},
{
"epoch": 0.9644549763033176,
"grad_norm": 5.6088413152939856,
"learning_rate": 8.582338041340668e-06,
"loss": 2.532158613204956,
"step": 1221
},
{
"epoch": 0.9652448657187994,
"grad_norm": 10.219761775257519,
"learning_rate": 8.579130503157343e-06,
"loss": 3.0070722103118896,
"step": 1222
},
{
"epoch": 0.9660347551342812,
"grad_norm": 9.057639040127485,
"learning_rate": 8.575919941312782e-06,
"loss": 2.773895263671875,
"step": 1223
},
{
"epoch": 0.966824644549763,
"grad_norm": 7.267537688630795,
"learning_rate": 8.57270635851928e-06,
"loss": 3.1114962100982666,
"step": 1224
},
{
"epoch": 0.9676145339652449,
"grad_norm": 24.757703365939474,
"learning_rate": 8.569489757491681e-06,
"loss": 3.395624876022339,
"step": 1225
},
{
"epoch": 0.9684044233807267,
"grad_norm": 11.88925219508561,
"learning_rate": 8.566270140947381e-06,
"loss": 2.6596829891204834,
"step": 1226
},
{
"epoch": 0.9691943127962085,
"grad_norm": 7.189319095204579,
"learning_rate": 8.563047511606322e-06,
"loss": 2.3300154209136963,
"step": 1227
},
{
"epoch": 0.9699842022116903,
"grad_norm": 7.840035719762905,
"learning_rate": 8.559821872190993e-06,
"loss": 2.5409016609191895,
"step": 1228
},
{
"epoch": 0.9707740916271722,
"grad_norm": 13.281718877860365,
"learning_rate": 8.556593225426424e-06,
"loss": 2.4998364448547363,
"step": 1229
},
{
"epoch": 0.9715639810426541,
"grad_norm": 5.968185731325506,
"learning_rate": 8.553361574040185e-06,
"loss": 2.6161770820617676,
"step": 1230
},
{
"epoch": 0.9723538704581358,
"grad_norm": 8.962503347087875,
"learning_rate": 8.550126920762389e-06,
"loss": 2.6376500129699707,
"step": 1231
},
{
"epoch": 0.9731437598736177,
"grad_norm": 15.044203306326212,
"learning_rate": 8.546889268325678e-06,
"loss": 2.7106094360351562,
"step": 1232
},
{
"epoch": 0.9739336492890995,
"grad_norm": 9.519163730532522,
"learning_rate": 8.543648619465232e-06,
"loss": 2.801136016845703,
"step": 1233
},
{
"epoch": 0.9747235387045814,
"grad_norm": 15.500075730991261,
"learning_rate": 8.540404976918766e-06,
"loss": 3.1230263710021973,
"step": 1234
},
{
"epoch": 0.9755134281200631,
"grad_norm": 11.401461873064811,
"learning_rate": 8.537158343426515e-06,
"loss": 2.6839194297790527,
"step": 1235
},
{
"epoch": 0.976303317535545,
"grad_norm": 19.111412494475047,
"learning_rate": 8.53390872173125e-06,
"loss": 2.720089912414551,
"step": 1236
},
{
"epoch": 0.9770932069510269,
"grad_norm": 8.343576881810694,
"learning_rate": 8.530656114578258e-06,
"loss": 2.9516029357910156,
"step": 1237
},
{
"epoch": 0.9778830963665087,
"grad_norm": 12.186373113060878,
"learning_rate": 8.527400524715355e-06,
"loss": 3.0438735485076904,
"step": 1238
},
{
"epoch": 0.9786729857819905,
"grad_norm": 12.434900360729275,
"learning_rate": 8.524141954892872e-06,
"loss": 2.8403427600860596,
"step": 1239
},
{
"epoch": 0.9794628751974723,
"grad_norm": 8.116131181660196,
"learning_rate": 8.52088040786366e-06,
"loss": 2.6644649505615234,
"step": 1240
},
{
"epoch": 0.9802527646129542,
"grad_norm": 9.291988122273889,
"learning_rate": 8.517615886383087e-06,
"loss": 2.3029625415802,
"step": 1241
},
{
"epoch": 0.981042654028436,
"grad_norm": 14.010297592734409,
"learning_rate": 8.514348393209029e-06,
"loss": 3.050496816635132,
"step": 1242
},
{
"epoch": 0.9818325434439179,
"grad_norm": 7.98145981401455,
"learning_rate": 8.511077931101875e-06,
"loss": 2.6487746238708496,
"step": 1243
},
{
"epoch": 0.9826224328593997,
"grad_norm": 12.609272032565443,
"learning_rate": 8.507804502824524e-06,
"loss": 2.459247589111328,
"step": 1244
},
{
"epoch": 0.9834123222748815,
"grad_norm": 16.863961424074052,
"learning_rate": 8.504528111142376e-06,
"loss": 2.5375590324401855,
"step": 1245
},
{
"epoch": 0.9842022116903634,
"grad_norm": 11.57902809675688,
"learning_rate": 8.501248758823342e-06,
"loss": 2.7621288299560547,
"step": 1246
},
{
"epoch": 0.9849921011058452,
"grad_norm": 11.15937321516801,
"learning_rate": 8.497966448637825e-06,
"loss": 2.9258103370666504,
"step": 1247
},
{
"epoch": 0.985781990521327,
"grad_norm": 19.29175817663025,
"learning_rate": 8.494681183358735e-06,
"loss": 2.59159255027771,
"step": 1248
},
{
"epoch": 0.9865718799368088,
"grad_norm": 8.989580829357356,
"learning_rate": 8.491392965761472e-06,
"loss": 2.8336706161499023,
"step": 1249
},
{
"epoch": 0.9873617693522907,
"grad_norm": 24.49208358302412,
"learning_rate": 8.488101798623934e-06,
"loss": 3.0984175205230713,
"step": 1250
},
{
"epoch": 0.9881516587677726,
"grad_norm": 11.127902851400203,
"learning_rate": 8.484807684726513e-06,
"loss": 3.016453981399536,
"step": 1251
},
{
"epoch": 0.9889415481832543,
"grad_norm": 11.677023812873243,
"learning_rate": 8.481510626852082e-06,
"loss": 2.415400266647339,
"step": 1252
},
{
"epoch": 0.9897314375987362,
"grad_norm": 6.718579596480926,
"learning_rate": 8.478210627786008e-06,
"loss": 3.216947078704834,
"step": 1253
},
{
"epoch": 0.990521327014218,
"grad_norm": 16.963884976243023,
"learning_rate": 8.474907690316143e-06,
"loss": 2.9188220500946045,
"step": 1254
},
{
"epoch": 0.9913112164296999,
"grad_norm": 18.12845817805615,
"learning_rate": 8.47160181723282e-06,
"loss": 2.854201078414917,
"step": 1255
},
{
"epoch": 0.9921011058451816,
"grad_norm": 10.387630225459594,
"learning_rate": 8.468293011328848e-06,
"loss": 2.473886489868164,
"step": 1256
},
{
"epoch": 0.9928909952606635,
"grad_norm": 10.817521575505426,
"learning_rate": 8.46498127539952e-06,
"loss": 2.68951153755188,
"step": 1257
},
{
"epoch": 0.9936808846761453,
"grad_norm": 8.916073091749583,
"learning_rate": 8.4616666122426e-06,
"loss": 3.3685083389282227,
"step": 1258
},
{
"epoch": 0.9944707740916272,
"grad_norm": 18.44181632696859,
"learning_rate": 8.458349024658326e-06,
"loss": 3.140796184539795,
"step": 1259
},
{
"epoch": 0.995260663507109,
"grad_norm": 15.378117707047336,
"learning_rate": 8.455028515449407e-06,
"loss": 3.070150852203369,
"step": 1260
},
{
"epoch": 0.9960505529225908,
"grad_norm": 14.235029232259741,
"learning_rate": 8.451705087421023e-06,
"loss": 2.6762990951538086,
"step": 1261
},
{
"epoch": 0.9968404423380727,
"grad_norm": 14.728149790719765,
"learning_rate": 8.448378743380816e-06,
"loss": 2.3728647232055664,
"step": 1262
},
{
"epoch": 0.9976303317535545,
"grad_norm": 16.863014645031626,
"learning_rate": 8.445049486138887e-06,
"loss": 2.7437081336975098,
"step": 1263
},
{
"epoch": 0.9984202211690363,
"grad_norm": 14.200104485327296,
"learning_rate": 8.441717318507811e-06,
"loss": 2.6188509464263916,
"step": 1264
},
{
"epoch": 0.9992101105845181,
"grad_norm": 10.67666931812201,
"learning_rate": 8.438382243302609e-06,
"loss": 2.8497185707092285,
"step": 1265
},
{
"epoch": 1.0,
"grad_norm": 13.692512039732208,
"learning_rate": 8.435044263340766e-06,
"loss": 3.2503585815429688,
"step": 1266
},
{
"epoch": 1.0007898894154819,
"grad_norm": 8.63492881542288,
"learning_rate": 8.431703381442219e-06,
"loss": 2.442643165588379,
"step": 1267
},
{
"epoch": 1.0015797788309637,
"grad_norm": 12.845854999880341,
"learning_rate": 8.428359600429352e-06,
"loss": 1.3568103313446045,
"step": 1268
},
{
"epoch": 1.0023696682464456,
"grad_norm": 9.925730237026748,
"learning_rate": 8.425012923127007e-06,
"loss": 1.8518157005310059,
"step": 1269
},
{
"epoch": 1.0031595576619274,
"grad_norm": 11.088071862507528,
"learning_rate": 8.421663352362465e-06,
"loss": 1.3870903253555298,
"step": 1270
},
{
"epoch": 1.003949447077409,
"grad_norm": 10.346802750253469,
"learning_rate": 8.418310890965456e-06,
"loss": 1.3119181394577026,
"step": 1271
},
{
"epoch": 1.004739336492891,
"grad_norm": 9.99087600807621,
"learning_rate": 8.414955541768148e-06,
"loss": 1.453743577003479,
"step": 1272
},
{
"epoch": 1.0055292259083728,
"grad_norm": 15.017657166973448,
"learning_rate": 8.411597307605156e-06,
"loss": 1.2929190397262573,
"step": 1273
},
{
"epoch": 1.0063191153238547,
"grad_norm": 7.93537553692124,
"learning_rate": 8.408236191313523e-06,
"loss": 1.5082969665527344,
"step": 1274
},
{
"epoch": 1.0071090047393365,
"grad_norm": 15.016597946841564,
"learning_rate": 8.40487219573273e-06,
"loss": 0.6322309374809265,
"step": 1275
},
{
"epoch": 1.0078988941548184,
"grad_norm": 20.805758505260766,
"learning_rate": 8.401505323704697e-06,
"loss": 1.5762568712234497,
"step": 1276
},
{
"epoch": 1.0086887835703002,
"grad_norm": 32.14895799539077,
"learning_rate": 8.398135578073763e-06,
"loss": 1.2138099670410156,
"step": 1277
},
{
"epoch": 1.009478672985782,
"grad_norm": 19.797132562379517,
"learning_rate": 8.394762961686706e-06,
"loss": 2.1060357093811035,
"step": 1278
},
{
"epoch": 1.0102685624012637,
"grad_norm": 9.137353495256644,
"learning_rate": 8.391387477392718e-06,
"loss": 1.1503310203552246,
"step": 1279
},
{
"epoch": 1.0110584518167456,
"grad_norm": 10.486340395654485,
"learning_rate": 8.38800912804342e-06,
"loss": 1.0871237516403198,
"step": 1280
},
{
"epoch": 1.0118483412322274,
"grad_norm": 13.381876680770137,
"learning_rate": 8.384627916492856e-06,
"loss": 2.127761125564575,
"step": 1281
},
{
"epoch": 1.0126382306477093,
"grad_norm": 7.526496737374128,
"learning_rate": 8.381243845597482e-06,
"loss": 1.72287917137146,
"step": 1282
},
{
"epoch": 1.0134281200631912,
"grad_norm": 11.304822146622485,
"learning_rate": 8.377856918216171e-06,
"loss": 1.1881051063537598,
"step": 1283
},
{
"epoch": 1.014218009478673,
"grad_norm": 9.044097061649525,
"learning_rate": 8.374467137210216e-06,
"loss": 1.359776496887207,
"step": 1284
},
{
"epoch": 1.0150078988941549,
"grad_norm": 17.550879838319734,
"learning_rate": 8.371074505443309e-06,
"loss": 1.7276735305786133,
"step": 1285
},
{
"epoch": 1.0157977883096367,
"grad_norm": 15.204121265895848,
"learning_rate": 8.367679025781559e-06,
"loss": 1.7062349319458008,
"step": 1286
},
{
"epoch": 1.0165876777251184,
"grad_norm": 11.379360194095462,
"learning_rate": 8.36428070109348e-06,
"loss": 1.3219001293182373,
"step": 1287
},
{
"epoch": 1.0173775671406002,
"grad_norm": 16.20401037301683,
"learning_rate": 8.360879534249984e-06,
"loss": 1.7655143737792969,
"step": 1288
},
{
"epoch": 1.018167456556082,
"grad_norm": 17.858221806964767,
"learning_rate": 8.35747552812439e-06,
"loss": 0.791549563407898,
"step": 1289
},
{
"epoch": 1.018957345971564,
"grad_norm": 12.734062958157567,
"learning_rate": 8.354068685592414e-06,
"loss": 2.4911937713623047,
"step": 1290
},
{
"epoch": 1.0197472353870458,
"grad_norm": 12.18144169507221,
"learning_rate": 8.350659009532169e-06,
"loss": 1.9642527103424072,
"step": 1291
},
{
"epoch": 1.0205371248025277,
"grad_norm": 8.090065379000105,
"learning_rate": 8.347246502824158e-06,
"loss": 1.8792918920516968,
"step": 1292
},
{
"epoch": 1.0213270142180095,
"grad_norm": 10.304570516520158,
"learning_rate": 8.343831168351279e-06,
"loss": 1.270374059677124,
"step": 1293
},
{
"epoch": 1.0221169036334914,
"grad_norm": 11.563660724782284,
"learning_rate": 8.340413008998818e-06,
"loss": 1.5317769050598145,
"step": 1294
},
{
"epoch": 1.0229067930489733,
"grad_norm": 15.047299484001131,
"learning_rate": 8.336992027654446e-06,
"loss": 1.1459553241729736,
"step": 1295
},
{
"epoch": 1.0236966824644549,
"grad_norm": 8.413193349303429,
"learning_rate": 8.333568227208221e-06,
"loss": 1.7619214057922363,
"step": 1296
},
{
"epoch": 1.0244865718799367,
"grad_norm": 17.455533064998168,
"learning_rate": 8.330141610552582e-06,
"loss": 1.3511468172073364,
"step": 1297
},
{
"epoch": 1.0252764612954186,
"grad_norm": 14.604139592871947,
"learning_rate": 8.326712180582343e-06,
"loss": 1.5933293104171753,
"step": 1298
},
{
"epoch": 1.0260663507109005,
"grad_norm": 27.973131188038252,
"learning_rate": 8.323279940194697e-06,
"loss": 1.5758417844772339,
"step": 1299
},
{
"epoch": 1.0268562401263823,
"grad_norm": 8.089809601055391,
"learning_rate": 8.319844892289218e-06,
"loss": 1.7407735586166382,
"step": 1300
},
{
"epoch": 1.0276461295418642,
"grad_norm": 10.840887730701892,
"learning_rate": 8.316407039767839e-06,
"loss": 1.3549420833587646,
"step": 1301
},
{
"epoch": 1.028436018957346,
"grad_norm": 12.182733951436646,
"learning_rate": 8.312966385534873e-06,
"loss": 0.9703428745269775,
"step": 1302
},
{
"epoch": 1.029225908372828,
"grad_norm": 14.482542096417097,
"learning_rate": 8.309522932496994e-06,
"loss": 1.5941872596740723,
"step": 1303
},
{
"epoch": 1.0300157977883095,
"grad_norm": 11.163985480995702,
"learning_rate": 8.306076683563245e-06,
"loss": 1.6860015392303467,
"step": 1304
},
{
"epoch": 1.0308056872037914,
"grad_norm": 18.11399534891752,
"learning_rate": 8.302627641645025e-06,
"loss": 2.1102190017700195,
"step": 1305
},
{
"epoch": 1.0315955766192733,
"grad_norm": 9.377953260206045,
"learning_rate": 8.299175809656099e-06,
"loss": 1.077817440032959,
"step": 1306
},
{
"epoch": 1.0323854660347551,
"grad_norm": 14.295491793789918,
"learning_rate": 8.295721190512584e-06,
"loss": 1.9387364387512207,
"step": 1307
},
{
"epoch": 1.033175355450237,
"grad_norm": 11.182947577292902,
"learning_rate": 8.292263787132955e-06,
"loss": 1.3131287097930908,
"step": 1308
},
{
"epoch": 1.0339652448657188,
"grad_norm": 9.345350781275434,
"learning_rate": 8.288803602438037e-06,
"loss": 1.1507320404052734,
"step": 1309
},
{
"epoch": 1.0347551342812007,
"grad_norm": 9.05362446143137,
"learning_rate": 8.285340639351005e-06,
"loss": 1.400825023651123,
"step": 1310
},
{
"epoch": 1.0355450236966826,
"grad_norm": 12.34226551872451,
"learning_rate": 8.281874900797384e-06,
"loss": 1.3061918020248413,
"step": 1311
},
{
"epoch": 1.0363349131121642,
"grad_norm": 12.863696082340267,
"learning_rate": 8.278406389705038e-06,
"loss": 1.2435519695281982,
"step": 1312
},
{
"epoch": 1.037124802527646,
"grad_norm": 9.912824621386932,
"learning_rate": 8.274935109004179e-06,
"loss": 1.968496561050415,
"step": 1313
},
{
"epoch": 1.037914691943128,
"grad_norm": 6.595291570397836,
"learning_rate": 8.271461061627356e-06,
"loss": 1.566641092300415,
"step": 1314
},
{
"epoch": 1.0387045813586098,
"grad_norm": 13.422406591433509,
"learning_rate": 8.267984250509456e-06,
"loss": 1.5585157871246338,
"step": 1315
},
{
"epoch": 1.0394944707740916,
"grad_norm": 15.367990128433833,
"learning_rate": 8.264504678587697e-06,
"loss": 1.4819953441619873,
"step": 1316
},
{
"epoch": 1.0402843601895735,
"grad_norm": 14.804150275595717,
"learning_rate": 8.261022348801638e-06,
"loss": 1.8838303089141846,
"step": 1317
},
{
"epoch": 1.0410742496050553,
"grad_norm": 10.730588920504516,
"learning_rate": 8.257537264093158e-06,
"loss": 1.101423978805542,
"step": 1318
},
{
"epoch": 1.0418641390205372,
"grad_norm": 20.04793554392532,
"learning_rate": 8.254049427406472e-06,
"loss": 1.2617835998535156,
"step": 1319
},
{
"epoch": 1.042654028436019,
"grad_norm": 8.148456102914626,
"learning_rate": 8.250558841688114e-06,
"loss": 1.9862099885940552,
"step": 1320
},
{
"epoch": 1.0434439178515007,
"grad_norm": 10.559290761333362,
"learning_rate": 8.247065509886943e-06,
"loss": 1.2836475372314453,
"step": 1321
},
{
"epoch": 1.0442338072669826,
"grad_norm": 10.381133061098705,
"learning_rate": 8.243569434954136e-06,
"loss": 1.5940964221954346,
"step": 1322
},
{
"epoch": 1.0450236966824644,
"grad_norm": 12.297214798561669,
"learning_rate": 8.24007061984319e-06,
"loss": 1.687330722808838,
"step": 1323
},
{
"epoch": 1.0458135860979463,
"grad_norm": 13.218655430214906,
"learning_rate": 8.236569067509914e-06,
"loss": 1.3215546607971191,
"step": 1324
},
{
"epoch": 1.0466034755134281,
"grad_norm": 11.341162960064988,
"learning_rate": 8.233064780912431e-06,
"loss": 1.0560698509216309,
"step": 1325
},
{
"epoch": 1.04739336492891,
"grad_norm": 22.19384496177019,
"learning_rate": 8.229557763011176e-06,
"loss": 1.9986159801483154,
"step": 1326
},
{
"epoch": 1.0481832543443919,
"grad_norm": 11.222176676313888,
"learning_rate": 8.226048016768888e-06,
"loss": 1.0500805377960205,
"step": 1327
},
{
"epoch": 1.0489731437598737,
"grad_norm": 8.465112348761927,
"learning_rate": 8.222535545150612e-06,
"loss": 1.7865219116210938,
"step": 1328
},
{
"epoch": 1.0497630331753554,
"grad_norm": 10.856718757186945,
"learning_rate": 8.219020351123695e-06,
"loss": 0.9995932579040527,
"step": 1329
},
{
"epoch": 1.0505529225908372,
"grad_norm": 12.139594241353372,
"learning_rate": 8.215502437657785e-06,
"loss": 1.1767065525054932,
"step": 1330
},
{
"epoch": 1.051342812006319,
"grad_norm": 21.039747631184284,
"learning_rate": 8.211981807724828e-06,
"loss": 1.5868887901306152,
"step": 1331
},
{
"epoch": 1.052132701421801,
"grad_norm": 10.89948650178149,
"learning_rate": 8.208458464299061e-06,
"loss": 1.2937088012695312,
"step": 1332
},
{
"epoch": 1.0529225908372828,
"grad_norm": 11.666317075229031,
"learning_rate": 8.20493241035702e-06,
"loss": 1.497636318206787,
"step": 1333
},
{
"epoch": 1.0537124802527646,
"grad_norm": 11.771023636832185,
"learning_rate": 8.201403648877523e-06,
"loss": 1.822361707687378,
"step": 1334
},
{
"epoch": 1.0545023696682465,
"grad_norm": 9.652940358468332,
"learning_rate": 8.197872182841684e-06,
"loss": 1.5103296041488647,
"step": 1335
},
{
"epoch": 1.0552922590837284,
"grad_norm": 11.614519754388345,
"learning_rate": 8.194338015232893e-06,
"loss": 1.5001356601715088,
"step": 1336
},
{
"epoch": 1.05608214849921,
"grad_norm": 9.35354809288492,
"learning_rate": 8.190801149036828e-06,
"loss": 1.7350692749023438,
"step": 1337
},
{
"epoch": 1.0568720379146919,
"grad_norm": 30.14331426380188,
"learning_rate": 8.187261587241444e-06,
"loss": 1.2237708568572998,
"step": 1338
},
{
"epoch": 1.0576619273301737,
"grad_norm": 16.364193953877205,
"learning_rate": 8.183719332836976e-06,
"loss": 1.5238933563232422,
"step": 1339
},
{
"epoch": 1.0584518167456556,
"grad_norm": 11.363557290265472,
"learning_rate": 8.180174388815933e-06,
"loss": 1.208627700805664,
"step": 1340
},
{
"epoch": 1.0592417061611374,
"grad_norm": 14.373459768882784,
"learning_rate": 8.176626758173093e-06,
"loss": 2.0444703102111816,
"step": 1341
},
{
"epoch": 1.0600315955766193,
"grad_norm": 18.032219552306884,
"learning_rate": 8.173076443905506e-06,
"loss": 1.2289549112319946,
"step": 1342
},
{
"epoch": 1.0608214849921012,
"grad_norm": 10.187495171136964,
"learning_rate": 8.16952344901249e-06,
"loss": 1.079569935798645,
"step": 1343
},
{
"epoch": 1.061611374407583,
"grad_norm": 26.979192881381238,
"learning_rate": 8.16596777649563e-06,
"loss": 3.324129343032837,
"step": 1344
},
{
"epoch": 1.0624012638230647,
"grad_norm": 10.606069599550024,
"learning_rate": 8.162409429358764e-06,
"loss": 1.3011245727539062,
"step": 1345
},
{
"epoch": 1.0631911532385465,
"grad_norm": 9.756876263614647,
"learning_rate": 8.158848410608e-06,
"loss": 3.0105724334716797,
"step": 1346
},
{
"epoch": 1.0639810426540284,
"grad_norm": 11.406355450796742,
"learning_rate": 8.155284723251697e-06,
"loss": 1.46226167678833,
"step": 1347
},
{
"epoch": 1.0647709320695102,
"grad_norm": 13.374416212880506,
"learning_rate": 8.15171837030047e-06,
"loss": 1.8406553268432617,
"step": 1348
},
{
"epoch": 1.065560821484992,
"grad_norm": 10.14760836082322,
"learning_rate": 8.148149354767183e-06,
"loss": 2.1282176971435547,
"step": 1349
},
{
"epoch": 1.066350710900474,
"grad_norm": 9.351026637130168,
"learning_rate": 8.144577679666955e-06,
"loss": 1.2611757516860962,
"step": 1350
},
{
"epoch": 1.0671406003159558,
"grad_norm": 12.303709820191473,
"learning_rate": 8.14100334801715e-06,
"loss": 1.4453020095825195,
"step": 1351
},
{
"epoch": 1.0679304897314377,
"grad_norm": 8.355205018000369,
"learning_rate": 8.13742636283737e-06,
"loss": 0.9628135561943054,
"step": 1352
},
{
"epoch": 1.0687203791469195,
"grad_norm": 9.764330703940649,
"learning_rate": 8.13384672714947e-06,
"loss": 1.3544663190841675,
"step": 1353
},
{
"epoch": 1.0695102685624012,
"grad_norm": 12.402831239510688,
"learning_rate": 8.130264443977534e-06,
"loss": 2.4541101455688477,
"step": 1354
},
{
"epoch": 1.070300157977883,
"grad_norm": 14.656238462966826,
"learning_rate": 8.126679516347887e-06,
"loss": 1.269155502319336,
"step": 1355
},
{
"epoch": 1.0710900473933649,
"grad_norm": 15.277650232839177,
"learning_rate": 8.123091947289089e-06,
"loss": 1.9917043447494507,
"step": 1356
},
{
"epoch": 1.0718799368088467,
"grad_norm": 8.257937346753295,
"learning_rate": 8.119501739831929e-06,
"loss": 1.520797610282898,
"step": 1357
},
{
"epoch": 1.0726698262243286,
"grad_norm": 12.639910986990902,
"learning_rate": 8.11590889700943e-06,
"loss": 1.22843599319458,
"step": 1358
},
{
"epoch": 1.0734597156398105,
"grad_norm": 10.525028379122327,
"learning_rate": 8.11231342185683e-06,
"loss": 1.971350908279419,
"step": 1359
},
{
"epoch": 1.0742496050552923,
"grad_norm": 12.312343896152003,
"learning_rate": 8.108715317411606e-06,
"loss": 1.4550845623016357,
"step": 1360
},
{
"epoch": 1.0750394944707742,
"grad_norm": 15.40492755500279,
"learning_rate": 8.105114586713442e-06,
"loss": 1.4124548435211182,
"step": 1361
},
{
"epoch": 1.0758293838862558,
"grad_norm": 11.11985081751917,
"learning_rate": 8.101511232804251e-06,
"loss": 1.9308778047561646,
"step": 1362
},
{
"epoch": 1.0766192733017377,
"grad_norm": 7.884346798092688,
"learning_rate": 8.097905258728158e-06,
"loss": 1.8403844833374023,
"step": 1363
},
{
"epoch": 1.0774091627172195,
"grad_norm": 10.235048213024509,
"learning_rate": 8.094296667531501e-06,
"loss": 2.4593963623046875,
"step": 1364
},
{
"epoch": 1.0781990521327014,
"grad_norm": 15.402993599882011,
"learning_rate": 8.090685462262828e-06,
"loss": 1.2213027477264404,
"step": 1365
},
{
"epoch": 1.0789889415481833,
"grad_norm": 13.057398754067725,
"learning_rate": 8.0870716459729e-06,
"loss": 1.3943471908569336,
"step": 1366
},
{
"epoch": 1.0797788309636651,
"grad_norm": 11.831339156526893,
"learning_rate": 8.08345522171468e-06,
"loss": 1.2609305381774902,
"step": 1367
},
{
"epoch": 1.080568720379147,
"grad_norm": 10.352369944456171,
"learning_rate": 8.079836192543335e-06,
"loss": 1.6797740459442139,
"step": 1368
},
{
"epoch": 1.0813586097946288,
"grad_norm": 8.167152293191428,
"learning_rate": 8.076214561516234e-06,
"loss": 1.5496795177459717,
"step": 1369
},
{
"epoch": 1.0821484992101107,
"grad_norm": 10.632786869980947,
"learning_rate": 8.07259033169294e-06,
"loss": 1.1447343826293945,
"step": 1370
},
{
"epoch": 1.0829383886255923,
"grad_norm": 9.772931829234892,
"learning_rate": 8.068963506135214e-06,
"loss": 1.840114712715149,
"step": 1371
},
{
"epoch": 1.0837282780410742,
"grad_norm": 12.315109646538895,
"learning_rate": 8.065334087907016e-06,
"loss": 1.73477041721344,
"step": 1372
},
{
"epoch": 1.084518167456556,
"grad_norm": 18.168778117659386,
"learning_rate": 8.061702080074483e-06,
"loss": 2.225961923599243,
"step": 1373
},
{
"epoch": 1.085308056872038,
"grad_norm": 32.98324986784259,
"learning_rate": 8.05806748570595e-06,
"loss": 1.1636614799499512,
"step": 1374
},
{
"epoch": 1.0860979462875198,
"grad_norm": 9.324662001634294,
"learning_rate": 8.054430307871939e-06,
"loss": 1.8258980512619019,
"step": 1375
},
{
"epoch": 1.0868878357030016,
"grad_norm": 9.190806390754526,
"learning_rate": 8.050790549645141e-06,
"loss": 1.2510772943496704,
"step": 1376
},
{
"epoch": 1.0876777251184835,
"grad_norm": 16.506042815565262,
"learning_rate": 8.047148214100445e-06,
"loss": 1.6223645210266113,
"step": 1377
},
{
"epoch": 1.0884676145339653,
"grad_norm": 13.858600832931351,
"learning_rate": 8.043503304314901e-06,
"loss": 1.3856348991394043,
"step": 1378
},
{
"epoch": 1.089257503949447,
"grad_norm": 10.071290706752858,
"learning_rate": 8.039855823367744e-06,
"loss": 0.7631658911705017,
"step": 1379
},
{
"epoch": 1.0900473933649288,
"grad_norm": 12.6598160611525,
"learning_rate": 8.036205774340378e-06,
"loss": 1.197335958480835,
"step": 1380
},
{
"epoch": 1.0908372827804107,
"grad_norm": 11.751428435172853,
"learning_rate": 8.032553160316374e-06,
"loss": 1.2686131000518799,
"step": 1381
},
{
"epoch": 1.0916271721958926,
"grad_norm": 10.67438658042041,
"learning_rate": 8.028897984381475e-06,
"loss": 1.5238006114959717,
"step": 1382
},
{
"epoch": 1.0924170616113744,
"grad_norm": 11.743621256195373,
"learning_rate": 8.025240249623583e-06,
"loss": 1.500381588935852,
"step": 1383
},
{
"epoch": 1.0932069510268563,
"grad_norm": 7.559878550374157,
"learning_rate": 8.021579959132768e-06,
"loss": 1.3321250677108765,
"step": 1384
},
{
"epoch": 1.0939968404423381,
"grad_norm": 23.292812697121704,
"learning_rate": 8.017917116001253e-06,
"loss": 2.0412609577178955,
"step": 1385
},
{
"epoch": 1.09478672985782,
"grad_norm": 13.85465573871678,
"learning_rate": 8.01425172332342e-06,
"loss": 2.5268537998199463,
"step": 1386
},
{
"epoch": 1.0955766192733019,
"grad_norm": 9.779613579870743,
"learning_rate": 8.010583784195804e-06,
"loss": 1.6717355251312256,
"step": 1387
},
{
"epoch": 1.0963665086887835,
"grad_norm": 8.068370833710562,
"learning_rate": 8.006913301717097e-06,
"loss": 1.9225399494171143,
"step": 1388
},
{
"epoch": 1.0971563981042654,
"grad_norm": 27.77102947325972,
"learning_rate": 8.003240278988127e-06,
"loss": 1.9709285497665405,
"step": 1389
},
{
"epoch": 1.0979462875197472,
"grad_norm": 14.301520105323926,
"learning_rate": 7.999564719111884e-06,
"loss": 1.1914315223693848,
"step": 1390
},
{
"epoch": 1.098736176935229,
"grad_norm": 8.296671084591999,
"learning_rate": 7.995886625193484e-06,
"loss": 1.6483365297317505,
"step": 1391
},
{
"epoch": 1.099526066350711,
"grad_norm": 13.348402769682588,
"learning_rate": 7.9922060003402e-06,
"loss": 1.258619785308838,
"step": 1392
},
{
"epoch": 1.1003159557661928,
"grad_norm": 11.776544553295185,
"learning_rate": 7.988522847661432e-06,
"loss": 1.841566801071167,
"step": 1393
},
{
"epoch": 1.1011058451816746,
"grad_norm": 10.875063836555396,
"learning_rate": 7.984837170268725e-06,
"loss": 1.3339906930923462,
"step": 1394
},
{
"epoch": 1.1018957345971563,
"grad_norm": 13.070070571580835,
"learning_rate": 7.981148971275744e-06,
"loss": 1.6276012659072876,
"step": 1395
},
{
"epoch": 1.1026856240126381,
"grad_norm": 8.908917736629908,
"learning_rate": 7.977458253798296e-06,
"loss": 1.7726579904556274,
"step": 1396
},
{
"epoch": 1.10347551342812,
"grad_norm": 18.167256312241264,
"learning_rate": 7.973765020954312e-06,
"loss": 1.8422517776489258,
"step": 1397
},
{
"epoch": 1.1042654028436019,
"grad_norm": 7.381573039738737,
"learning_rate": 7.970069275863844e-06,
"loss": 1.8285367488861084,
"step": 1398
},
{
"epoch": 1.1050552922590837,
"grad_norm": 13.52831771786112,
"learning_rate": 7.96637102164907e-06,
"loss": 1.3294421434402466,
"step": 1399
},
{
"epoch": 1.1058451816745656,
"grad_norm": 10.887865537597344,
"learning_rate": 7.96267026143429e-06,
"loss": 1.9184372425079346,
"step": 1400
},
{
"epoch": 1.1066350710900474,
"grad_norm": 14.487647084460335,
"learning_rate": 7.95896699834592e-06,
"loss": 1.6796505451202393,
"step": 1401
},
{
"epoch": 1.1074249605055293,
"grad_norm": 11.249265183893405,
"learning_rate": 7.955261235512483e-06,
"loss": 1.4529224634170532,
"step": 1402
},
{
"epoch": 1.1082148499210112,
"grad_norm": 9.645198748151126,
"learning_rate": 7.951552976064623e-06,
"loss": 1.2036597728729248,
"step": 1403
},
{
"epoch": 1.1090047393364928,
"grad_norm": 6.984305487322073,
"learning_rate": 7.947842223135089e-06,
"loss": 1.6848506927490234,
"step": 1404
},
{
"epoch": 1.1097946287519747,
"grad_norm": 10.21639035313384,
"learning_rate": 7.944128979858739e-06,
"loss": 2.247422218322754,
"step": 1405
},
{
"epoch": 1.1105845181674565,
"grad_norm": 11.108091790322238,
"learning_rate": 7.940413249372529e-06,
"loss": 1.2086236476898193,
"step": 1406
},
{
"epoch": 1.1113744075829384,
"grad_norm": 16.112732227769964,
"learning_rate": 7.936695034815527e-06,
"loss": 1.3832933902740479,
"step": 1407
},
{
"epoch": 1.1121642969984202,
"grad_norm": 10.731967743134,
"learning_rate": 7.932974339328887e-06,
"loss": 1.68961763381958,
"step": 1408
},
{
"epoch": 1.112954186413902,
"grad_norm": 9.149310423822971,
"learning_rate": 7.929251166055867e-06,
"loss": 0.9486649632453918,
"step": 1409
},
{
"epoch": 1.113744075829384,
"grad_norm": 9.704188919512742,
"learning_rate": 7.925525518141816e-06,
"loss": 1.669637680053711,
"step": 1410
},
{
"epoch": 1.1145339652448658,
"grad_norm": 12.16446463953121,
"learning_rate": 7.921797398734174e-06,
"loss": 1.8166450262069702,
"step": 1411
},
{
"epoch": 1.1153238546603474,
"grad_norm": 13.23762039499484,
"learning_rate": 7.918066810982468e-06,
"loss": 2.1429643630981445,
"step": 1412
},
{
"epoch": 1.1161137440758293,
"grad_norm": 11.441036394254365,
"learning_rate": 7.914333758038311e-06,
"loss": 1.7969956398010254,
"step": 1413
},
{
"epoch": 1.1169036334913112,
"grad_norm": 11.37805305383338,
"learning_rate": 7.910598243055396e-06,
"loss": 1.394661784172058,
"step": 1414
},
{
"epoch": 1.117693522906793,
"grad_norm": 10.617508342494792,
"learning_rate": 7.906860269189504e-06,
"loss": 2.4616918563842773,
"step": 1415
},
{
"epoch": 1.1184834123222749,
"grad_norm": 15.908117706983552,
"learning_rate": 7.903119839598482e-06,
"loss": 1.3925102949142456,
"step": 1416
},
{
"epoch": 1.1192733017377567,
"grad_norm": 6.98257561740011,
"learning_rate": 7.899376957442262e-06,
"loss": 1.6068451404571533,
"step": 1417
},
{
"epoch": 1.1200631911532386,
"grad_norm": 8.912600940978086,
"learning_rate": 7.89563162588284e-06,
"loss": 1.1992769241333008,
"step": 1418
},
{
"epoch": 1.1208530805687205,
"grad_norm": 8.211754082626802,
"learning_rate": 7.891883848084283e-06,
"loss": 1.857825517654419,
"step": 1419
},
{
"epoch": 1.1216429699842023,
"grad_norm": 9.581409453425586,
"learning_rate": 7.88813362721273e-06,
"loss": 1.6415996551513672,
"step": 1420
},
{
"epoch": 1.122432859399684,
"grad_norm": 18.329912927070012,
"learning_rate": 7.884380966436379e-06,
"loss": 1.7880502939224243,
"step": 1421
},
{
"epoch": 1.1232227488151658,
"grad_norm": 9.810032143627515,
"learning_rate": 7.880625868925487e-06,
"loss": 1.8203599452972412,
"step": 1422
},
{
"epoch": 1.1240126382306477,
"grad_norm": 9.06670473315355,
"learning_rate": 7.876868337852377e-06,
"loss": 1.596606731414795,
"step": 1423
},
{
"epoch": 1.1248025276461295,
"grad_norm": 9.942294890261461,
"learning_rate": 7.87310837639142e-06,
"loss": 1.4776908159255981,
"step": 1424
},
{
"epoch": 1.1255924170616114,
"grad_norm": 10.54945063589167,
"learning_rate": 7.869345987719047e-06,
"loss": 1.6829514503479004,
"step": 1425
},
{
"epoch": 1.1263823064770933,
"grad_norm": 11.580449343278849,
"learning_rate": 7.865581175013735e-06,
"loss": 1.1238837242126465,
"step": 1426
},
{
"epoch": 1.1271721958925751,
"grad_norm": 18.360177466539106,
"learning_rate": 7.86181394145601e-06,
"loss": 1.6330945491790771,
"step": 1427
},
{
"epoch": 1.1279620853080567,
"grad_norm": 13.657848488790902,
"learning_rate": 7.858044290228443e-06,
"loss": 1.6493346691131592,
"step": 1428
},
{
"epoch": 1.1287519747235386,
"grad_norm": 8.8436841483474,
"learning_rate": 7.854272224515646e-06,
"loss": 1.7612037658691406,
"step": 1429
},
{
"epoch": 1.1295418641390205,
"grad_norm": 15.349132583927682,
"learning_rate": 7.850497747504278e-06,
"loss": 1.4374094009399414,
"step": 1430
},
{
"epoch": 1.1303317535545023,
"grad_norm": 7.20960688510771,
"learning_rate": 7.846720862383024e-06,
"loss": 1.5491845607757568,
"step": 1431
},
{
"epoch": 1.1311216429699842,
"grad_norm": 8.9481729357505,
"learning_rate": 7.842941572342613e-06,
"loss": 1.082542896270752,
"step": 1432
},
{
"epoch": 1.131911532385466,
"grad_norm": 11.741346122561193,
"learning_rate": 7.839159880575798e-06,
"loss": 1.0406155586242676,
"step": 1433
},
{
"epoch": 1.132701421800948,
"grad_norm": 12.852699840775466,
"learning_rate": 7.835375790277367e-06,
"loss": 1.5804665088653564,
"step": 1434
},
{
"epoch": 1.1334913112164298,
"grad_norm": 11.234205111215255,
"learning_rate": 7.831589304644132e-06,
"loss": 1.6930797100067139,
"step": 1435
},
{
"epoch": 1.1342812006319116,
"grad_norm": 12.465436878993371,
"learning_rate": 7.827800426874927e-06,
"loss": 2.350858211517334,
"step": 1436
},
{
"epoch": 1.1350710900473935,
"grad_norm": 10.417673706008966,
"learning_rate": 7.824009160170611e-06,
"loss": 2.1081159114837646,
"step": 1437
},
{
"epoch": 1.1358609794628751,
"grad_norm": 6.972975321299533,
"learning_rate": 7.820215507734057e-06,
"loss": 1.550991415977478,
"step": 1438
},
{
"epoch": 1.136650868878357,
"grad_norm": 9.653383800691586,
"learning_rate": 7.816419472770156e-06,
"loss": 2.1447458267211914,
"step": 1439
},
{
"epoch": 1.1374407582938388,
"grad_norm": 10.486086790783105,
"learning_rate": 7.812621058485807e-06,
"loss": 1.8012946844100952,
"step": 1440
},
{
"epoch": 1.1382306477093207,
"grad_norm": 18.588695506303544,
"learning_rate": 7.808820268089928e-06,
"loss": 0.923102617263794,
"step": 1441
},
{
"epoch": 1.1390205371248026,
"grad_norm": 11.289408037095642,
"learning_rate": 7.805017104793438e-06,
"loss": 1.2512264251708984,
"step": 1442
},
{
"epoch": 1.1398104265402844,
"grad_norm": 17.187322631022983,
"learning_rate": 7.801211571809258e-06,
"loss": 2.007840633392334,
"step": 1443
},
{
"epoch": 1.1406003159557663,
"grad_norm": 13.141510083418034,
"learning_rate": 7.79740367235232e-06,
"loss": 1.7152661085128784,
"step": 1444
},
{
"epoch": 1.141390205371248,
"grad_norm": 15.112494999687053,
"learning_rate": 7.793593409639547e-06,
"loss": 2.0199451446533203,
"step": 1445
},
{
"epoch": 1.1421800947867298,
"grad_norm": 85.2167253034368,
"learning_rate": 7.789780786889862e-06,
"loss": 2.34489369392395,
"step": 1446
},
{
"epoch": 1.1429699842022116,
"grad_norm": 9.312272447955333,
"learning_rate": 7.785965807324182e-06,
"loss": 1.8030688762664795,
"step": 1447
},
{
"epoch": 1.1437598736176935,
"grad_norm": 6.989138103509471,
"learning_rate": 7.782148474165414e-06,
"loss": 1.0236456394195557,
"step": 1448
},
{
"epoch": 1.1445497630331753,
"grad_norm": 13.66043355155644,
"learning_rate": 7.778328790638454e-06,
"loss": 1.432175636291504,
"step": 1449
},
{
"epoch": 1.1453396524486572,
"grad_norm": 11.738420362584556,
"learning_rate": 7.774506759970183e-06,
"loss": 1.8172495365142822,
"step": 1450
},
{
"epoch": 1.146129541864139,
"grad_norm": 12.2141920832457,
"learning_rate": 7.770682385389461e-06,
"loss": 1.540172815322876,
"step": 1451
},
{
"epoch": 1.146919431279621,
"grad_norm": 11.67045045892725,
"learning_rate": 7.76685567012714e-06,
"loss": 1.0208656787872314,
"step": 1452
},
{
"epoch": 1.1477093206951028,
"grad_norm": 19.223683414914778,
"learning_rate": 7.763026617416037e-06,
"loss": 2.1578786373138428,
"step": 1453
},
{
"epoch": 1.1484992101105844,
"grad_norm": 11.975180223168232,
"learning_rate": 7.759195230490948e-06,
"loss": 1.0847947597503662,
"step": 1454
},
{
"epoch": 1.1492890995260663,
"grad_norm": 14.479350754911671,
"learning_rate": 7.755361512588641e-06,
"loss": 0.8910523653030396,
"step": 1455
},
{
"epoch": 1.1500789889415481,
"grad_norm": 12.322055451503076,
"learning_rate": 7.751525466947858e-06,
"loss": 1.9108871221542358,
"step": 1456
},
{
"epoch": 1.15086887835703,
"grad_norm": 8.486429198821625,
"learning_rate": 7.747687096809298e-06,
"loss": 1.3047826290130615,
"step": 1457
},
{
"epoch": 1.1516587677725119,
"grad_norm": 22.216033260635363,
"learning_rate": 7.74384640541563e-06,
"loss": 1.3196589946746826,
"step": 1458
},
{
"epoch": 1.1524486571879937,
"grad_norm": 13.4404210054278,
"learning_rate": 7.740003396011485e-06,
"loss": 1.7318623065948486,
"step": 1459
},
{
"epoch": 1.1532385466034756,
"grad_norm": 10.120534079823207,
"learning_rate": 7.736158071843446e-06,
"loss": 1.0442365407943726,
"step": 1460
},
{
"epoch": 1.1540284360189574,
"grad_norm": 11.889433421917458,
"learning_rate": 7.73231043616006e-06,
"loss": 1.421532392501831,
"step": 1461
},
{
"epoch": 1.154818325434439,
"grad_norm": 8.828012658172716,
"learning_rate": 7.728460492211818e-06,
"loss": 1.5032670497894287,
"step": 1462
},
{
"epoch": 1.155608214849921,
"grad_norm": 16.251861602372543,
"learning_rate": 7.724608243251168e-06,
"loss": 1.5522558689117432,
"step": 1463
},
{
"epoch": 1.1563981042654028,
"grad_norm": 14.565593393435774,
"learning_rate": 7.720753692532503e-06,
"loss": 1.4599350690841675,
"step": 1464
},
{
"epoch": 1.1571879936808847,
"grad_norm": 8.480351738134685,
"learning_rate": 7.716896843312158e-06,
"loss": 1.7275468111038208,
"step": 1465
},
{
"epoch": 1.1579778830963665,
"grad_norm": 13.805554305973507,
"learning_rate": 7.713037698848414e-06,
"loss": 1.4946775436401367,
"step": 1466
},
{
"epoch": 1.1587677725118484,
"grad_norm": 14.030328008782464,
"learning_rate": 7.709176262401488e-06,
"loss": 1.6568756103515625,
"step": 1467
},
{
"epoch": 1.1595576619273302,
"grad_norm": 18.97968668941531,
"learning_rate": 7.705312537233534e-06,
"loss": 2.4846177101135254,
"step": 1468
},
{
"epoch": 1.160347551342812,
"grad_norm": 21.074569384938723,
"learning_rate": 7.70144652660864e-06,
"loss": 1.6268469095230103,
"step": 1469
},
{
"epoch": 1.161137440758294,
"grad_norm": 11.221094852791536,
"learning_rate": 7.697578233792824e-06,
"loss": 1.4419642686843872,
"step": 1470
},
{
"epoch": 1.1619273301737756,
"grad_norm": 12.283969197549489,
"learning_rate": 7.693707662054033e-06,
"loss": 2.8970489501953125,
"step": 1471
},
{
"epoch": 1.1627172195892574,
"grad_norm": 10.191761634757446,
"learning_rate": 7.68983481466214e-06,
"loss": 1.694975733757019,
"step": 1472
},
{
"epoch": 1.1635071090047393,
"grad_norm": 9.871959664749303,
"learning_rate": 7.685959694888935e-06,
"loss": 1.5678069591522217,
"step": 1473
},
{
"epoch": 1.1642969984202212,
"grad_norm": 13.739000495877802,
"learning_rate": 7.682082306008138e-06,
"loss": 2.2661375999450684,
"step": 1474
},
{
"epoch": 1.165086887835703,
"grad_norm": 13.029810676854682,
"learning_rate": 7.678202651295377e-06,
"loss": 1.779642105102539,
"step": 1475
},
{
"epoch": 1.1658767772511849,
"grad_norm": 11.679296000479976,
"learning_rate": 7.674320734028193e-06,
"loss": 0.8130778074264526,
"step": 1476
},
{
"epoch": 1.1666666666666667,
"grad_norm": 14.423216823781184,
"learning_rate": 7.67043655748605e-06,
"loss": 1.6942588090896606,
"step": 1477
},
{
"epoch": 1.1674565560821484,
"grad_norm": 11.441884700380314,
"learning_rate": 7.66655012495031e-06,
"loss": 1.179840087890625,
"step": 1478
},
{
"epoch": 1.1682464454976302,
"grad_norm": 16.02771109914067,
"learning_rate": 7.662661439704241e-06,
"loss": 2.1548268795013428,
"step": 1479
},
{
"epoch": 1.169036334913112,
"grad_norm": 14.394873535724342,
"learning_rate": 7.658770505033022e-06,
"loss": 1.1767363548278809,
"step": 1480
},
{
"epoch": 1.169826224328594,
"grad_norm": 16.396153231459706,
"learning_rate": 7.654877324223723e-06,
"loss": 1.0473990440368652,
"step": 1481
},
{
"epoch": 1.1706161137440758,
"grad_norm": 36.207662154699634,
"learning_rate": 7.65098190056532e-06,
"loss": 1.4607884883880615,
"step": 1482
},
{
"epoch": 1.1714060031595577,
"grad_norm": 17.99136981853802,
"learning_rate": 7.647084237348676e-06,
"loss": 1.0910999774932861,
"step": 1483
},
{
"epoch": 1.1721958925750395,
"grad_norm": 17.53587070270741,
"learning_rate": 7.643184337866551e-06,
"loss": 1.9439430236816406,
"step": 1484
},
{
"epoch": 1.1729857819905214,
"grad_norm": 21.036905024705902,
"learning_rate": 7.639282205413595e-06,
"loss": 1.2512264251708984,
"step": 1485
},
{
"epoch": 1.1737756714060033,
"grad_norm": 14.69734973276274,
"learning_rate": 7.63537784328634e-06,
"loss": 1.736374855041504,
"step": 1486
},
{
"epoch": 1.174565560821485,
"grad_norm": 10.589187353935765,
"learning_rate": 7.631471254783205e-06,
"loss": 2.0629525184631348,
"step": 1487
},
{
"epoch": 1.1753554502369667,
"grad_norm": 12.298150667488217,
"learning_rate": 7.627562443204488e-06,
"loss": 1.75897216796875,
"step": 1488
},
{
"epoch": 1.1761453396524486,
"grad_norm": 11.144800996692458,
"learning_rate": 7.623651411852369e-06,
"loss": 1.8783044815063477,
"step": 1489
},
{
"epoch": 1.1769352290679305,
"grad_norm": 18.274971673090146,
"learning_rate": 7.619738164030894e-06,
"loss": 1.2765934467315674,
"step": 1490
},
{
"epoch": 1.1777251184834123,
"grad_norm": 13.609333078610245,
"learning_rate": 7.615822703045993e-06,
"loss": 0.9716304540634155,
"step": 1491
},
{
"epoch": 1.1785150078988942,
"grad_norm": 9.06953052854543,
"learning_rate": 7.611905032205459e-06,
"loss": 1.2346336841583252,
"step": 1492
},
{
"epoch": 1.179304897314376,
"grad_norm": 7.894635099442645,
"learning_rate": 7.607985154818951e-06,
"loss": 1.3905161619186401,
"step": 1493
},
{
"epoch": 1.180094786729858,
"grad_norm": 11.930990729479253,
"learning_rate": 7.6040630741979985e-06,
"loss": 1.5520501136779785,
"step": 1494
},
{
"epoch": 1.1808846761453395,
"grad_norm": 13.518366545034592,
"learning_rate": 7.6001387936559845e-06,
"loss": 1.478689193725586,
"step": 1495
},
{
"epoch": 1.1816745655608214,
"grad_norm": 13.55754756002125,
"learning_rate": 7.596212316508155e-06,
"loss": 1.195493221282959,
"step": 1496
},
{
"epoch": 1.1824644549763033,
"grad_norm": 9.14918248953396,
"learning_rate": 7.592283646071612e-06,
"loss": 1.777099609375,
"step": 1497
},
{
"epoch": 1.1832543443917851,
"grad_norm": 12.578589306666348,
"learning_rate": 7.588352785665307e-06,
"loss": 1.780318260192871,
"step": 1498
},
{
"epoch": 1.184044233807267,
"grad_norm": 16.331287548330316,
"learning_rate": 7.5844197386100475e-06,
"loss": 0.6511062383651733,
"step": 1499
},
{
"epoch": 1.1848341232227488,
"grad_norm": 16.33379318813503,
"learning_rate": 7.58048450822848e-06,
"loss": 1.3710471391677856,
"step": 1500
},
{
"epoch": 1.1856240126382307,
"grad_norm": 7.772998081101554,
"learning_rate": 7.576547097845105e-06,
"loss": 1.412032127380371,
"step": 1501
},
{
"epoch": 1.1864139020537126,
"grad_norm": 8.398419142031567,
"learning_rate": 7.572607510786256e-06,
"loss": 1.2059210538864136,
"step": 1502
},
{
"epoch": 1.1872037914691944,
"grad_norm": 9.901903204130111,
"learning_rate": 7.568665750380111e-06,
"loss": 1.7022660970687866,
"step": 1503
},
{
"epoch": 1.187993680884676,
"grad_norm": 19.304850784714255,
"learning_rate": 7.56472181995668e-06,
"loss": 0.9760904908180237,
"step": 1504
},
{
"epoch": 1.188783570300158,
"grad_norm": 7.87702786044823,
"learning_rate": 7.560775722847811e-06,
"loss": 1.2958579063415527,
"step": 1505
},
{
"epoch": 1.1895734597156398,
"grad_norm": 8.790058222382367,
"learning_rate": 7.556827462387179e-06,
"loss": 1.5401803255081177,
"step": 1506
},
{
"epoch": 1.1903633491311216,
"grad_norm": 10.161647102455948,
"learning_rate": 7.5528770419102845e-06,
"loss": 1.2612812519073486,
"step": 1507
},
{
"epoch": 1.1911532385466035,
"grad_norm": 8.100962154198587,
"learning_rate": 7.548924464754462e-06,
"loss": 1.4908281564712524,
"step": 1508
},
{
"epoch": 1.1919431279620853,
"grad_norm": 7.739278263317995,
"learning_rate": 7.544969734258856e-06,
"loss": 1.3893706798553467,
"step": 1509
},
{
"epoch": 1.1927330173775672,
"grad_norm": 9.236436622854567,
"learning_rate": 7.54101285376444e-06,
"loss": 1.428430438041687,
"step": 1510
},
{
"epoch": 1.193522906793049,
"grad_norm": 9.21444761020881,
"learning_rate": 7.537053826613995e-06,
"loss": 1.2131574153900146,
"step": 1511
},
{
"epoch": 1.1943127962085307,
"grad_norm": 12.77391408135766,
"learning_rate": 7.533092656152123e-06,
"loss": 1.4747178554534912,
"step": 1512
},
{
"epoch": 1.1951026856240126,
"grad_norm": 10.144190882358012,
"learning_rate": 7.529129345725234e-06,
"loss": 1.2598638534545898,
"step": 1513
},
{
"epoch": 1.1958925750394944,
"grad_norm": 11.632673780290576,
"learning_rate": 7.525163898681547e-06,
"loss": 2.219787120819092,
"step": 1514
},
{
"epoch": 1.1966824644549763,
"grad_norm": 11.100505677689917,
"learning_rate": 7.521196318371082e-06,
"loss": 1.492173433303833,
"step": 1515
},
{
"epoch": 1.1974723538704581,
"grad_norm": 13.348061220126512,
"learning_rate": 7.517226608145665e-06,
"loss": 1.3286212682724,
"step": 1516
},
{
"epoch": 1.19826224328594,
"grad_norm": 10.85425623441688,
"learning_rate": 7.51325477135892e-06,
"loss": 1.711487054824829,
"step": 1517
},
{
"epoch": 1.1990521327014219,
"grad_norm": 10.554910275401799,
"learning_rate": 7.509280811366268e-06,
"loss": 1.6877734661102295,
"step": 1518
},
{
"epoch": 1.1998420221169037,
"grad_norm": 9.907008317230424,
"learning_rate": 7.505304731524925e-06,
"loss": 1.5272674560546875,
"step": 1519
},
{
"epoch": 1.2006319115323856,
"grad_norm": 6.706994018715486,
"learning_rate": 7.501326535193893e-06,
"loss": 1.4311678409576416,
"step": 1520
},
{
"epoch": 1.2014218009478672,
"grad_norm": 10.997085955938047,
"learning_rate": 7.497346225733968e-06,
"loss": 2.118659734725952,
"step": 1521
},
{
"epoch": 1.202211690363349,
"grad_norm": 11.689892035649317,
"learning_rate": 7.493363806507727e-06,
"loss": 1.3729009628295898,
"step": 1522
},
{
"epoch": 1.203001579778831,
"grad_norm": 9.397482204646643,
"learning_rate": 7.489379280879532e-06,
"loss": 1.257511854171753,
"step": 1523
},
{
"epoch": 1.2037914691943128,
"grad_norm": 11.597065678347706,
"learning_rate": 7.485392652215524e-06,
"loss": 2.0744144916534424,
"step": 1524
},
{
"epoch": 1.2045813586097947,
"grad_norm": 9.18002114901742,
"learning_rate": 7.481403923883618e-06,
"loss": 2.4062094688415527,
"step": 1525
},
{
"epoch": 1.2053712480252765,
"grad_norm": 11.51200612045866,
"learning_rate": 7.477413099253507e-06,
"loss": 1.3634181022644043,
"step": 1526
},
{
"epoch": 1.2061611374407584,
"grad_norm": 9.886739815987795,
"learning_rate": 7.4734201816966504e-06,
"loss": 1.8432646989822388,
"step": 1527
},
{
"epoch": 1.20695102685624,
"grad_norm": 7.821912561074209,
"learning_rate": 7.46942517458628e-06,
"loss": 1.6155750751495361,
"step": 1528
},
{
"epoch": 1.2077409162717219,
"grad_norm": 208.73695380738994,
"learning_rate": 7.46542808129739e-06,
"loss": 2.2105181217193604,
"step": 1529
},
{
"epoch": 1.2085308056872037,
"grad_norm": 9.442612830841552,
"learning_rate": 7.461428905206738e-06,
"loss": 1.035463571548462,
"step": 1530
},
{
"epoch": 1.2093206951026856,
"grad_norm": 14.92511763923537,
"learning_rate": 7.457427649692842e-06,
"loss": 1.388323187828064,
"step": 1531
},
{
"epoch": 1.2101105845181674,
"grad_norm": 10.04534923313997,
"learning_rate": 7.453424318135976e-06,
"loss": 1.5381313562393188,
"step": 1532
},
{
"epoch": 1.2109004739336493,
"grad_norm": 10.045984329726831,
"learning_rate": 7.4494189139181664e-06,
"loss": 2.077467441558838,
"step": 1533
},
{
"epoch": 1.2116903633491312,
"grad_norm": 11.346686651643333,
"learning_rate": 7.445411440423191e-06,
"loss": 1.4641904830932617,
"step": 1534
},
{
"epoch": 1.212480252764613,
"grad_norm": 10.573921142627707,
"learning_rate": 7.4414019010365804e-06,
"loss": 1.4743311405181885,
"step": 1535
},
{
"epoch": 1.2132701421800949,
"grad_norm": 13.824451819029829,
"learning_rate": 7.437390299145602e-06,
"loss": 1.4657893180847168,
"step": 1536
},
{
"epoch": 1.2140600315955767,
"grad_norm": 7.208254750492127,
"learning_rate": 7.433376638139273e-06,
"loss": 1.7224773168563843,
"step": 1537
},
{
"epoch": 1.2148499210110584,
"grad_norm": 14.6258689437837,
"learning_rate": 7.429360921408348e-06,
"loss": 1.240898847579956,
"step": 1538
},
{
"epoch": 1.2156398104265402,
"grad_norm": 9.289403617691221,
"learning_rate": 7.4253431523453156e-06,
"loss": 1.5302990674972534,
"step": 1539
},
{
"epoch": 1.216429699842022,
"grad_norm": 15.992588174167665,
"learning_rate": 7.421323334344403e-06,
"loss": 1.2926700115203857,
"step": 1540
},
{
"epoch": 1.217219589257504,
"grad_norm": 13.29806452130727,
"learning_rate": 7.417301470801563e-06,
"loss": 1.858204960823059,
"step": 1541
},
{
"epoch": 1.2180094786729858,
"grad_norm": 10.981694927001664,
"learning_rate": 7.41327756511448e-06,
"loss": 1.6751494407653809,
"step": 1542
},
{
"epoch": 1.2187993680884677,
"grad_norm": 12.919094806936021,
"learning_rate": 7.409251620682565e-06,
"loss": 1.1227457523345947,
"step": 1543
},
{
"epoch": 1.2195892575039495,
"grad_norm": 12.822642145949752,
"learning_rate": 7.405223640906945e-06,
"loss": 1.8671314716339111,
"step": 1544
},
{
"epoch": 1.2203791469194312,
"grad_norm": 13.207524616649813,
"learning_rate": 7.401193629190475e-06,
"loss": 2.4761242866516113,
"step": 1545
},
{
"epoch": 1.221169036334913,
"grad_norm": 10.043425267983269,
"learning_rate": 7.397161588937718e-06,
"loss": 1.4061156511306763,
"step": 1546
},
{
"epoch": 1.2219589257503949,
"grad_norm": 10.369465986167071,
"learning_rate": 7.393127523554959e-06,
"loss": 1.6333423852920532,
"step": 1547
},
{
"epoch": 1.2227488151658767,
"grad_norm": 28.920248006785016,
"learning_rate": 7.389091436450185e-06,
"loss": 2.198690414428711,
"step": 1548
},
{
"epoch": 1.2235387045813586,
"grad_norm": 10.220829768400877,
"learning_rate": 7.3850533310330995e-06,
"loss": 1.6852078437805176,
"step": 1549
},
{
"epoch": 1.2243285939968405,
"grad_norm": 10.219543177913568,
"learning_rate": 7.381013210715106e-06,
"loss": 1.4084906578063965,
"step": 1550
},
{
"epoch": 1.2251184834123223,
"grad_norm": 11.198352523535156,
"learning_rate": 7.376971078909312e-06,
"loss": 1.3699990510940552,
"step": 1551
},
{
"epoch": 1.2259083728278042,
"grad_norm": 11.335053711356307,
"learning_rate": 7.3729269390305226e-06,
"loss": 1.7044817209243774,
"step": 1552
},
{
"epoch": 1.226698262243286,
"grad_norm": 12.010639946062005,
"learning_rate": 7.36888079449524e-06,
"loss": 1.949375033378601,
"step": 1553
},
{
"epoch": 1.2274881516587677,
"grad_norm": 8.245851384242593,
"learning_rate": 7.364832648721664e-06,
"loss": 1.9039802551269531,
"step": 1554
},
{
"epoch": 1.2282780410742495,
"grad_norm": 7.08878950015621,
"learning_rate": 7.360782505129675e-06,
"loss": 1.9304604530334473,
"step": 1555
},
{
"epoch": 1.2290679304897314,
"grad_norm": 15.151405491025901,
"learning_rate": 7.356730367140856e-06,
"loss": 1.5305366516113281,
"step": 1556
},
{
"epoch": 1.2298578199052133,
"grad_norm": 25.91795312714592,
"learning_rate": 7.35267623817846e-06,
"loss": 1.2220271825790405,
"step": 1557
},
{
"epoch": 1.2306477093206951,
"grad_norm": 15.027153220859233,
"learning_rate": 7.348620121667431e-06,
"loss": 1.7458312511444092,
"step": 1558
},
{
"epoch": 1.231437598736177,
"grad_norm": 11.869844226224354,
"learning_rate": 7.34456202103439e-06,
"loss": 1.1599485874176025,
"step": 1559
},
{
"epoch": 1.2322274881516588,
"grad_norm": 9.271449236873991,
"learning_rate": 7.3405019397076314e-06,
"loss": 1.1800360679626465,
"step": 1560
},
{
"epoch": 1.2330173775671407,
"grad_norm": 11.38399689838541,
"learning_rate": 7.336439881117128e-06,
"loss": 1.3739066123962402,
"step": 1561
},
{
"epoch": 1.2338072669826223,
"grad_norm": 10.444548453126936,
"learning_rate": 7.332375848694519e-06,
"loss": 1.8069045543670654,
"step": 1562
},
{
"epoch": 1.2345971563981042,
"grad_norm": 17.266827050977902,
"learning_rate": 7.328309845873112e-06,
"loss": 2.19236159324646,
"step": 1563
},
{
"epoch": 1.235387045813586,
"grad_norm": 13.591954894799105,
"learning_rate": 7.3242418760878805e-06,
"loss": 2.5699267387390137,
"step": 1564
},
{
"epoch": 1.236176935229068,
"grad_norm": 11.237452612969626,
"learning_rate": 7.320171942775458e-06,
"loss": 1.278207778930664,
"step": 1565
},
{
"epoch": 1.2369668246445498,
"grad_norm": 17.089988693890064,
"learning_rate": 7.316100049374139e-06,
"loss": 1.3304542303085327,
"step": 1566
},
{
"epoch": 1.2377567140600316,
"grad_norm": 11.92363646482957,
"learning_rate": 7.312026199323869e-06,
"loss": 1.8582803010940552,
"step": 1567
},
{
"epoch": 1.2385466034755135,
"grad_norm": 12.460514274935408,
"learning_rate": 7.307950396066257e-06,
"loss": 1.5322096347808838,
"step": 1568
},
{
"epoch": 1.2393364928909953,
"grad_norm": 12.477145420644185,
"learning_rate": 7.303872643044545e-06,
"loss": 1.9782072305679321,
"step": 1569
},
{
"epoch": 1.2401263823064772,
"grad_norm": 12.74235377003251,
"learning_rate": 7.299792943703642e-06,
"loss": 1.2845888137817383,
"step": 1570
},
{
"epoch": 1.2409162717219588,
"grad_norm": 10.673508111722988,
"learning_rate": 7.295711301490084e-06,
"loss": 1.6466004848480225,
"step": 1571
},
{
"epoch": 1.2417061611374407,
"grad_norm": 9.652022285685392,
"learning_rate": 7.291627719852059e-06,
"loss": 1.3372915983200073,
"step": 1572
},
{
"epoch": 1.2424960505529226,
"grad_norm": 9.596732477385675,
"learning_rate": 7.287542202239392e-06,
"loss": 1.8011666536331177,
"step": 1573
},
{
"epoch": 1.2432859399684044,
"grad_norm": 7.083810704363881,
"learning_rate": 7.283454752103538e-06,
"loss": 1.1540056467056274,
"step": 1574
},
{
"epoch": 1.2440758293838863,
"grad_norm": 9.393392719670336,
"learning_rate": 7.27936537289759e-06,
"loss": 1.677268385887146,
"step": 1575
},
{
"epoch": 1.2448657187993681,
"grad_norm": 17.733645365732354,
"learning_rate": 7.27527406807627e-06,
"loss": 0.8276841640472412,
"step": 1576
},
{
"epoch": 1.24565560821485,
"grad_norm": 13.067729880728594,
"learning_rate": 7.271180841095924e-06,
"loss": 1.3127690553665161,
"step": 1577
},
{
"epoch": 1.2464454976303316,
"grad_norm": 13.042413994956872,
"learning_rate": 7.267085695414525e-06,
"loss": 1.4197359085083008,
"step": 1578
},
{
"epoch": 1.2472353870458135,
"grad_norm": 10.928197034241155,
"learning_rate": 7.262988634491664e-06,
"loss": 1.261674165725708,
"step": 1579
},
{
"epoch": 1.2480252764612954,
"grad_norm": 10.758652573808499,
"learning_rate": 7.258889661788554e-06,
"loss": 1.5431902408599854,
"step": 1580
},
{
"epoch": 1.2488151658767772,
"grad_norm": 10.6083190072106,
"learning_rate": 7.254788780768018e-06,
"loss": 1.829581379890442,
"step": 1581
},
{
"epoch": 1.249605055292259,
"grad_norm": 13.989251634457949,
"learning_rate": 7.250685994894496e-06,
"loss": 1.342024564743042,
"step": 1582
},
{
"epoch": 1.250394944707741,
"grad_norm": 10.951019623163544,
"learning_rate": 7.246581307634033e-06,
"loss": 1.6949963569641113,
"step": 1583
},
{
"epoch": 1.2511848341232228,
"grad_norm": 14.242065090086145,
"learning_rate": 7.242474722454286e-06,
"loss": 1.4241949319839478,
"step": 1584
},
{
"epoch": 1.2519747235387046,
"grad_norm": 12.522283827649254,
"learning_rate": 7.238366242824506e-06,
"loss": 2.3611106872558594,
"step": 1585
},
{
"epoch": 1.2527646129541865,
"grad_norm": 10.607084561048083,
"learning_rate": 7.234255872215554e-06,
"loss": 1.5256626605987549,
"step": 1586
},
{
"epoch": 1.2535545023696684,
"grad_norm": 7.96307207456326,
"learning_rate": 7.230143614099885e-06,
"loss": 1.2228140830993652,
"step": 1587
},
{
"epoch": 1.25434439178515,
"grad_norm": 9.008816824433687,
"learning_rate": 7.226029471951545e-06,
"loss": 1.4289093017578125,
"step": 1588
},
{
"epoch": 1.2551342812006319,
"grad_norm": 20.920359812194434,
"learning_rate": 7.221913449246178e-06,
"loss": 1.3880183696746826,
"step": 1589
},
{
"epoch": 1.2559241706161137,
"grad_norm": 15.238445561586087,
"learning_rate": 7.217795549461012e-06,
"loss": 1.5309922695159912,
"step": 1590
},
{
"epoch": 1.2567140600315956,
"grad_norm": 16.4951650860319,
"learning_rate": 7.213675776074862e-06,
"loss": 1.835852026939392,
"step": 1591
},
{
"epoch": 1.2575039494470774,
"grad_norm": 8.574885331151954,
"learning_rate": 7.209554132568128e-06,
"loss": 0.49211519956588745,
"step": 1592
},
{
"epoch": 1.2582938388625593,
"grad_norm": 19.73342294585578,
"learning_rate": 7.205430622422786e-06,
"loss": 1.538137435913086,
"step": 1593
},
{
"epoch": 1.259083728278041,
"grad_norm": 18.023327663051525,
"learning_rate": 7.201305249122392e-06,
"loss": 1.457167148590088,
"step": 1594
},
{
"epoch": 1.2598736176935228,
"grad_norm": 12.076397086371236,
"learning_rate": 7.197178016152076e-06,
"loss": 1.1796395778656006,
"step": 1595
},
{
"epoch": 1.2606635071090047,
"grad_norm": 10.685605988239777,
"learning_rate": 7.193048926998535e-06,
"loss": 1.7177990674972534,
"step": 1596
},
{
"epoch": 1.2614533965244865,
"grad_norm": 11.711401184144716,
"learning_rate": 7.188917985150038e-06,
"loss": 1.2435717582702637,
"step": 1597
},
{
"epoch": 1.2622432859399684,
"grad_norm": 11.385740810589741,
"learning_rate": 7.184785194096421e-06,
"loss": 1.6954989433288574,
"step": 1598
},
{
"epoch": 1.2630331753554502,
"grad_norm": 11.48602327882758,
"learning_rate": 7.180650557329076e-06,
"loss": 1.3193989992141724,
"step": 1599
},
{
"epoch": 1.263823064770932,
"grad_norm": 12.906600512802692,
"learning_rate": 7.1765140783409615e-06,
"loss": 1.6648807525634766,
"step": 1600
},
{
"epoch": 1.264612954186414,
"grad_norm": 10.918513023394707,
"learning_rate": 7.1723757606265845e-06,
"loss": 1.1041438579559326,
"step": 1601
},
{
"epoch": 1.2654028436018958,
"grad_norm": 10.843029403978079,
"learning_rate": 7.16823560768201e-06,
"loss": 1.7871309518814087,
"step": 1602
},
{
"epoch": 1.2661927330173777,
"grad_norm": 19.286859146429332,
"learning_rate": 7.164093623004858e-06,
"loss": 2.269460439682007,
"step": 1603
},
{
"epoch": 1.2669826224328595,
"grad_norm": 15.634640920458306,
"learning_rate": 7.159949810094283e-06,
"loss": 1.2490503787994385,
"step": 1604
},
{
"epoch": 1.2677725118483412,
"grad_norm": 9.491676824429073,
"learning_rate": 7.155804172450999e-06,
"loss": 1.3973164558410645,
"step": 1605
},
{
"epoch": 1.268562401263823,
"grad_norm": 8.966724738021117,
"learning_rate": 7.151656713577248e-06,
"loss": 1.924464225769043,
"step": 1606
},
{
"epoch": 1.2693522906793049,
"grad_norm": 10.340849066157938,
"learning_rate": 7.147507436976823e-06,
"loss": 0.9530832171440125,
"step": 1607
},
{
"epoch": 1.2701421800947867,
"grad_norm": 8.707473817662407,
"learning_rate": 7.143356346155042e-06,
"loss": 1.555940866470337,
"step": 1608
},
{
"epoch": 1.2709320695102686,
"grad_norm": 10.219855234886426,
"learning_rate": 7.139203444618762e-06,
"loss": 1.6086714267730713,
"step": 1609
},
{
"epoch": 1.2717219589257505,
"grad_norm": 8.944536325339534,
"learning_rate": 7.135048735876368e-06,
"loss": 1.2452785968780518,
"step": 1610
},
{
"epoch": 1.272511848341232,
"grad_norm": 12.328677473799477,
"learning_rate": 7.130892223437771e-06,
"loss": 2.04856014251709,
"step": 1611
},
{
"epoch": 1.273301737756714,
"grad_norm": 12.463645792972384,
"learning_rate": 7.126733910814406e-06,
"loss": 1.6133408546447754,
"step": 1612
},
{
"epoch": 1.2740916271721958,
"grad_norm": 19.208477820593387,
"learning_rate": 7.122573801519229e-06,
"loss": 1.7257981300354004,
"step": 1613
},
{
"epoch": 1.2748815165876777,
"grad_norm": 10.648285597019955,
"learning_rate": 7.118411899066717e-06,
"loss": 1.6325335502624512,
"step": 1614
},
{
"epoch": 1.2756714060031595,
"grad_norm": 8.899110923088042,
"learning_rate": 7.114248206972856e-06,
"loss": 2.037642478942871,
"step": 1615
},
{
"epoch": 1.2764612954186414,
"grad_norm": 11.20674724929018,
"learning_rate": 7.110082728755147e-06,
"loss": 1.7042773962020874,
"step": 1616
},
{
"epoch": 1.2772511848341233,
"grad_norm": 18.487835882110616,
"learning_rate": 7.105915467932601e-06,
"loss": 1.3538520336151123,
"step": 1617
},
{
"epoch": 1.2780410742496051,
"grad_norm": 7.198026203991742,
"learning_rate": 7.101746428025732e-06,
"loss": 1.396047592163086,
"step": 1618
},
{
"epoch": 1.278830963665087,
"grad_norm": 13.95821454839482,
"learning_rate": 7.097575612556561e-06,
"loss": 1.2027523517608643,
"step": 1619
},
{
"epoch": 1.2796208530805688,
"grad_norm": 11.242118092533596,
"learning_rate": 7.0934030250486044e-06,
"loss": 2.4239554405212402,
"step": 1620
},
{
"epoch": 1.2804107424960507,
"grad_norm": 11.487912451476449,
"learning_rate": 7.089228669026878e-06,
"loss": 1.6502265930175781,
"step": 1621
},
{
"epoch": 1.2812006319115323,
"grad_norm": 13.921903791621322,
"learning_rate": 7.085052548017893e-06,
"loss": 2.114908456802368,
"step": 1622
},
{
"epoch": 1.2819905213270142,
"grad_norm": 7.371765331620391,
"learning_rate": 7.08087466554965e-06,
"loss": 1.7077139616012573,
"step": 1623
},
{
"epoch": 1.282780410742496,
"grad_norm": 16.302760837877532,
"learning_rate": 7.076695025151636e-06,
"loss": 1.2382946014404297,
"step": 1624
},
{
"epoch": 1.283570300157978,
"grad_norm": 13.124566165615319,
"learning_rate": 7.072513630354827e-06,
"loss": 2.201374053955078,
"step": 1625
},
{
"epoch": 1.2843601895734598,
"grad_norm": 7.528109529798553,
"learning_rate": 7.068330484691679e-06,
"loss": 1.3300297260284424,
"step": 1626
},
{
"epoch": 1.2851500789889416,
"grad_norm": 12.041483320882802,
"learning_rate": 7.0641455916961244e-06,
"loss": 0.9577473402023315,
"step": 1627
},
{
"epoch": 1.2859399684044233,
"grad_norm": 8.602253519605528,
"learning_rate": 7.0599589549035785e-06,
"loss": 1.0266809463500977,
"step": 1628
},
{
"epoch": 1.2867298578199051,
"grad_norm": 11.151691436114625,
"learning_rate": 7.055770577850925e-06,
"loss": 2.103332757949829,
"step": 1629
},
{
"epoch": 1.287519747235387,
"grad_norm": 12.866943913296561,
"learning_rate": 7.051580464076515e-06,
"loss": 1.1621947288513184,
"step": 1630
},
{
"epoch": 1.2883096366508688,
"grad_norm": 13.134566302958063,
"learning_rate": 7.047388617120175e-06,
"loss": 2.0336053371429443,
"step": 1631
},
{
"epoch": 1.2890995260663507,
"grad_norm": 18.298472533368262,
"learning_rate": 7.043195040523187e-06,
"loss": 1.6731476783752441,
"step": 1632
},
{
"epoch": 1.2898894154818326,
"grad_norm": 11.064624121392312,
"learning_rate": 7.0389997378283034e-06,
"loss": 1.4558773040771484,
"step": 1633
},
{
"epoch": 1.2906793048973144,
"grad_norm": 12.035862755722269,
"learning_rate": 7.034802712579725e-06,
"loss": 1.4987773895263672,
"step": 1634
},
{
"epoch": 1.2914691943127963,
"grad_norm": 11.000347165868366,
"learning_rate": 7.030603968323115e-06,
"loss": 1.652524471282959,
"step": 1635
},
{
"epoch": 1.2922590837282781,
"grad_norm": 11.920424614729273,
"learning_rate": 7.026403508605586e-06,
"loss": 1.199878215789795,
"step": 1636
},
{
"epoch": 1.29304897314376,
"grad_norm": 12.250643358349953,
"learning_rate": 7.022201336975701e-06,
"loss": 1.563902735710144,
"step": 1637
},
{
"epoch": 1.2938388625592416,
"grad_norm": 10.264821730924035,
"learning_rate": 7.017997456983465e-06,
"loss": 1.4691765308380127,
"step": 1638
},
{
"epoch": 1.2946287519747235,
"grad_norm": 13.243547862779414,
"learning_rate": 7.013791872180333e-06,
"loss": 2.1046786308288574,
"step": 1639
},
{
"epoch": 1.2954186413902053,
"grad_norm": 7.356657896392653,
"learning_rate": 7.009584586119198e-06,
"loss": 1.6833205223083496,
"step": 1640
},
{
"epoch": 1.2962085308056872,
"grad_norm": 7.547893143985833,
"learning_rate": 7.005375602354385e-06,
"loss": 1.406240463256836,
"step": 1641
},
{
"epoch": 1.296998420221169,
"grad_norm": 10.735447099759265,
"learning_rate": 7.00116492444166e-06,
"loss": 1.8073036670684814,
"step": 1642
},
{
"epoch": 1.297788309636651,
"grad_norm": 10.170543223857521,
"learning_rate": 6.996952555938217e-06,
"loss": 1.3641012907028198,
"step": 1643
},
{
"epoch": 1.2985781990521326,
"grad_norm": 6.261340458786887,
"learning_rate": 6.992738500402679e-06,
"loss": 1.5656462907791138,
"step": 1644
},
{
"epoch": 1.2993680884676144,
"grad_norm": 12.61218163311875,
"learning_rate": 6.988522761395093e-06,
"loss": 1.4655276536941528,
"step": 1645
},
{
"epoch": 1.3001579778830963,
"grad_norm": 9.829462165844339,
"learning_rate": 6.984305342476931e-06,
"loss": 1.841139554977417,
"step": 1646
},
{
"epoch": 1.3009478672985781,
"grad_norm": 10.428718423453438,
"learning_rate": 6.980086247211082e-06,
"loss": 1.9564460515975952,
"step": 1647
},
{
"epoch": 1.30173775671406,
"grad_norm": 7.749615781587074,
"learning_rate": 6.975865479161849e-06,
"loss": 1.387686848640442,
"step": 1648
},
{
"epoch": 1.3025276461295419,
"grad_norm": 9.844486088428916,
"learning_rate": 6.971643041894953e-06,
"loss": 0.7119489312171936,
"step": 1649
},
{
"epoch": 1.3033175355450237,
"grad_norm": 8.565082994108224,
"learning_rate": 6.967418938977524e-06,
"loss": 1.407379150390625,
"step": 1650
},
{
"epoch": 1.3041074249605056,
"grad_norm": 9.682368107994689,
"learning_rate": 6.963193173978095e-06,
"loss": 1.277189016342163,
"step": 1651
},
{
"epoch": 1.3048973143759874,
"grad_norm": 13.513336697960028,
"learning_rate": 6.9589657504666095e-06,
"loss": 1.2872400283813477,
"step": 1652
},
{
"epoch": 1.3056872037914693,
"grad_norm": 20.735938043035457,
"learning_rate": 6.954736672014406e-06,
"loss": 2.1411285400390625,
"step": 1653
},
{
"epoch": 1.3064770932069512,
"grad_norm": 13.154183276627684,
"learning_rate": 6.950505942194226e-06,
"loss": 1.1476898193359375,
"step": 1654
},
{
"epoch": 1.3072669826224328,
"grad_norm": 22.670571761450503,
"learning_rate": 6.946273564580202e-06,
"loss": 1.3200886249542236,
"step": 1655
},
{
"epoch": 1.3080568720379147,
"grad_norm": 13.376955794870272,
"learning_rate": 6.942039542747863e-06,
"loss": 1.7086883783340454,
"step": 1656
},
{
"epoch": 1.3088467614533965,
"grad_norm": 6.135115256463472,
"learning_rate": 6.937803880274122e-06,
"loss": 1.4040107727050781,
"step": 1657
},
{
"epoch": 1.3096366508688784,
"grad_norm": 10.847847813759419,
"learning_rate": 6.933566580737282e-06,
"loss": 1.342395544052124,
"step": 1658
},
{
"epoch": 1.3104265402843602,
"grad_norm": 9.846488420083572,
"learning_rate": 6.929327647717028e-06,
"loss": 1.7640855312347412,
"step": 1659
},
{
"epoch": 1.311216429699842,
"grad_norm": 17.579714964873293,
"learning_rate": 6.925087084794422e-06,
"loss": 1.837497353553772,
"step": 1660
},
{
"epoch": 1.3120063191153237,
"grad_norm": 13.308781650389735,
"learning_rate": 6.92084489555191e-06,
"loss": 1.5832990407943726,
"step": 1661
},
{
"epoch": 1.3127962085308056,
"grad_norm": 15.704664883204273,
"learning_rate": 6.916601083573302e-06,
"loss": 2.004098653793335,
"step": 1662
},
{
"epoch": 1.3135860979462874,
"grad_norm": 8.685136767072184,
"learning_rate": 6.912355652443791e-06,
"loss": 1.037060260772705,
"step": 1663
},
{
"epoch": 1.3143759873617693,
"grad_norm": 10.120809843785937,
"learning_rate": 6.908108605749925e-06,
"loss": 1.7781083583831787,
"step": 1664
},
{
"epoch": 1.3151658767772512,
"grad_norm": 8.220963621405001,
"learning_rate": 6.903859947079625e-06,
"loss": 1.7696709632873535,
"step": 1665
},
{
"epoch": 1.315955766192733,
"grad_norm": 10.606659026468149,
"learning_rate": 6.899609680022175e-06,
"loss": 1.7073678970336914,
"step": 1666
},
{
"epoch": 1.3167456556082149,
"grad_norm": 10.40542278789583,
"learning_rate": 6.895357808168209e-06,
"loss": 1.32874596118927,
"step": 1667
},
{
"epoch": 1.3175355450236967,
"grad_norm": 14.900627896368178,
"learning_rate": 6.8911043351097265e-06,
"loss": 1.4731537103652954,
"step": 1668
},
{
"epoch": 1.3183254344391786,
"grad_norm": 14.289865839749087,
"learning_rate": 6.886849264440074e-06,
"loss": 2.3647401332855225,
"step": 1669
},
{
"epoch": 1.3191153238546605,
"grad_norm": 10.096520767718063,
"learning_rate": 6.88259259975395e-06,
"loss": 1.7499630451202393,
"step": 1670
},
{
"epoch": 1.319905213270142,
"grad_norm": 12.58275455787123,
"learning_rate": 6.878334344647399e-06,
"loss": 1.4793438911437988,
"step": 1671
},
{
"epoch": 1.320695102685624,
"grad_norm": 9.213580765946672,
"learning_rate": 6.874074502717807e-06,
"loss": 1.533569574356079,
"step": 1672
},
{
"epoch": 1.3214849921011058,
"grad_norm": 11.233518853535507,
"learning_rate": 6.869813077563905e-06,
"loss": 1.5673787593841553,
"step": 1673
},
{
"epoch": 1.3222748815165877,
"grad_norm": 11.5105493592369,
"learning_rate": 6.865550072785757e-06,
"loss": 1.8369773626327515,
"step": 1674
},
{
"epoch": 1.3230647709320695,
"grad_norm": 14.469541616554176,
"learning_rate": 6.861285491984764e-06,
"loss": 1.5419373512268066,
"step": 1675
},
{
"epoch": 1.3238546603475514,
"grad_norm": 10.063880181802128,
"learning_rate": 6.857019338763657e-06,
"loss": 1.5308325290679932,
"step": 1676
},
{
"epoch": 1.3246445497630333,
"grad_norm": 10.589111964405951,
"learning_rate": 6.852751616726501e-06,
"loss": 1.3474477529525757,
"step": 1677
},
{
"epoch": 1.325434439178515,
"grad_norm": 16.81222045065148,
"learning_rate": 6.848482329478675e-06,
"loss": 2.032487630844116,
"step": 1678
},
{
"epoch": 1.3262243285939967,
"grad_norm": 11.866053591597979,
"learning_rate": 6.844211480626892e-06,
"loss": 1.1282556056976318,
"step": 1679
},
{
"epoch": 1.3270142180094786,
"grad_norm": 9.019099813424168,
"learning_rate": 6.839939073779177e-06,
"loss": 1.6074413061141968,
"step": 1680
},
{
"epoch": 1.3278041074249605,
"grad_norm": 8.783590478924218,
"learning_rate": 6.835665112544871e-06,
"loss": 1.224888801574707,
"step": 1681
},
{
"epoch": 1.3285939968404423,
"grad_norm": 9.151154533135232,
"learning_rate": 6.831389600534637e-06,
"loss": 1.2797878980636597,
"step": 1682
},
{
"epoch": 1.3293838862559242,
"grad_norm": 15.249803779803319,
"learning_rate": 6.8271125413604344e-06,
"loss": 1.8004652261734009,
"step": 1683
},
{
"epoch": 1.330173775671406,
"grad_norm": 22.765578093916663,
"learning_rate": 6.822833938635543e-06,
"loss": 1.466485619544983,
"step": 1684
},
{
"epoch": 1.330963665086888,
"grad_norm": 9.461043264779885,
"learning_rate": 6.818553795974536e-06,
"loss": 2.0474472045898438,
"step": 1685
},
{
"epoch": 1.3317535545023698,
"grad_norm": 12.50340732157009,
"learning_rate": 6.814272116993294e-06,
"loss": 2.1818690299987793,
"step": 1686
},
{
"epoch": 1.3325434439178516,
"grad_norm": 19.829927714642757,
"learning_rate": 6.809988905308993e-06,
"loss": 2.1175107955932617,
"step": 1687
},
{
"epoch": 1.3333333333333333,
"grad_norm": 10.675607140843853,
"learning_rate": 6.805704164540105e-06,
"loss": 1.130906343460083,
"step": 1688
},
{
"epoch": 1.3341232227488151,
"grad_norm": 10.39793547659673,
"learning_rate": 6.8014178983063914e-06,
"loss": 1.098733901977539,
"step": 1689
},
{
"epoch": 1.334913112164297,
"grad_norm": 10.503557720518131,
"learning_rate": 6.7971301102289054e-06,
"loss": 1.2792344093322754,
"step": 1690
},
{
"epoch": 1.3357030015797788,
"grad_norm": 10.340351227260813,
"learning_rate": 6.792840803929987e-06,
"loss": 1.5470017194747925,
"step": 1691
},
{
"epoch": 1.3364928909952607,
"grad_norm": 9.882170112299617,
"learning_rate": 6.78854998303325e-06,
"loss": 1.1156797409057617,
"step": 1692
},
{
"epoch": 1.3372827804107426,
"grad_norm": 10.827400927376479,
"learning_rate": 6.7842576511636e-06,
"loss": 1.2364952564239502,
"step": 1693
},
{
"epoch": 1.3380726698262242,
"grad_norm": 12.60283058617565,
"learning_rate": 6.779963811947211e-06,
"loss": 1.4180421829223633,
"step": 1694
},
{
"epoch": 1.338862559241706,
"grad_norm": 12.037561798280398,
"learning_rate": 6.775668469011531e-06,
"loss": 1.811230182647705,
"step": 1695
},
{
"epoch": 1.339652448657188,
"grad_norm": 14.317321439652712,
"learning_rate": 6.771371625985282e-06,
"loss": 1.5977898836135864,
"step": 1696
},
{
"epoch": 1.3404423380726698,
"grad_norm": 12.550990602775505,
"learning_rate": 6.767073286498449e-06,
"loss": 1.4557725191116333,
"step": 1697
},
{
"epoch": 1.3412322274881516,
"grad_norm": 9.595105877395659,
"learning_rate": 6.762773454182285e-06,
"loss": 1.620849609375,
"step": 1698
},
{
"epoch": 1.3420221169036335,
"grad_norm": 26.931211154992397,
"learning_rate": 6.7584721326693024e-06,
"loss": 1.621090292930603,
"step": 1699
},
{
"epoch": 1.3428120063191153,
"grad_norm": 18.2341302921318,
"learning_rate": 6.754169325593273e-06,
"loss": 1.1882051229476929,
"step": 1700
},
{
"epoch": 1.3436018957345972,
"grad_norm": 13.079885370924936,
"learning_rate": 6.749865036589219e-06,
"loss": 1.9173457622528076,
"step": 1701
},
{
"epoch": 1.344391785150079,
"grad_norm": 10.73628142485319,
"learning_rate": 6.7455592692934205e-06,
"loss": 1.4644713401794434,
"step": 1702
},
{
"epoch": 1.345181674565561,
"grad_norm": 13.507997760455746,
"learning_rate": 6.741252027343403e-06,
"loss": 1.3514494895935059,
"step": 1703
},
{
"epoch": 1.3459715639810428,
"grad_norm": 9.434097289624326,
"learning_rate": 6.736943314377939e-06,
"loss": 1.4577107429504395,
"step": 1704
},
{
"epoch": 1.3467614533965244,
"grad_norm": 12.60917651464132,
"learning_rate": 6.732633134037043e-06,
"loss": 1.3438491821289062,
"step": 1705
},
{
"epoch": 1.3475513428120063,
"grad_norm": 34.45628221491453,
"learning_rate": 6.7283214899619685e-06,
"loss": 1.6202247142791748,
"step": 1706
},
{
"epoch": 1.3483412322274881,
"grad_norm": 11.815572511405222,
"learning_rate": 6.724008385795211e-06,
"loss": 1.8743906021118164,
"step": 1707
},
{
"epoch": 1.34913112164297,
"grad_norm": 14.955379827242334,
"learning_rate": 6.719693825180491e-06,
"loss": 1.5417712926864624,
"step": 1708
},
{
"epoch": 1.3499210110584519,
"grad_norm": 13.982695999884012,
"learning_rate": 6.7153778117627635e-06,
"loss": 1.5238138437271118,
"step": 1709
},
{
"epoch": 1.3507109004739337,
"grad_norm": 13.558773627084195,
"learning_rate": 6.711060349188213e-06,
"loss": 1.0729316473007202,
"step": 1710
},
{
"epoch": 1.3515007898894154,
"grad_norm": 15.460928981412394,
"learning_rate": 6.706741441104243e-06,
"loss": 1.6075103282928467,
"step": 1711
},
{
"epoch": 1.3522906793048972,
"grad_norm": 27.070598507663696,
"learning_rate": 6.702421091159487e-06,
"loss": 1.9741368293762207,
"step": 1712
},
{
"epoch": 1.353080568720379,
"grad_norm": 9.716774266414612,
"learning_rate": 6.698099303003784e-06,
"loss": 1.7203664779663086,
"step": 1713
},
{
"epoch": 1.353870458135861,
"grad_norm": 10.642254002327258,
"learning_rate": 6.693776080288198e-06,
"loss": 1.163581132888794,
"step": 1714
},
{
"epoch": 1.3546603475513428,
"grad_norm": 8.496905620664727,
"learning_rate": 6.689451426665004e-06,
"loss": 2.0292768478393555,
"step": 1715
},
{
"epoch": 1.3554502369668247,
"grad_norm": 10.126564260567976,
"learning_rate": 6.685125345787679e-06,
"loss": 2.220201015472412,
"step": 1716
},
{
"epoch": 1.3562401263823065,
"grad_norm": 13.061476880554993,
"learning_rate": 6.680797841310914e-06,
"loss": 2.056443214416504,
"step": 1717
},
{
"epoch": 1.3570300157977884,
"grad_norm": 13.47212904208046,
"learning_rate": 6.676468916890597e-06,
"loss": 1.468867540359497,
"step": 1718
},
{
"epoch": 1.3578199052132702,
"grad_norm": 13.538453828063716,
"learning_rate": 6.67213857618382e-06,
"loss": 0.8965187072753906,
"step": 1719
},
{
"epoch": 1.358609794628752,
"grad_norm": 7.238990017595394,
"learning_rate": 6.667806822848865e-06,
"loss": 1.4493942260742188,
"step": 1720
},
{
"epoch": 1.3593996840442337,
"grad_norm": 13.06712117871877,
"learning_rate": 6.663473660545217e-06,
"loss": 1.303008794784546,
"step": 1721
},
{
"epoch": 1.3601895734597156,
"grad_norm": 18.893117290210554,
"learning_rate": 6.659139092933542e-06,
"loss": 2.37463641166687,
"step": 1722
},
{
"epoch": 1.3609794628751974,
"grad_norm": 10.071203220715486,
"learning_rate": 6.6548031236756975e-06,
"loss": 2.426710605621338,
"step": 1723
},
{
"epoch": 1.3617693522906793,
"grad_norm": 7.544794430629949,
"learning_rate": 6.650465756434724e-06,
"loss": 0.8276800513267517,
"step": 1724
},
{
"epoch": 1.3625592417061612,
"grad_norm": 9.200917908287058,
"learning_rate": 6.6461269948748445e-06,
"loss": 2.1654391288757324,
"step": 1725
},
{
"epoch": 1.363349131121643,
"grad_norm": 25.451210894852924,
"learning_rate": 6.64178684266146e-06,
"loss": 1.5984547138214111,
"step": 1726
},
{
"epoch": 1.3641390205371249,
"grad_norm": 12.38315288359827,
"learning_rate": 6.637445303461143e-06,
"loss": 0.9575186967849731,
"step": 1727
},
{
"epoch": 1.3649289099526065,
"grad_norm": 13.455605811383814,
"learning_rate": 6.633102380941643e-06,
"loss": 1.4046566486358643,
"step": 1728
},
{
"epoch": 1.3657187993680884,
"grad_norm": 11.193113951721786,
"learning_rate": 6.628758078771873e-06,
"loss": 2.0002331733703613,
"step": 1729
},
{
"epoch": 1.3665086887835702,
"grad_norm": 10.231357721297691,
"learning_rate": 6.624412400621916e-06,
"loss": 1.3115997314453125,
"step": 1730
},
{
"epoch": 1.367298578199052,
"grad_norm": 15.410181553876129,
"learning_rate": 6.620065350163012e-06,
"loss": 1.2699280977249146,
"step": 1731
},
{
"epoch": 1.368088467614534,
"grad_norm": 10.808531808691974,
"learning_rate": 6.615716931067566e-06,
"loss": 2.0870327949523926,
"step": 1732
},
{
"epoch": 1.3688783570300158,
"grad_norm": 9.638709809302068,
"learning_rate": 6.6113671470091355e-06,
"loss": 2.032163619995117,
"step": 1733
},
{
"epoch": 1.3696682464454977,
"grad_norm": 9.573439798053808,
"learning_rate": 6.607016001662434e-06,
"loss": 1.5165016651153564,
"step": 1734
},
{
"epoch": 1.3704581358609795,
"grad_norm": 16.637270126365415,
"learning_rate": 6.602663498703323e-06,
"loss": 1.579211711883545,
"step": 1735
},
{
"epoch": 1.3712480252764614,
"grad_norm": 12.245545065101416,
"learning_rate": 6.598309641808809e-06,
"loss": 1.451501727104187,
"step": 1736
},
{
"epoch": 1.3720379146919433,
"grad_norm": 10.871654496984037,
"learning_rate": 6.593954434657047e-06,
"loss": 1.3775560855865479,
"step": 1737
},
{
"epoch": 1.3728278041074249,
"grad_norm": 14.826876502837509,
"learning_rate": 6.5895978809273295e-06,
"loss": 1.1092920303344727,
"step": 1738
},
{
"epoch": 1.3736176935229067,
"grad_norm": 9.937827702763318,
"learning_rate": 6.585239984300088e-06,
"loss": 1.1688158512115479,
"step": 1739
},
{
"epoch": 1.3744075829383886,
"grad_norm": 10.154954545278624,
"learning_rate": 6.580880748456888e-06,
"loss": 1.9065393209457397,
"step": 1740
},
{
"epoch": 1.3751974723538705,
"grad_norm": 20.375541326941555,
"learning_rate": 6.576520177080425e-06,
"loss": 3.503018379211426,
"step": 1741
},
{
"epoch": 1.3759873617693523,
"grad_norm": 15.460278049176381,
"learning_rate": 6.572158273854527e-06,
"loss": 1.5270636081695557,
"step": 1742
},
{
"epoch": 1.3767772511848342,
"grad_norm": 21.983057924875485,
"learning_rate": 6.567795042464139e-06,
"loss": 1.8938589096069336,
"step": 1743
},
{
"epoch": 1.3775671406003158,
"grad_norm": 16.471995959910757,
"learning_rate": 6.563430486595339e-06,
"loss": 1.7760772705078125,
"step": 1744
},
{
"epoch": 1.3783570300157977,
"grad_norm": 15.032482903239565,
"learning_rate": 6.5590646099353135e-06,
"loss": 1.6687235832214355,
"step": 1745
},
{
"epoch": 1.3791469194312795,
"grad_norm": 12.844038712723703,
"learning_rate": 6.554697416172373e-06,
"loss": 1.5714036226272583,
"step": 1746
},
{
"epoch": 1.3799368088467614,
"grad_norm": 17.21043619059917,
"learning_rate": 6.5503289089959354e-06,
"loss": 3.404308795928955,
"step": 1747
},
{
"epoch": 1.3807266982622433,
"grad_norm": 11.175765500846673,
"learning_rate": 6.5459590920965295e-06,
"loss": 1.9153468608856201,
"step": 1748
},
{
"epoch": 1.3815165876777251,
"grad_norm": 12.6196975598501,
"learning_rate": 6.541587969165793e-06,
"loss": 2.095952033996582,
"step": 1749
},
{
"epoch": 1.382306477093207,
"grad_norm": 10.31639931220762,
"learning_rate": 6.537215543896463e-06,
"loss": 1.1625829935073853,
"step": 1750
},
{
"epoch": 1.3830963665086888,
"grad_norm": 29.7649936622185,
"learning_rate": 6.532841819982381e-06,
"loss": 3.0494065284729004,
"step": 1751
},
{
"epoch": 1.3838862559241707,
"grad_norm": 10.666741440827831,
"learning_rate": 6.52846680111848e-06,
"loss": 1.2341630458831787,
"step": 1752
},
{
"epoch": 1.3846761453396526,
"grad_norm": 12.22687954037762,
"learning_rate": 6.524090491000793e-06,
"loss": 1.308075189590454,
"step": 1753
},
{
"epoch": 1.3854660347551344,
"grad_norm": 32.49773976246368,
"learning_rate": 6.519712893326439e-06,
"loss": 1.6739758253097534,
"step": 1754
},
{
"epoch": 1.386255924170616,
"grad_norm": 10.896992529162494,
"learning_rate": 6.515334011793629e-06,
"loss": 2.6935648918151855,
"step": 1755
},
{
"epoch": 1.387045813586098,
"grad_norm": 14.152067149422212,
"learning_rate": 6.510953850101658e-06,
"loss": 1.4659454822540283,
"step": 1756
},
{
"epoch": 1.3878357030015798,
"grad_norm": 9.490627971683756,
"learning_rate": 6.506572411950896e-06,
"loss": 1.4744107723236084,
"step": 1757
},
{
"epoch": 1.3886255924170616,
"grad_norm": 9.178009642515413,
"learning_rate": 6.5021897010428006e-06,
"loss": 1.4771769046783447,
"step": 1758
},
{
"epoch": 1.3894154818325435,
"grad_norm": 7.901450497913,
"learning_rate": 6.4978057210798995e-06,
"loss": 1.5277764797210693,
"step": 1759
},
{
"epoch": 1.3902053712480253,
"grad_norm": 16.24677122520427,
"learning_rate": 6.4934204757657925e-06,
"loss": 1.8329733610153198,
"step": 1760
},
{
"epoch": 1.390995260663507,
"grad_norm": 17.706742868384215,
"learning_rate": 6.489033968805149e-06,
"loss": 1.9825087785720825,
"step": 1761
},
{
"epoch": 1.3917851500789888,
"grad_norm": 15.685673090253694,
"learning_rate": 6.484646203903706e-06,
"loss": 1.2657302618026733,
"step": 1762
},
{
"epoch": 1.3925750394944707,
"grad_norm": 10.790748538927113,
"learning_rate": 6.4802571847682616e-06,
"loss": 3.029433250427246,
"step": 1763
},
{
"epoch": 1.3933649289099526,
"grad_norm": 9.163423388663407,
"learning_rate": 6.4758669151066724e-06,
"loss": 1.4058465957641602,
"step": 1764
},
{
"epoch": 1.3941548183254344,
"grad_norm": 10.067549014086303,
"learning_rate": 6.4714753986278535e-06,
"loss": 1.397748589515686,
"step": 1765
},
{
"epoch": 1.3949447077409163,
"grad_norm": 13.05855432795011,
"learning_rate": 6.467082639041772e-06,
"loss": 1.4465043544769287,
"step": 1766
},
{
"epoch": 1.3957345971563981,
"grad_norm": 14.849808480124686,
"learning_rate": 6.462688640059446e-06,
"loss": 1.5454909801483154,
"step": 1767
},
{
"epoch": 1.39652448657188,
"grad_norm": 11.702650248241305,
"learning_rate": 6.4582934053929405e-06,
"loss": 1.1027021408081055,
"step": 1768
},
{
"epoch": 1.3973143759873619,
"grad_norm": 7.374612887912086,
"learning_rate": 6.453896938755362e-06,
"loss": 1.7206413745880127,
"step": 1769
},
{
"epoch": 1.3981042654028437,
"grad_norm": 13.105128531972968,
"learning_rate": 6.449499243860865e-06,
"loss": 2.0842576026916504,
"step": 1770
},
{
"epoch": 1.3988941548183254,
"grad_norm": 16.342442684958176,
"learning_rate": 6.445100324424631e-06,
"loss": 1.9844783544540405,
"step": 1771
},
{
"epoch": 1.3996840442338072,
"grad_norm": 7.941412914747672,
"learning_rate": 6.440700184162887e-06,
"loss": 1.3604265451431274,
"step": 1772
},
{
"epoch": 1.400473933649289,
"grad_norm": 7.665007430651305,
"learning_rate": 6.436298826792882e-06,
"loss": 0.7555409669876099,
"step": 1773
},
{
"epoch": 1.401263823064771,
"grad_norm": 9.961027255210812,
"learning_rate": 6.431896256032896e-06,
"loss": 1.5438669919967651,
"step": 1774
},
{
"epoch": 1.4020537124802528,
"grad_norm": 12.80133317623476,
"learning_rate": 6.427492475602242e-06,
"loss": 1.6424999237060547,
"step": 1775
},
{
"epoch": 1.4028436018957346,
"grad_norm": 11.021716530624692,
"learning_rate": 6.423087489221241e-06,
"loss": 1.6980810165405273,
"step": 1776
},
{
"epoch": 1.4036334913112165,
"grad_norm": 20.704996350287434,
"learning_rate": 6.418681300611244e-06,
"loss": 1.92954421043396,
"step": 1777
},
{
"epoch": 1.4044233807266981,
"grad_norm": 12.682678877921038,
"learning_rate": 6.414273913494612e-06,
"loss": 0.9070745706558228,
"step": 1778
},
{
"epoch": 1.40521327014218,
"grad_norm": 12.513930307508547,
"learning_rate": 6.409865331594721e-06,
"loss": 1.1177334785461426,
"step": 1779
},
{
"epoch": 1.4060031595576619,
"grad_norm": 19.948578372309566,
"learning_rate": 6.4054555586359556e-06,
"loss": 1.5609198808670044,
"step": 1780
},
{
"epoch": 1.4067930489731437,
"grad_norm": 10.304880695036104,
"learning_rate": 6.401044598343705e-06,
"loss": 1.9408183097839355,
"step": 1781
},
{
"epoch": 1.4075829383886256,
"grad_norm": 11.485897348203851,
"learning_rate": 6.3966324544443646e-06,
"loss": 2.1067936420440674,
"step": 1782
},
{
"epoch": 1.4083728278041074,
"grad_norm": 12.36441904247693,
"learning_rate": 6.392219130665328e-06,
"loss": 1.3297131061553955,
"step": 1783
},
{
"epoch": 1.4091627172195893,
"grad_norm": 11.454598254103093,
"learning_rate": 6.387804630734985e-06,
"loss": 1.3817702531814575,
"step": 1784
},
{
"epoch": 1.4099526066350712,
"grad_norm": 11.226000917241246,
"learning_rate": 6.383388958382719e-06,
"loss": 2.0304312705993652,
"step": 1785
},
{
"epoch": 1.410742496050553,
"grad_norm": 15.283704119726693,
"learning_rate": 6.378972117338908e-06,
"loss": 1.2152456045150757,
"step": 1786
},
{
"epoch": 1.4115323854660349,
"grad_norm": 14.282288242101943,
"learning_rate": 6.374554111334908e-06,
"loss": 2.027944564819336,
"step": 1787
},
{
"epoch": 1.4123222748815165,
"grad_norm": 19.074788092743933,
"learning_rate": 6.37013494410307e-06,
"loss": 1.8768101930618286,
"step": 1788
},
{
"epoch": 1.4131121642969984,
"grad_norm": 27.405083978695842,
"learning_rate": 6.365714619376722e-06,
"loss": 1.921675205230713,
"step": 1789
},
{
"epoch": 1.4139020537124802,
"grad_norm": 12.762740990846117,
"learning_rate": 6.361293140890161e-06,
"loss": 1.2969154119491577,
"step": 1790
},
{
"epoch": 1.414691943127962,
"grad_norm": 13.968926857294704,
"learning_rate": 6.356870512378675e-06,
"loss": 1.6537883281707764,
"step": 1791
},
{
"epoch": 1.415481832543444,
"grad_norm": 10.371222981346492,
"learning_rate": 6.3524467375785125e-06,
"loss": 2.2494006156921387,
"step": 1792
},
{
"epoch": 1.4162717219589258,
"grad_norm": 7.840468212923819,
"learning_rate": 6.348021820226891e-06,
"loss": 1.4510530233383179,
"step": 1793
},
{
"epoch": 1.4170616113744074,
"grad_norm": 16.008361511520068,
"learning_rate": 6.343595764061999e-06,
"loss": 2.526327133178711,
"step": 1794
},
{
"epoch": 1.4178515007898893,
"grad_norm": 7.9497180354538415,
"learning_rate": 6.339168572822978e-06,
"loss": 1.4724477529525757,
"step": 1795
},
{
"epoch": 1.4186413902053712,
"grad_norm": 6.971397200044914,
"learning_rate": 6.334740250249938e-06,
"loss": 1.9278626441955566,
"step": 1796
},
{
"epoch": 1.419431279620853,
"grad_norm": 14.711227502395163,
"learning_rate": 6.33031080008394e-06,
"loss": 1.0732061862945557,
"step": 1797
},
{
"epoch": 1.4202211690363349,
"grad_norm": 8.10829033173858,
"learning_rate": 6.325880226066997e-06,
"loss": 1.8444054126739502,
"step": 1798
},
{
"epoch": 1.4210110584518167,
"grad_norm": 8.220785031600306,
"learning_rate": 6.321448531942072e-06,
"loss": 1.180464267730713,
"step": 1799
},
{
"epoch": 1.4218009478672986,
"grad_norm": 7.13121507626065,
"learning_rate": 6.317015721453077e-06,
"loss": 1.8239995241165161,
"step": 1800
},
{
"epoch": 1.4225908372827805,
"grad_norm": 10.075780879909827,
"learning_rate": 6.3125817983448624e-06,
"loss": 1.9934985637664795,
"step": 1801
},
{
"epoch": 1.4233807266982623,
"grad_norm": 11.805782298666543,
"learning_rate": 6.308146766363225e-06,
"loss": 1.1996102333068848,
"step": 1802
},
{
"epoch": 1.4241706161137442,
"grad_norm": 14.758482012061515,
"learning_rate": 6.303710629254889e-06,
"loss": 1.6951093673706055,
"step": 1803
},
{
"epoch": 1.424960505529226,
"grad_norm": 6.415227648148693,
"learning_rate": 6.29927339076752e-06,
"loss": 1.2805756330490112,
"step": 1804
},
{
"epoch": 1.4257503949447077,
"grad_norm": 16.190811963050376,
"learning_rate": 6.294835054649714e-06,
"loss": 1.6054199934005737,
"step": 1805
},
{
"epoch": 1.4265402843601895,
"grad_norm": 19.35675670483708,
"learning_rate": 6.290395624650988e-06,
"loss": 2.000746726989746,
"step": 1806
},
{
"epoch": 1.4273301737756714,
"grad_norm": 10.953221955867015,
"learning_rate": 6.28595510452179e-06,
"loss": 1.7414836883544922,
"step": 1807
},
{
"epoch": 1.4281200631911533,
"grad_norm": 14.62887358716854,
"learning_rate": 6.281513498013485e-06,
"loss": 1.2090952396392822,
"step": 1808
},
{
"epoch": 1.4289099526066351,
"grad_norm": 8.841041489194227,
"learning_rate": 6.277070808878356e-06,
"loss": 1.1835849285125732,
"step": 1809
},
{
"epoch": 1.429699842022117,
"grad_norm": 9.037929728341096,
"learning_rate": 6.2726270408696035e-06,
"loss": 1.8852903842926025,
"step": 1810
},
{
"epoch": 1.4304897314375986,
"grad_norm": 14.839212546666879,
"learning_rate": 6.268182197741336e-06,
"loss": 2.071831703186035,
"step": 1811
},
{
"epoch": 1.4312796208530805,
"grad_norm": 12.37353569458125,
"learning_rate": 6.263736283248571e-06,
"loss": 1.7889072895050049,
"step": 1812
},
{
"epoch": 1.4320695102685623,
"grad_norm": 14.308437007860489,
"learning_rate": 6.259289301147233e-06,
"loss": 2.109577178955078,
"step": 1813
},
{
"epoch": 1.4328593996840442,
"grad_norm": 10.588829861541612,
"learning_rate": 6.254841255194148e-06,
"loss": 0.9634921550750732,
"step": 1814
},
{
"epoch": 1.433649289099526,
"grad_norm": 8.269928453685496,
"learning_rate": 6.250392149147035e-06,
"loss": 1.1673877239227295,
"step": 1815
},
{
"epoch": 1.434439178515008,
"grad_norm": 12.167224980723917,
"learning_rate": 6.2459419867645195e-06,
"loss": 1.7496429681777954,
"step": 1816
},
{
"epoch": 1.4352290679304898,
"grad_norm": 11.74353184566792,
"learning_rate": 6.241490771806111e-06,
"loss": 1.6411113739013672,
"step": 1817
},
{
"epoch": 1.4360189573459716,
"grad_norm": 10.038663586636575,
"learning_rate": 6.2370385080322085e-06,
"loss": 1.6785610914230347,
"step": 1818
},
{
"epoch": 1.4368088467614535,
"grad_norm": 12.705482849240397,
"learning_rate": 6.232585199204102e-06,
"loss": 1.4933853149414062,
"step": 1819
},
{
"epoch": 1.4375987361769353,
"grad_norm": 8.797279147148013,
"learning_rate": 6.22813084908396e-06,
"loss": 1.2085703611373901,
"step": 1820
},
{
"epoch": 1.438388625592417,
"grad_norm": 9.13523458275769,
"learning_rate": 6.223675461434831e-06,
"loss": 1.3428914546966553,
"step": 1821
},
{
"epoch": 1.4391785150078988,
"grad_norm": 7.848994958997547,
"learning_rate": 6.219219040020646e-06,
"loss": 1.0880684852600098,
"step": 1822
},
{
"epoch": 1.4399684044233807,
"grad_norm": 13.565272501706762,
"learning_rate": 6.214761588606199e-06,
"loss": 2.37025785446167,
"step": 1823
},
{
"epoch": 1.4407582938388626,
"grad_norm": 11.496245700277813,
"learning_rate": 6.210303110957161e-06,
"loss": 1.0500859022140503,
"step": 1824
},
{
"epoch": 1.4415481832543444,
"grad_norm": 11.699195594543191,
"learning_rate": 6.205843610840071e-06,
"loss": 1.364439606666565,
"step": 1825
},
{
"epoch": 1.4423380726698263,
"grad_norm": 12.92363516840581,
"learning_rate": 6.201383092022326e-06,
"loss": 1.575169324874878,
"step": 1826
},
{
"epoch": 1.4431279620853081,
"grad_norm": 9.636126549348427,
"learning_rate": 6.196921558272191e-06,
"loss": 1.2875540256500244,
"step": 1827
},
{
"epoch": 1.4439178515007898,
"grad_norm": 25.404058624847497,
"learning_rate": 6.19245901335878e-06,
"loss": 1.948940634727478,
"step": 1828
},
{
"epoch": 1.4447077409162716,
"grad_norm": 16.28182362055586,
"learning_rate": 6.187995461052067e-06,
"loss": 1.686116337776184,
"step": 1829
},
{
"epoch": 1.4454976303317535,
"grad_norm": 12.142680163856589,
"learning_rate": 6.183530905122881e-06,
"loss": 1.513108253479004,
"step": 1830
},
{
"epoch": 1.4462875197472354,
"grad_norm": 9.796255845103145,
"learning_rate": 6.179065349342885e-06,
"loss": 1.6807351112365723,
"step": 1831
},
{
"epoch": 1.4470774091627172,
"grad_norm": 14.771373088976882,
"learning_rate": 6.1745987974846e-06,
"loss": 1.763586163520813,
"step": 1832
},
{
"epoch": 1.447867298578199,
"grad_norm": 11.746125463949197,
"learning_rate": 6.170131253321384e-06,
"loss": 1.5122478008270264,
"step": 1833
},
{
"epoch": 1.448657187993681,
"grad_norm": 10.693788951485445,
"learning_rate": 6.16566272062743e-06,
"loss": 2.1962921619415283,
"step": 1834
},
{
"epoch": 1.4494470774091628,
"grad_norm": 12.44749625950401,
"learning_rate": 6.161193203177773e-06,
"loss": 1.9236458539962769,
"step": 1835
},
{
"epoch": 1.4502369668246446,
"grad_norm": 17.61123152678093,
"learning_rate": 6.156722704748273e-06,
"loss": 1.6482089757919312,
"step": 1836
},
{
"epoch": 1.4510268562401265,
"grad_norm": 7.824657050413297,
"learning_rate": 6.152251229115625e-06,
"loss": 1.2396411895751953,
"step": 1837
},
{
"epoch": 1.4518167456556081,
"grad_norm": 12.100352057209935,
"learning_rate": 6.147778780057342e-06,
"loss": 1.445483922958374,
"step": 1838
},
{
"epoch": 1.45260663507109,
"grad_norm": 10.176922246717954,
"learning_rate": 6.143305361351766e-06,
"loss": 2.227597713470459,
"step": 1839
},
{
"epoch": 1.4533965244865719,
"grad_norm": 23.08434863955186,
"learning_rate": 6.1388309767780575e-06,
"loss": 2.184255361557007,
"step": 1840
},
{
"epoch": 1.4541864139020537,
"grad_norm": 15.247266132776515,
"learning_rate": 6.134355630116189e-06,
"loss": 1.357899785041809,
"step": 1841
},
{
"epoch": 1.4549763033175356,
"grad_norm": 7.690736401984131,
"learning_rate": 6.129879325146948e-06,
"loss": 1.3187198638916016,
"step": 1842
},
{
"epoch": 1.4557661927330174,
"grad_norm": 17.084504147813014,
"learning_rate": 6.125402065651933e-06,
"loss": 1.1403226852416992,
"step": 1843
},
{
"epoch": 1.456556082148499,
"grad_norm": 10.647716875554703,
"learning_rate": 6.120923855413546e-06,
"loss": 1.5669901371002197,
"step": 1844
},
{
"epoch": 1.457345971563981,
"grad_norm": 17.169042345137168,
"learning_rate": 6.116444698214996e-06,
"loss": 1.8641374111175537,
"step": 1845
},
{
"epoch": 1.4581358609794628,
"grad_norm": 10.230215295961285,
"learning_rate": 6.111964597840288e-06,
"loss": 1.8520389795303345,
"step": 1846
},
{
"epoch": 1.4589257503949447,
"grad_norm": 14.086738280821706,
"learning_rate": 6.1074835580742274e-06,
"loss": 1.135934829711914,
"step": 1847
},
{
"epoch": 1.4597156398104265,
"grad_norm": 20.40220132997962,
"learning_rate": 6.103001582702408e-06,
"loss": 1.4812136888504028,
"step": 1848
},
{
"epoch": 1.4605055292259084,
"grad_norm": 31.44316031849772,
"learning_rate": 6.098518675511221e-06,
"loss": 2.650513172149658,
"step": 1849
},
{
"epoch": 1.4612954186413902,
"grad_norm": 8.456469054005813,
"learning_rate": 6.094034840287838e-06,
"loss": 1.8254547119140625,
"step": 1850
},
{
"epoch": 1.462085308056872,
"grad_norm": 8.984175379105094,
"learning_rate": 6.08955008082022e-06,
"loss": 1.96278715133667,
"step": 1851
},
{
"epoch": 1.462875197472354,
"grad_norm": 17.024546561160296,
"learning_rate": 6.085064400897106e-06,
"loss": 1.2698783874511719,
"step": 1852
},
{
"epoch": 1.4636650868878358,
"grad_norm": 9.736224700781001,
"learning_rate": 6.080577804308012e-06,
"loss": 1.6380083560943604,
"step": 1853
},
{
"epoch": 1.4644549763033177,
"grad_norm": 20.83200309530118,
"learning_rate": 6.076090294843233e-06,
"loss": 1.6041996479034424,
"step": 1854
},
{
"epoch": 1.4652448657187993,
"grad_norm": 10.731218316042554,
"learning_rate": 6.0716018762938265e-06,
"loss": 1.3004403114318848,
"step": 1855
},
{
"epoch": 1.4660347551342812,
"grad_norm": 12.047943285394913,
"learning_rate": 6.067112552451628e-06,
"loss": 1.7745938301086426,
"step": 1856
},
{
"epoch": 1.466824644549763,
"grad_norm": 11.575538627172165,
"learning_rate": 6.062622327109231e-06,
"loss": 1.8333407640457153,
"step": 1857
},
{
"epoch": 1.4676145339652449,
"grad_norm": 8.54961227395821,
"learning_rate": 6.0581312040599926e-06,
"loss": 1.6345336437225342,
"step": 1858
},
{
"epoch": 1.4684044233807267,
"grad_norm": 15.409163973089639,
"learning_rate": 6.053639187098028e-06,
"loss": 0.9887954592704773,
"step": 1859
},
{
"epoch": 1.4691943127962086,
"grad_norm": 12.434754832094152,
"learning_rate": 6.04914628001821e-06,
"loss": 1.7964859008789062,
"step": 1860
},
{
"epoch": 1.4699842022116902,
"grad_norm": 30.51669618244978,
"learning_rate": 6.044652486616159e-06,
"loss": 2.461520195007324,
"step": 1861
},
{
"epoch": 1.470774091627172,
"grad_norm": 10.994064510333782,
"learning_rate": 6.040157810688245e-06,
"loss": 1.86288583278656,
"step": 1862
},
{
"epoch": 1.471563981042654,
"grad_norm": 9.30779783354174,
"learning_rate": 6.035662256031592e-06,
"loss": 1.46977698802948,
"step": 1863
},
{
"epoch": 1.4723538704581358,
"grad_norm": 7.934486944796402,
"learning_rate": 6.03116582644405e-06,
"loss": 1.5796260833740234,
"step": 1864
},
{
"epoch": 1.4731437598736177,
"grad_norm": 14.423496800164589,
"learning_rate": 6.026668525724226e-06,
"loss": 1.8589414358139038,
"step": 1865
},
{
"epoch": 1.4739336492890995,
"grad_norm": 10.939581596140304,
"learning_rate": 6.022170357671448e-06,
"loss": 2.25348162651062,
"step": 1866
},
{
"epoch": 1.4747235387045814,
"grad_norm": 18.444767247036655,
"learning_rate": 6.017671326085787e-06,
"loss": 2.1810455322265625,
"step": 1867
},
{
"epoch": 1.4755134281200633,
"grad_norm": 6.294704216554538,
"learning_rate": 6.013171434768039e-06,
"loss": 1.2740647792816162,
"step": 1868
},
{
"epoch": 1.4763033175355451,
"grad_norm": 11.258105727454375,
"learning_rate": 6.008670687519726e-06,
"loss": 1.537172555923462,
"step": 1869
},
{
"epoch": 1.477093206951027,
"grad_norm": 8.495483927169012,
"learning_rate": 6.004169088143093e-06,
"loss": 1.5573079586029053,
"step": 1870
},
{
"epoch": 1.4778830963665086,
"grad_norm": 15.784635016949512,
"learning_rate": 5.99966664044111e-06,
"loss": 1.1430606842041016,
"step": 1871
},
{
"epoch": 1.4786729857819905,
"grad_norm": 10.067351907649972,
"learning_rate": 5.9951633482174565e-06,
"loss": 1.8512628078460693,
"step": 1872
},
{
"epoch": 1.4794628751974723,
"grad_norm": 12.439277630875722,
"learning_rate": 5.99065921527653e-06,
"loss": 1.0742204189300537,
"step": 1873
},
{
"epoch": 1.4802527646129542,
"grad_norm": 11.790578305048301,
"learning_rate": 5.986154245423435e-06,
"loss": 2.0604054927825928,
"step": 1874
},
{
"epoch": 1.481042654028436,
"grad_norm": 9.886128974159105,
"learning_rate": 5.981648442463987e-06,
"loss": 1.2165788412094116,
"step": 1875
},
{
"epoch": 1.481832543443918,
"grad_norm": 11.693448495370616,
"learning_rate": 5.977141810204702e-06,
"loss": 1.2570345401763916,
"step": 1876
},
{
"epoch": 1.4826224328593998,
"grad_norm": 11.392265499751487,
"learning_rate": 5.972634352452797e-06,
"loss": 1.3734052181243896,
"step": 1877
},
{
"epoch": 1.4834123222748814,
"grad_norm": 9.935897727227923,
"learning_rate": 5.968126073016188e-06,
"loss": 1.536318302154541,
"step": 1878
},
{
"epoch": 1.4842022116903633,
"grad_norm": 13.37009015419332,
"learning_rate": 5.963616975703488e-06,
"loss": 1.6027817726135254,
"step": 1879
},
{
"epoch": 1.4849921011058451,
"grad_norm": 13.54732672385344,
"learning_rate": 5.95910706432399e-06,
"loss": 1.4695227146148682,
"step": 1880
},
{
"epoch": 1.485781990521327,
"grad_norm": 10.426563375284957,
"learning_rate": 5.954596342687686e-06,
"loss": 1.2945826053619385,
"step": 1881
},
{
"epoch": 1.4865718799368088,
"grad_norm": 12.085174713834514,
"learning_rate": 5.950084814605252e-06,
"loss": 1.690997838973999,
"step": 1882
},
{
"epoch": 1.4873617693522907,
"grad_norm": 14.941420232595867,
"learning_rate": 5.945572483888033e-06,
"loss": 1.3554736375808716,
"step": 1883
},
{
"epoch": 1.4881516587677726,
"grad_norm": 12.228142172469568,
"learning_rate": 5.94105935434807e-06,
"loss": 1.512892723083496,
"step": 1884
},
{
"epoch": 1.4889415481832544,
"grad_norm": 11.913640083855395,
"learning_rate": 5.936545429798062e-06,
"loss": 0.7298011779785156,
"step": 1885
},
{
"epoch": 1.4897314375987363,
"grad_norm": 15.962737514375329,
"learning_rate": 5.932030714051392e-06,
"loss": 1.493302583694458,
"step": 1886
},
{
"epoch": 1.4905213270142181,
"grad_norm": 9.775040361342926,
"learning_rate": 5.927515210922107e-06,
"loss": 2.2897167205810547,
"step": 1887
},
{
"epoch": 1.4913112164296998,
"grad_norm": 16.24056817956605,
"learning_rate": 5.922998924224917e-06,
"loss": 1.8842390775680542,
"step": 1888
},
{
"epoch": 1.4921011058451816,
"grad_norm": 10.431504207313186,
"learning_rate": 5.918481857775196e-06,
"loss": 1.5306816101074219,
"step": 1889
},
{
"epoch": 1.4928909952606635,
"grad_norm": 10.932057272276792,
"learning_rate": 5.913964015388976e-06,
"loss": 1.5974483489990234,
"step": 1890
},
{
"epoch": 1.4936808846761453,
"grad_norm": 16.338433560086507,
"learning_rate": 5.909445400882948e-06,
"loss": 0.9032529592514038,
"step": 1891
},
{
"epoch": 1.4944707740916272,
"grad_norm": 16.083461588553835,
"learning_rate": 5.904926018074448e-06,
"loss": 2.5330991744995117,
"step": 1892
},
{
"epoch": 1.495260663507109,
"grad_norm": 7.719604767773579,
"learning_rate": 5.9004058707814715e-06,
"loss": 1.676531434059143,
"step": 1893
},
{
"epoch": 1.4960505529225907,
"grad_norm": 7.901640170457522,
"learning_rate": 5.895884962822648e-06,
"loss": 1.7386832237243652,
"step": 1894
},
{
"epoch": 1.4968404423380726,
"grad_norm": 9.168608646448185,
"learning_rate": 5.891363298017259e-06,
"loss": 1.2910975217819214,
"step": 1895
},
{
"epoch": 1.4976303317535544,
"grad_norm": 23.252379383869613,
"learning_rate": 5.886840880185221e-06,
"loss": 1.953572154045105,
"step": 1896
},
{
"epoch": 1.4984202211690363,
"grad_norm": 14.861072270306032,
"learning_rate": 5.8823177131470845e-06,
"loss": 1.128541350364685,
"step": 1897
},
{
"epoch": 1.4992101105845181,
"grad_norm": 11.269382836461835,
"learning_rate": 5.877793800724041e-06,
"loss": 1.7088985443115234,
"step": 1898
},
{
"epoch": 1.5,
"grad_norm": 10.533224054579982,
"learning_rate": 5.873269146737901e-06,
"loss": 2.03849458694458,
"step": 1899
},
{
"epoch": 1.5007898894154819,
"grad_norm": 10.821862530515162,
"learning_rate": 5.868743755011113e-06,
"loss": 0.8391838669776917,
"step": 1900
},
{
"epoch": 1.5015797788309637,
"grad_norm": 12.815338614755259,
"learning_rate": 5.86421762936674e-06,
"loss": 2.290050745010376,
"step": 1901
},
{
"epoch": 1.5023696682464456,
"grad_norm": 6.75820438876223,
"learning_rate": 5.859690773628466e-06,
"loss": 1.656872034072876,
"step": 1902
},
{
"epoch": 1.5031595576619274,
"grad_norm": 13.057735640244541,
"learning_rate": 5.855163191620597e-06,
"loss": 1.4379336833953857,
"step": 1903
},
{
"epoch": 1.5039494470774093,
"grad_norm": 8.816578457255313,
"learning_rate": 5.8506348871680475e-06,
"loss": 1.9750895500183105,
"step": 1904
},
{
"epoch": 1.5047393364928912,
"grad_norm": 10.27544331263895,
"learning_rate": 5.846105864096343e-06,
"loss": 1.3693504333496094,
"step": 1905
},
{
"epoch": 1.5055292259083728,
"grad_norm": 19.407143316882802,
"learning_rate": 5.84157612623162e-06,
"loss": 1.635138750076294,
"step": 1906
},
{
"epoch": 1.5063191153238547,
"grad_norm": 12.029439868679129,
"learning_rate": 5.837045677400613e-06,
"loss": 0.5531861782073975,
"step": 1907
},
{
"epoch": 1.5071090047393365,
"grad_norm": 14.005078207323043,
"learning_rate": 5.832514521430661e-06,
"loss": 1.1493902206420898,
"step": 1908
},
{
"epoch": 1.5078988941548184,
"grad_norm": 6.882826860067384,
"learning_rate": 5.827982662149703e-06,
"loss": 1.7186492681503296,
"step": 1909
},
{
"epoch": 1.5086887835703,
"grad_norm": 9.057187685271941,
"learning_rate": 5.8234501033862624e-06,
"loss": 2.1788861751556396,
"step": 1910
},
{
"epoch": 1.5094786729857819,
"grad_norm": 11.664865065310822,
"learning_rate": 5.818916848969463e-06,
"loss": 2.0605411529541016,
"step": 1911
},
{
"epoch": 1.5102685624012637,
"grad_norm": 8.847275772967832,
"learning_rate": 5.814382902729015e-06,
"loss": 1.4167741537094116,
"step": 1912
},
{
"epoch": 1.5110584518167456,
"grad_norm": 9.964601394953007,
"learning_rate": 5.809848268495206e-06,
"loss": 0.9648761749267578,
"step": 1913
},
{
"epoch": 1.5118483412322274,
"grad_norm": 18.140123517520358,
"learning_rate": 5.8053129500989156e-06,
"loss": 1.2075505256652832,
"step": 1914
},
{
"epoch": 1.5126382306477093,
"grad_norm": 14.956061178193,
"learning_rate": 5.80077695137159e-06,
"loss": 1.8038408756256104,
"step": 1915
},
{
"epoch": 1.5134281200631912,
"grad_norm": 8.549470794553304,
"learning_rate": 5.7962402761452616e-06,
"loss": 1.2158410549163818,
"step": 1916
},
{
"epoch": 1.514218009478673,
"grad_norm": 10.833339245641687,
"learning_rate": 5.791702928252525e-06,
"loss": 0.7378091812133789,
"step": 1917
},
{
"epoch": 1.5150078988941549,
"grad_norm": 9.732584946999157,
"learning_rate": 5.7871649115265484e-06,
"loss": 1.1355817317962646,
"step": 1918
},
{
"epoch": 1.5157977883096367,
"grad_norm": 15.80570500150481,
"learning_rate": 5.782626229801062e-06,
"loss": 1.5603950023651123,
"step": 1919
},
{
"epoch": 1.5165876777251186,
"grad_norm": 12.04096957012893,
"learning_rate": 5.778086886910359e-06,
"loss": 1.6280852556228638,
"step": 1920
},
{
"epoch": 1.5173775671406005,
"grad_norm": 17.757331266774536,
"learning_rate": 5.773546886689292e-06,
"loss": 1.6459561586380005,
"step": 1921
},
{
"epoch": 1.518167456556082,
"grad_norm": 10.980216863583156,
"learning_rate": 5.769006232973266e-06,
"loss": 1.7304844856262207,
"step": 1922
},
{
"epoch": 1.518957345971564,
"grad_norm": 8.64298769734182,
"learning_rate": 5.764464929598246e-06,
"loss": 1.9379894733428955,
"step": 1923
},
{
"epoch": 1.5197472353870458,
"grad_norm": 8.969841480124796,
"learning_rate": 5.759922980400734e-06,
"loss": 1.1220753192901611,
"step": 1924
},
{
"epoch": 1.5205371248025277,
"grad_norm": 14.709009423534145,
"learning_rate": 5.755380389217785e-06,
"loss": 1.1699135303497314,
"step": 1925
},
{
"epoch": 1.5213270142180095,
"grad_norm": 9.204182201289338,
"learning_rate": 5.750837159886996e-06,
"loss": 1.9453136920928955,
"step": 1926
},
{
"epoch": 1.5221169036334912,
"grad_norm": 20.29037732104967,
"learning_rate": 5.746293296246502e-06,
"loss": 1.1104214191436768,
"step": 1927
},
{
"epoch": 1.522906793048973,
"grad_norm": 9.963584395362918,
"learning_rate": 5.741748802134976e-06,
"loss": 1.0753260850906372,
"step": 1928
},
{
"epoch": 1.5236966824644549,
"grad_norm": 10.194943039916806,
"learning_rate": 5.7372036813916155e-06,
"loss": 1.6703574657440186,
"step": 1929
},
{
"epoch": 1.5244865718799367,
"grad_norm": 12.899448039370167,
"learning_rate": 5.732657937856158e-06,
"loss": 1.3306403160095215,
"step": 1930
},
{
"epoch": 1.5252764612954186,
"grad_norm": 9.761711644822508,
"learning_rate": 5.728111575368865e-06,
"loss": 1.5857300758361816,
"step": 1931
},
{
"epoch": 1.5260663507109005,
"grad_norm": 10.629942354032634,
"learning_rate": 5.723564597770514e-06,
"loss": 1.4648703336715698,
"step": 1932
},
{
"epoch": 1.5268562401263823,
"grad_norm": 13.432843847452732,
"learning_rate": 5.719017008902407e-06,
"loss": 1.4944384098052979,
"step": 1933
},
{
"epoch": 1.5276461295418642,
"grad_norm": 11.813071490114952,
"learning_rate": 5.714468812606364e-06,
"loss": 1.323237657546997,
"step": 1934
},
{
"epoch": 1.528436018957346,
"grad_norm": 8.562112811115774,
"learning_rate": 5.709920012724716e-06,
"loss": 2.0364575386047363,
"step": 1935
},
{
"epoch": 1.529225908372828,
"grad_norm": 13.067657788966802,
"learning_rate": 5.705370613100303e-06,
"loss": 1.4062690734863281,
"step": 1936
},
{
"epoch": 1.5300157977883098,
"grad_norm": 13.707958453722402,
"learning_rate": 5.700820617576472e-06,
"loss": 2.091384172439575,
"step": 1937
},
{
"epoch": 1.5308056872037916,
"grad_norm": 10.413369263252646,
"learning_rate": 5.696270029997078e-06,
"loss": 1.5702612400054932,
"step": 1938
},
{
"epoch": 1.5315955766192733,
"grad_norm": 7.882076727278544,
"learning_rate": 5.691718854206469e-06,
"loss": 1.8636072874069214,
"step": 1939
},
{
"epoch": 1.5323854660347551,
"grad_norm": 9.17078361317583,
"learning_rate": 5.687167094049493e-06,
"loss": 2.772977828979492,
"step": 1940
},
{
"epoch": 1.533175355450237,
"grad_norm": 15.116676322563023,
"learning_rate": 5.682614753371493e-06,
"loss": 1.5175914764404297,
"step": 1941
},
{
"epoch": 1.5339652448657188,
"grad_norm": 7.844046810826043,
"learning_rate": 5.678061836018303e-06,
"loss": 2.061984062194824,
"step": 1942
},
{
"epoch": 1.5347551342812005,
"grad_norm": 10.701078465755538,
"learning_rate": 5.673508345836239e-06,
"loss": 1.6619548797607422,
"step": 1943
},
{
"epoch": 1.5355450236966823,
"grad_norm": 7.891225505862987,
"learning_rate": 5.6689542866721095e-06,
"loss": 1.1752052307128906,
"step": 1944
},
{
"epoch": 1.5363349131121642,
"grad_norm": 6.304249509659231,
"learning_rate": 5.664399662373192e-06,
"loss": 0.8437387943267822,
"step": 1945
},
{
"epoch": 1.537124802527646,
"grad_norm": 17.603728317953017,
"learning_rate": 5.659844476787255e-06,
"loss": 1.1868000030517578,
"step": 1946
},
{
"epoch": 1.537914691943128,
"grad_norm": 66.47257484493905,
"learning_rate": 5.655288733762531e-06,
"loss": 2.8787412643432617,
"step": 1947
},
{
"epoch": 1.5387045813586098,
"grad_norm": 8.360497076634983,
"learning_rate": 5.650732437147725e-06,
"loss": 1.4284359216690063,
"step": 1948
},
{
"epoch": 1.5394944707740916,
"grad_norm": 16.849166837845633,
"learning_rate": 5.646175590792015e-06,
"loss": 1.8208255767822266,
"step": 1949
},
{
"epoch": 1.5402843601895735,
"grad_norm": 15.853584711578053,
"learning_rate": 5.6416181985450365e-06,
"loss": 1.4639555215835571,
"step": 1950
},
{
"epoch": 1.5410742496050553,
"grad_norm": 11.418310736533611,
"learning_rate": 5.637060264256893e-06,
"loss": 1.445953369140625,
"step": 1951
},
{
"epoch": 1.5418641390205372,
"grad_norm": 10.03242971159572,
"learning_rate": 5.632501791778139e-06,
"loss": 1.7128900289535522,
"step": 1952
},
{
"epoch": 1.542654028436019,
"grad_norm": 11.985633656861989,
"learning_rate": 5.6279427849597876e-06,
"loss": 1.3158780336380005,
"step": 1953
},
{
"epoch": 1.543443917851501,
"grad_norm": 13.823142105370444,
"learning_rate": 5.623383247653306e-06,
"loss": 1.426164150238037,
"step": 1954
},
{
"epoch": 1.5442338072669828,
"grad_norm": 12.41483138077164,
"learning_rate": 5.6188231837106024e-06,
"loss": 1.4300283193588257,
"step": 1955
},
{
"epoch": 1.5450236966824644,
"grad_norm": 12.100433098038376,
"learning_rate": 5.6142625969840355e-06,
"loss": 1.334028720855713,
"step": 1956
},
{
"epoch": 1.5458135860979463,
"grad_norm": 7.826204795219399,
"learning_rate": 5.6097014913264036e-06,
"loss": 1.2844315767288208,
"step": 1957
},
{
"epoch": 1.5466034755134281,
"grad_norm": 10.029697199051965,
"learning_rate": 5.605139870590945e-06,
"loss": 1.7111456394195557,
"step": 1958
},
{
"epoch": 1.54739336492891,
"grad_norm": 23.257849538913792,
"learning_rate": 5.600577738631331e-06,
"loss": 1.3320598602294922,
"step": 1959
},
{
"epoch": 1.5481832543443916,
"grad_norm": 28.133569384076022,
"learning_rate": 5.596015099301665e-06,
"loss": 1.7531509399414062,
"step": 1960
},
{
"epoch": 1.5489731437598735,
"grad_norm": 7.595217526983019,
"learning_rate": 5.591451956456482e-06,
"loss": 1.5534119606018066,
"step": 1961
},
{
"epoch": 1.5497630331753554,
"grad_norm": 14.700635546212805,
"learning_rate": 5.586888313950737e-06,
"loss": 0.8609148263931274,
"step": 1962
},
{
"epoch": 1.5505529225908372,
"grad_norm": 11.364286626899796,
"learning_rate": 5.5823241756398115e-06,
"loss": 1.637607216835022,
"step": 1963
},
{
"epoch": 1.551342812006319,
"grad_norm": 9.425435127940757,
"learning_rate": 5.577759545379507e-06,
"loss": 1.3392387628555298,
"step": 1964
},
{
"epoch": 1.552132701421801,
"grad_norm": 13.850940959168309,
"learning_rate": 5.573194427026034e-06,
"loss": 1.3945591449737549,
"step": 1965
},
{
"epoch": 1.5529225908372828,
"grad_norm": 11.948388622145545,
"learning_rate": 5.568628824436022e-06,
"loss": 1.3258531093597412,
"step": 1966
},
{
"epoch": 1.5537124802527646,
"grad_norm": 12.964726484077811,
"learning_rate": 5.564062741466506e-06,
"loss": 1.6788570880889893,
"step": 1967
},
{
"epoch": 1.5545023696682465,
"grad_norm": 14.308086462658784,
"learning_rate": 5.559496181974929e-06,
"loss": 1.5159149169921875,
"step": 1968
},
{
"epoch": 1.5552922590837284,
"grad_norm": 12.57208338798326,
"learning_rate": 5.554929149819136e-06,
"loss": 1.6231142282485962,
"step": 1969
},
{
"epoch": 1.5560821484992102,
"grad_norm": 24.3591417534666,
"learning_rate": 5.550361648857369e-06,
"loss": 2.435429573059082,
"step": 1970
},
{
"epoch": 1.556872037914692,
"grad_norm": 9.389696915132824,
"learning_rate": 5.545793682948269e-06,
"loss": 1.346461296081543,
"step": 1971
},
{
"epoch": 1.5576619273301737,
"grad_norm": 15.375687685358574,
"learning_rate": 5.541225255950868e-06,
"loss": 1.1790099143981934,
"step": 1972
},
{
"epoch": 1.5584518167456556,
"grad_norm": 11.802119366322316,
"learning_rate": 5.536656371724588e-06,
"loss": 1.6181936264038086,
"step": 1973
},
{
"epoch": 1.5592417061611374,
"grad_norm": 9.110006585026039,
"learning_rate": 5.5320870341292396e-06,
"loss": 1.6034982204437256,
"step": 1974
},
{
"epoch": 1.5600315955766193,
"grad_norm": 11.636051169463032,
"learning_rate": 5.527517247025012e-06,
"loss": 1.4720101356506348,
"step": 1975
},
{
"epoch": 1.5608214849921012,
"grad_norm": 11.163295280212573,
"learning_rate": 5.522947014272476e-06,
"loss": 1.140345573425293,
"step": 1976
},
{
"epoch": 1.5616113744075828,
"grad_norm": 13.38890976856887,
"learning_rate": 5.518376339732582e-06,
"loss": 1.2083477973937988,
"step": 1977
},
{
"epoch": 1.5624012638230647,
"grad_norm": 11.809538383883128,
"learning_rate": 5.513805227266648e-06,
"loss": 2.1081316471099854,
"step": 1978
},
{
"epoch": 1.5631911532385465,
"grad_norm": 7.495208922125521,
"learning_rate": 5.5092336807363655e-06,
"loss": 2.039696455001831,
"step": 1979
},
{
"epoch": 1.5639810426540284,
"grad_norm": 13.493304714233394,
"learning_rate": 5.504661704003793e-06,
"loss": 1.486254096031189,
"step": 1980
},
{
"epoch": 1.5647709320695102,
"grad_norm": 7.784789213569366,
"learning_rate": 5.5000893009313515e-06,
"loss": 1.3860276937484741,
"step": 1981
},
{
"epoch": 1.565560821484992,
"grad_norm": 18.85650733652106,
"learning_rate": 5.495516475381822e-06,
"loss": 1.6732574701309204,
"step": 1982
},
{
"epoch": 1.566350710900474,
"grad_norm": 8.123420469888398,
"learning_rate": 5.490943231218343e-06,
"loss": 1.847348928451538,
"step": 1983
},
{
"epoch": 1.5671406003159558,
"grad_norm": 16.49563314393432,
"learning_rate": 5.486369572304404e-06,
"loss": 1.5314483642578125,
"step": 1984
},
{
"epoch": 1.5679304897314377,
"grad_norm": 8.487512867858714,
"learning_rate": 5.48179550250385e-06,
"loss": 1.5116339921951294,
"step": 1985
},
{
"epoch": 1.5687203791469195,
"grad_norm": 6.915693492968808,
"learning_rate": 5.477221025680868e-06,
"loss": 1.482391119003296,
"step": 1986
},
{
"epoch": 1.5695102685624014,
"grad_norm": 19.644776640804693,
"learning_rate": 5.472646145699991e-06,
"loss": 1.6486904621124268,
"step": 1987
},
{
"epoch": 1.5703001579778832,
"grad_norm": 7.603103639854315,
"learning_rate": 5.468070866426098e-06,
"loss": 1.633828043937683,
"step": 1988
},
{
"epoch": 1.5710900473933649,
"grad_norm": 13.602654260017356,
"learning_rate": 5.4634951917243905e-06,
"loss": 1.9716848134994507,
"step": 1989
},
{
"epoch": 1.5718799368088467,
"grad_norm": 9.757410424603487,
"learning_rate": 5.458919125460421e-06,
"loss": 2.7240705490112305,
"step": 1990
},
{
"epoch": 1.5726698262243286,
"grad_norm": 19.360862166490207,
"learning_rate": 5.4543426715000644e-06,
"loss": 1.697304368019104,
"step": 1991
},
{
"epoch": 1.5734597156398105,
"grad_norm": 10.343188536941312,
"learning_rate": 5.4497658337095205e-06,
"loss": 1.9906163215637207,
"step": 1992
},
{
"epoch": 1.574249605055292,
"grad_norm": 11.847212271229933,
"learning_rate": 5.44518861595532e-06,
"loss": 2.298971176147461,
"step": 1993
},
{
"epoch": 1.575039494470774,
"grad_norm": 9.679704863397104,
"learning_rate": 5.440611022104312e-06,
"loss": 1.621870994567871,
"step": 1994
},
{
"epoch": 1.5758293838862558,
"grad_norm": 8.36320218724247,
"learning_rate": 5.43603305602366e-06,
"loss": 0.9557559490203857,
"step": 1995
},
{
"epoch": 1.5766192733017377,
"grad_norm": 9.018756709124666,
"learning_rate": 5.431454721580847e-06,
"loss": 1.3342235088348389,
"step": 1996
},
{
"epoch": 1.5774091627172195,
"grad_norm": 10.023772272993021,
"learning_rate": 5.426876022643665e-06,
"loss": 2.0808849334716797,
"step": 1997
},
{
"epoch": 1.5781990521327014,
"grad_norm": 11.455856990097669,
"learning_rate": 5.422296963080212e-06,
"loss": 1.4423177242279053,
"step": 1998
},
{
"epoch": 1.5789889415481833,
"grad_norm": 23.049740134401613,
"learning_rate": 5.417717546758895e-06,
"loss": 1.558653712272644,
"step": 1999
},
{
"epoch": 1.5797788309636651,
"grad_norm": 19.165624885196646,
"learning_rate": 5.413137777548418e-06,
"loss": 2.2184576988220215,
"step": 2000
},
{
"epoch": 1.580568720379147,
"grad_norm": 11.595793426567166,
"learning_rate": 5.4085576593177865e-06,
"loss": 1.0131672620773315,
"step": 2001
},
{
"epoch": 1.5813586097946288,
"grad_norm": 8.213720732018274,
"learning_rate": 5.403977195936301e-06,
"loss": 1.3775444030761719,
"step": 2002
},
{
"epoch": 1.5821484992101107,
"grad_norm": 12.113284402572612,
"learning_rate": 5.399396391273547e-06,
"loss": 1.8444898128509521,
"step": 2003
},
{
"epoch": 1.5829383886255926,
"grad_norm": 14.168275573138237,
"learning_rate": 5.394815249199408e-06,
"loss": 1.2480335235595703,
"step": 2004
},
{
"epoch": 1.5837282780410744,
"grad_norm": 5.247577025302916,
"learning_rate": 5.390233773584047e-06,
"loss": 0.7935315370559692,
"step": 2005
},
{
"epoch": 1.584518167456556,
"grad_norm": 10.201870401815132,
"learning_rate": 5.385651968297907e-06,
"loss": 1.4372203350067139,
"step": 2006
},
{
"epoch": 1.585308056872038,
"grad_norm": 9.572141631234302,
"learning_rate": 5.3810698372117165e-06,
"loss": 1.6270627975463867,
"step": 2007
},
{
"epoch": 1.5860979462875198,
"grad_norm": 11.97927351834067,
"learning_rate": 5.37648738419647e-06,
"loss": 1.5614657402038574,
"step": 2008
},
{
"epoch": 1.5868878357030016,
"grad_norm": 8.772950923899149,
"learning_rate": 5.371904613123444e-06,
"loss": 1.8875480890274048,
"step": 2009
},
{
"epoch": 1.5876777251184833,
"grad_norm": 8.245381714413037,
"learning_rate": 5.367321527864175e-06,
"loss": 1.6283080577850342,
"step": 2010
},
{
"epoch": 1.5884676145339651,
"grad_norm": 7.982415790960697,
"learning_rate": 5.362738132290471e-06,
"loss": 1.4374988079071045,
"step": 2011
},
{
"epoch": 1.589257503949447,
"grad_norm": 11.43633322051133,
"learning_rate": 5.358154430274397e-06,
"loss": 1.4911184310913086,
"step": 2012
},
{
"epoch": 1.5900473933649288,
"grad_norm": 10.388360353070853,
"learning_rate": 5.353570425688282e-06,
"loss": 0.8646364212036133,
"step": 2013
},
{
"epoch": 1.5908372827804107,
"grad_norm": 8.234236363122712,
"learning_rate": 5.348986122404706e-06,
"loss": 1.3800685405731201,
"step": 2014
},
{
"epoch": 1.5916271721958926,
"grad_norm": 9.432342143567002,
"learning_rate": 5.344401524296506e-06,
"loss": 1.8379184007644653,
"step": 2015
},
{
"epoch": 1.5924170616113744,
"grad_norm": 10.131723893874554,
"learning_rate": 5.339816635236762e-06,
"loss": 1.7298725843429565,
"step": 2016
},
{
"epoch": 1.5932069510268563,
"grad_norm": 9.394398464778371,
"learning_rate": 5.335231459098806e-06,
"loss": 1.9646117687225342,
"step": 2017
},
{
"epoch": 1.5939968404423381,
"grad_norm": 15.074944044842479,
"learning_rate": 5.330645999756211e-06,
"loss": 1.759244680404663,
"step": 2018
},
{
"epoch": 1.59478672985782,
"grad_norm": 30.106091146105907,
"learning_rate": 5.326060261082786e-06,
"loss": 1.3692538738250732,
"step": 2019
},
{
"epoch": 1.5955766192733019,
"grad_norm": 10.236894381216507,
"learning_rate": 5.321474246952577e-06,
"loss": 1.1828837394714355,
"step": 2020
},
{
"epoch": 1.5963665086887837,
"grad_norm": 14.4756810516669,
"learning_rate": 5.3168879612398684e-06,
"loss": 2.2654309272766113,
"step": 2021
},
{
"epoch": 1.5971563981042654,
"grad_norm": 12.643909473952599,
"learning_rate": 5.3123014078191635e-06,
"loss": 1.6730940341949463,
"step": 2022
},
{
"epoch": 1.5979462875197472,
"grad_norm": 11.041702917394197,
"learning_rate": 5.307714590565203e-06,
"loss": 1.547790288925171,
"step": 2023
},
{
"epoch": 1.598736176935229,
"grad_norm": 11.4813876902655,
"learning_rate": 5.303127513352943e-06,
"loss": 1.3282029628753662,
"step": 2024
},
{
"epoch": 1.599526066350711,
"grad_norm": 17.03475421856299,
"learning_rate": 5.298540180057561e-06,
"loss": 1.527526617050171,
"step": 2025
},
{
"epoch": 1.6003159557661928,
"grad_norm": 12.426584174439972,
"learning_rate": 5.293952594554452e-06,
"loss": 1.0609666109085083,
"step": 2026
},
{
"epoch": 1.6011058451816744,
"grad_norm": 14.614321755378219,
"learning_rate": 5.289364760719223e-06,
"loss": 1.6314609050750732,
"step": 2027
},
{
"epoch": 1.6018957345971563,
"grad_norm": 17.90827490801936,
"learning_rate": 5.284776682427691e-06,
"loss": 2.031951904296875,
"step": 2028
},
{
"epoch": 1.6026856240126381,
"grad_norm": 9.299749173159547,
"learning_rate": 5.280188363555881e-06,
"loss": 1.3200483322143555,
"step": 2029
},
{
"epoch": 1.60347551342812,
"grad_norm": 13.120649438444463,
"learning_rate": 5.275599807980019e-06,
"loss": 1.2993329763412476,
"step": 2030
},
{
"epoch": 1.6042654028436019,
"grad_norm": 22.075534962390574,
"learning_rate": 5.271011019576528e-06,
"loss": 1.9799494743347168,
"step": 2031
},
{
"epoch": 1.6050552922590837,
"grad_norm": 7.468671537450528,
"learning_rate": 5.2664220022220404e-06,
"loss": 1.303866982460022,
"step": 2032
},
{
"epoch": 1.6058451816745656,
"grad_norm": 9.57368198328469,
"learning_rate": 5.261832759793365e-06,
"loss": 1.6507763862609863,
"step": 2033
},
{
"epoch": 1.6066350710900474,
"grad_norm": 9.44370501166652,
"learning_rate": 5.2572432961675115e-06,
"loss": 1.9541301727294922,
"step": 2034
},
{
"epoch": 1.6074249605055293,
"grad_norm": 14.242349861981912,
"learning_rate": 5.252653615221677e-06,
"loss": 1.2269582748413086,
"step": 2035
},
{
"epoch": 1.6082148499210112,
"grad_norm": 8.26115422114046,
"learning_rate": 5.248063720833233e-06,
"loss": 1.1905943155288696,
"step": 2036
},
{
"epoch": 1.609004739336493,
"grad_norm": 10.698374761244876,
"learning_rate": 5.243473616879744e-06,
"loss": 1.0884801149368286,
"step": 2037
},
{
"epoch": 1.6097946287519749,
"grad_norm": 11.821184713135086,
"learning_rate": 5.238883307238939e-06,
"loss": 1.7929291725158691,
"step": 2038
},
{
"epoch": 1.6105845181674565,
"grad_norm": 16.8095121665838,
"learning_rate": 5.234292795788731e-06,
"loss": 1.8585515022277832,
"step": 2039
},
{
"epoch": 1.6113744075829384,
"grad_norm": 14.477383097248046,
"learning_rate": 5.229702086407197e-06,
"loss": 2.150439977645874,
"step": 2040
},
{
"epoch": 1.6121642969984202,
"grad_norm": 11.989923601556997,
"learning_rate": 5.225111182972584e-06,
"loss": 1.4646919965744019,
"step": 2041
},
{
"epoch": 1.612954186413902,
"grad_norm": 21.745170716359002,
"learning_rate": 5.220520089363302e-06,
"loss": 1.7785918712615967,
"step": 2042
},
{
"epoch": 1.6137440758293837,
"grad_norm": 11.270529683690256,
"learning_rate": 5.215928809457924e-06,
"loss": 2.263561964035034,
"step": 2043
},
{
"epoch": 1.6145339652448656,
"grad_norm": 15.910293271142669,
"learning_rate": 5.211337347135176e-06,
"loss": 1.4089993238449097,
"step": 2044
},
{
"epoch": 1.6153238546603474,
"grad_norm": 16.722563692314175,
"learning_rate": 5.20674570627394e-06,
"loss": 1.6263060569763184,
"step": 2045
},
{
"epoch": 1.6161137440758293,
"grad_norm": 11.666163831636595,
"learning_rate": 5.202153890753252e-06,
"loss": 1.2603790760040283,
"step": 2046
},
{
"epoch": 1.6169036334913112,
"grad_norm": 12.621495227684445,
"learning_rate": 5.197561904452291e-06,
"loss": 2.883836507797241,
"step": 2047
},
{
"epoch": 1.617693522906793,
"grad_norm": 10.161552116575363,
"learning_rate": 5.192969751250382e-06,
"loss": 1.666745662689209,
"step": 2048
},
{
"epoch": 1.6184834123222749,
"grad_norm": 8.91334171222036,
"learning_rate": 5.188377435026991e-06,
"loss": 1.3152096271514893,
"step": 2049
},
{
"epoch": 1.6192733017377567,
"grad_norm": 12.703171570917682,
"learning_rate": 5.183784959661723e-06,
"loss": 1.2735559940338135,
"step": 2050
},
{
"epoch": 1.6200631911532386,
"grad_norm": 13.160459144969359,
"learning_rate": 5.1791923290343175e-06,
"loss": 0.9299610257148743,
"step": 2051
},
{
"epoch": 1.6208530805687205,
"grad_norm": 8.443761976415868,
"learning_rate": 5.17459954702464e-06,
"loss": 1.560915231704712,
"step": 2052
},
{
"epoch": 1.6216429699842023,
"grad_norm": 14.155078887312051,
"learning_rate": 5.1700066175126915e-06,
"loss": 2.029481887817383,
"step": 2053
},
{
"epoch": 1.6224328593996842,
"grad_norm": 7.4669992465133825,
"learning_rate": 5.165413544378594e-06,
"loss": 1.332027792930603,
"step": 2054
},
{
"epoch": 1.623222748815166,
"grad_norm": 11.339506770988185,
"learning_rate": 5.160820331502587e-06,
"loss": 1.6719763278961182,
"step": 2055
},
{
"epoch": 1.6240126382306477,
"grad_norm": 9.957423283849112,
"learning_rate": 5.1562269827650365e-06,
"loss": 1.8021857738494873,
"step": 2056
},
{
"epoch": 1.6248025276461295,
"grad_norm": 13.931093811264075,
"learning_rate": 5.1516335020464146e-06,
"loss": 1.2689666748046875,
"step": 2057
},
{
"epoch": 1.6255924170616114,
"grad_norm": 10.873902590482956,
"learning_rate": 5.147039893227312e-06,
"loss": 1.6544064283370972,
"step": 2058
},
{
"epoch": 1.6263823064770933,
"grad_norm": 14.998070753763457,
"learning_rate": 5.142446160188423e-06,
"loss": 0.7487756013870239,
"step": 2059
},
{
"epoch": 1.627172195892575,
"grad_norm": 11.06717608145743,
"learning_rate": 5.137852306810549e-06,
"loss": 1.2134767770767212,
"step": 2060
},
{
"epoch": 1.6279620853080567,
"grad_norm": 9.422607327758378,
"learning_rate": 5.133258336974593e-06,
"loss": 1.7278623580932617,
"step": 2061
},
{
"epoch": 1.6287519747235386,
"grad_norm": 11.032813566120764,
"learning_rate": 5.128664254561554e-06,
"loss": 1.4675060510635376,
"step": 2062
},
{
"epoch": 1.6295418641390205,
"grad_norm": 19.42554207831122,
"learning_rate": 5.12407006345253e-06,
"loss": 1.6174191236495972,
"step": 2063
},
{
"epoch": 1.6303317535545023,
"grad_norm": 8.820495621535443,
"learning_rate": 5.119475767528706e-06,
"loss": 1.6649625301361084,
"step": 2064
},
{
"epoch": 1.6311216429699842,
"grad_norm": 8.576125799967375,
"learning_rate": 5.114881370671363e-06,
"loss": 1.447519063949585,
"step": 2065
},
{
"epoch": 1.631911532385466,
"grad_norm": 12.22755594166127,
"learning_rate": 5.1102868767618564e-06,
"loss": 2.094078540802002,
"step": 2066
},
{
"epoch": 1.632701421800948,
"grad_norm": 17.90068380574316,
"learning_rate": 5.105692289681637e-06,
"loss": 2.2962210178375244,
"step": 2067
},
{
"epoch": 1.6334913112164298,
"grad_norm": 10.844973400668414,
"learning_rate": 5.10109761331222e-06,
"loss": 1.753501534461975,
"step": 2068
},
{
"epoch": 1.6342812006319116,
"grad_norm": 9.036279470960077,
"learning_rate": 5.096502851535207e-06,
"loss": 1.4237632751464844,
"step": 2069
},
{
"epoch": 1.6350710900473935,
"grad_norm": 19.783228854851586,
"learning_rate": 5.091908008232269e-06,
"loss": 1.0084950923919678,
"step": 2070
},
{
"epoch": 1.6358609794628753,
"grad_norm": 8.265709299215786,
"learning_rate": 5.08731308728514e-06,
"loss": 2.1787667274475098,
"step": 2071
},
{
"epoch": 1.636650868878357,
"grad_norm": 14.174582481095438,
"learning_rate": 5.082718092575629e-06,
"loss": 2.0619583129882812,
"step": 2072
},
{
"epoch": 1.6374407582938388,
"grad_norm": 11.108397416157564,
"learning_rate": 5.078123027985602e-06,
"loss": 1.3192667961120605,
"step": 2073
},
{
"epoch": 1.6382306477093207,
"grad_norm": 8.363886228263077,
"learning_rate": 5.073527897396983e-06,
"loss": 1.50796639919281,
"step": 2074
},
{
"epoch": 1.6390205371248026,
"grad_norm": 15.251141269075907,
"learning_rate": 5.068932704691754e-06,
"loss": 2.005817174911499,
"step": 2075
},
{
"epoch": 1.6398104265402842,
"grad_norm": 13.455564265575436,
"learning_rate": 5.064337453751949e-06,
"loss": 0.9753101468086243,
"step": 2076
},
{
"epoch": 1.640600315955766,
"grad_norm": 10.151593209532505,
"learning_rate": 5.059742148459651e-06,
"loss": 1.3239325284957886,
"step": 2077
},
{
"epoch": 1.641390205371248,
"grad_norm": 9.012341783855266,
"learning_rate": 5.055146792696989e-06,
"loss": 1.3081142902374268,
"step": 2078
},
{
"epoch": 1.6421800947867298,
"grad_norm": 14.881682947088617,
"learning_rate": 5.050551390346135e-06,
"loss": 1.9254162311553955,
"step": 2079
},
{
"epoch": 1.6429699842022116,
"grad_norm": 9.10456564272968,
"learning_rate": 5.0459559452893e-06,
"loss": 1.5548919439315796,
"step": 2080
},
{
"epoch": 1.6437598736176935,
"grad_norm": 19.6788851757542,
"learning_rate": 5.041360461408733e-06,
"loss": 1.1454696655273438,
"step": 2081
},
{
"epoch": 1.6445497630331753,
"grad_norm": 14.710569283676744,
"learning_rate": 5.036764942586709e-06,
"loss": 1.5640335083007812,
"step": 2082
},
{
"epoch": 1.6453396524486572,
"grad_norm": 12.439497693452665,
"learning_rate": 5.032169392705542e-06,
"loss": 1.5709795951843262,
"step": 2083
},
{
"epoch": 1.646129541864139,
"grad_norm": 16.197420446815777,
"learning_rate": 5.027573815647567e-06,
"loss": 2.125795602798462,
"step": 2084
},
{
"epoch": 1.646919431279621,
"grad_norm": 12.282692453985112,
"learning_rate": 5.0229782152951405e-06,
"loss": 1.4845194816589355,
"step": 2085
},
{
"epoch": 1.6477093206951028,
"grad_norm": 9.520283321188863,
"learning_rate": 5.018382595530643e-06,
"loss": 1.538682460784912,
"step": 2086
},
{
"epoch": 1.6484992101105846,
"grad_norm": 9.675670573110745,
"learning_rate": 5.0137869602364665e-06,
"loss": 1.280341625213623,
"step": 2087
},
{
"epoch": 1.6492890995260665,
"grad_norm": 18.974744400190374,
"learning_rate": 5.009191313295021e-06,
"loss": 1.772722601890564,
"step": 2088
},
{
"epoch": 1.6500789889415481,
"grad_norm": 9.452933859527397,
"learning_rate": 5.004595658588725e-06,
"loss": 1.2798036336898804,
"step": 2089
},
{
"epoch": 1.65086887835703,
"grad_norm": 11.716983488622807,
"learning_rate": 5e-06,
"loss": 2.5696773529052734,
"step": 2090
},
{
"epoch": 1.6516587677725119,
"grad_norm": 17.30778123295629,
"learning_rate": 4.995404341411277e-06,
"loss": 2.143465518951416,
"step": 2091
},
{
"epoch": 1.6524486571879937,
"grad_norm": 6.676595853028537,
"learning_rate": 4.990808686704979e-06,
"loss": 1.9209420680999756,
"step": 2092
},
{
"epoch": 1.6532385466034754,
"grad_norm": 10.700026843162101,
"learning_rate": 4.986213039763537e-06,
"loss": 1.3106441497802734,
"step": 2093
},
{
"epoch": 1.6540284360189572,
"grad_norm": 11.845311555733167,
"learning_rate": 4.98161740446936e-06,
"loss": 1.331827998161316,
"step": 2094
},
{
"epoch": 1.654818325434439,
"grad_norm": 14.482146662888397,
"learning_rate": 4.977021784704862e-06,
"loss": 1.2673121690750122,
"step": 2095
},
{
"epoch": 1.655608214849921,
"grad_norm": 9.96068930193819,
"learning_rate": 4.9724261843524345e-06,
"loss": 1.4080572128295898,
"step": 2096
},
{
"epoch": 1.6563981042654028,
"grad_norm": 6.81552208619803,
"learning_rate": 4.967830607294459e-06,
"loss": 1.8892409801483154,
"step": 2097
},
{
"epoch": 1.6571879936808847,
"grad_norm": 9.139351921483907,
"learning_rate": 4.963235057413292e-06,
"loss": 1.865785002708435,
"step": 2098
},
{
"epoch": 1.6579778830963665,
"grad_norm": 18.25394807894221,
"learning_rate": 4.95863953859127e-06,
"loss": 3.839024782180786,
"step": 2099
},
{
"epoch": 1.6587677725118484,
"grad_norm": 12.684731410566474,
"learning_rate": 4.9540440547107016e-06,
"loss": 1.6854069232940674,
"step": 2100
},
{
"epoch": 1.6595576619273302,
"grad_norm": 11.466195743942457,
"learning_rate": 4.9494486096538654e-06,
"loss": 0.6167169809341431,
"step": 2101
},
{
"epoch": 1.660347551342812,
"grad_norm": 11.989810253242599,
"learning_rate": 4.9448532073030125e-06,
"loss": 1.5397396087646484,
"step": 2102
},
{
"epoch": 1.661137440758294,
"grad_norm": 13.091229369852948,
"learning_rate": 4.940257851540351e-06,
"loss": 1.1000051498413086,
"step": 2103
},
{
"epoch": 1.6619273301737758,
"grad_norm": 15.278599967251001,
"learning_rate": 4.935662546248054e-06,
"loss": 1.370941162109375,
"step": 2104
},
{
"epoch": 1.6627172195892577,
"grad_norm": 8.92736303739886,
"learning_rate": 4.9310672953082486e-06,
"loss": 1.4179476499557495,
"step": 2105
},
{
"epoch": 1.6635071090047393,
"grad_norm": 7.439214281898773,
"learning_rate": 4.92647210260302e-06,
"loss": 1.1264572143554688,
"step": 2106
},
{
"epoch": 1.6642969984202212,
"grad_norm": 8.391749061486982,
"learning_rate": 4.9218769720144e-06,
"loss": 1.7352138757705688,
"step": 2107
},
{
"epoch": 1.665086887835703,
"grad_norm": 8.41116001387029,
"learning_rate": 4.917281907424371e-06,
"loss": 1.3535940647125244,
"step": 2108
},
{
"epoch": 1.6658767772511849,
"grad_norm": 9.156612347669475,
"learning_rate": 4.912686912714861e-06,
"loss": 1.4920666217803955,
"step": 2109
},
{
"epoch": 1.6666666666666665,
"grad_norm": 16.712403107809568,
"learning_rate": 4.908091991767734e-06,
"loss": 1.6276068687438965,
"step": 2110
},
{
"epoch": 1.6674565560821484,
"grad_norm": 18.944886780238278,
"learning_rate": 4.903497148464795e-06,
"loss": 1.2643494606018066,
"step": 2111
},
{
"epoch": 1.6682464454976302,
"grad_norm": 8.854420836492178,
"learning_rate": 4.898902386687782e-06,
"loss": 1.728925108909607,
"step": 2112
},
{
"epoch": 1.669036334913112,
"grad_norm": 7.998931904416969,
"learning_rate": 4.894307710318365e-06,
"loss": 1.2208718061447144,
"step": 2113
},
{
"epoch": 1.669826224328594,
"grad_norm": 11.667293171545051,
"learning_rate": 4.8897131232381435e-06,
"loss": 1.281367540359497,
"step": 2114
},
{
"epoch": 1.6706161137440758,
"grad_norm": 8.360501164751504,
"learning_rate": 4.88511862932864e-06,
"loss": 1.7427953481674194,
"step": 2115
},
{
"epoch": 1.6714060031595577,
"grad_norm": 12.84273569482612,
"learning_rate": 4.880524232471295e-06,
"loss": 1.1219735145568848,
"step": 2116
},
{
"epoch": 1.6721958925750395,
"grad_norm": 8.81213908324747,
"learning_rate": 4.875929936547472e-06,
"loss": 1.642223596572876,
"step": 2117
},
{
"epoch": 1.6729857819905214,
"grad_norm": 17.800338724408167,
"learning_rate": 4.871335745438448e-06,
"loss": 2.3190040588378906,
"step": 2118
},
{
"epoch": 1.6737756714060033,
"grad_norm": 17.22836983227111,
"learning_rate": 4.866741663025409e-06,
"loss": 1.689987063407898,
"step": 2119
},
{
"epoch": 1.674565560821485,
"grad_norm": 10.031772239297299,
"learning_rate": 4.8621476931894505e-06,
"loss": 1.2501479387283325,
"step": 2120
},
{
"epoch": 1.675355450236967,
"grad_norm": 9.091404836352739,
"learning_rate": 4.857553839811579e-06,
"loss": 1.4689991474151611,
"step": 2121
},
{
"epoch": 1.6761453396524486,
"grad_norm": 11.720644404349697,
"learning_rate": 4.85296010677269e-06,
"loss": 1.3092423677444458,
"step": 2122
},
{
"epoch": 1.6769352290679305,
"grad_norm": 9.826261602656874,
"learning_rate": 4.848366497953586e-06,
"loss": 1.6839494705200195,
"step": 2123
},
{
"epoch": 1.6777251184834123,
"grad_norm": 10.367714457357824,
"learning_rate": 4.843773017234964e-06,
"loss": 1.245840072631836,
"step": 2124
},
{
"epoch": 1.6785150078988942,
"grad_norm": 15.019641392439464,
"learning_rate": 4.839179668497413e-06,
"loss": 1.2178664207458496,
"step": 2125
},
{
"epoch": 1.6793048973143758,
"grad_norm": 13.354170916911665,
"learning_rate": 4.834586455621409e-06,
"loss": 1.5003374814987183,
"step": 2126
},
{
"epoch": 1.6800947867298577,
"grad_norm": 9.162723435056346,
"learning_rate": 4.829993382487309e-06,
"loss": 1.3886042833328247,
"step": 2127
},
{
"epoch": 1.6808846761453395,
"grad_norm": 9.551211296648892,
"learning_rate": 4.825400452975361e-06,
"loss": 1.7338354587554932,
"step": 2128
},
{
"epoch": 1.6816745655608214,
"grad_norm": 15.926343890819645,
"learning_rate": 4.820807670965683e-06,
"loss": 1.217260718345642,
"step": 2129
},
{
"epoch": 1.6824644549763033,
"grad_norm": 9.271922963812838,
"learning_rate": 4.816215040338277e-06,
"loss": 1.4944868087768555,
"step": 2130
},
{
"epoch": 1.6832543443917851,
"grad_norm": 10.041906359891817,
"learning_rate": 4.811622564973011e-06,
"loss": 1.8609442710876465,
"step": 2131
},
{
"epoch": 1.684044233807267,
"grad_norm": 10.228103619124214,
"learning_rate": 4.807030248749621e-06,
"loss": 1.6223942041397095,
"step": 2132
},
{
"epoch": 1.6848341232227488,
"grad_norm": 9.327061347477445,
"learning_rate": 4.802438095547712e-06,
"loss": 1.100557804107666,
"step": 2133
},
{
"epoch": 1.6856240126382307,
"grad_norm": 30.522951746249742,
"learning_rate": 4.7978461092467495e-06,
"loss": 1.0848801136016846,
"step": 2134
},
{
"epoch": 1.6864139020537126,
"grad_norm": 13.164129620057786,
"learning_rate": 4.793254293726061e-06,
"loss": 1.1626616716384888,
"step": 2135
},
{
"epoch": 1.6872037914691944,
"grad_norm": 21.254188334183024,
"learning_rate": 4.788662652864825e-06,
"loss": 1.7419378757476807,
"step": 2136
},
{
"epoch": 1.6879936808846763,
"grad_norm": 15.489897028057435,
"learning_rate": 4.784071190542079e-06,
"loss": 1.6035929918289185,
"step": 2137
},
{
"epoch": 1.6887835703001581,
"grad_norm": 9.477674481683636,
"learning_rate": 4.7794799106366985e-06,
"loss": 0.9086638689041138,
"step": 2138
},
{
"epoch": 1.6895734597156398,
"grad_norm": 14.744530447825799,
"learning_rate": 4.774888817027417e-06,
"loss": 2.459580183029175,
"step": 2139
},
{
"epoch": 1.6903633491311216,
"grad_norm": 10.143359754031751,
"learning_rate": 4.770297913592805e-06,
"loss": 1.5249871015548706,
"step": 2140
},
{
"epoch": 1.6911532385466035,
"grad_norm": 8.92591707625083,
"learning_rate": 4.76570720421127e-06,
"loss": 1.3436775207519531,
"step": 2141
},
{
"epoch": 1.6919431279620853,
"grad_norm": 30.798541971194087,
"learning_rate": 4.7611166927610625e-06,
"loss": 1.0415196418762207,
"step": 2142
},
{
"epoch": 1.692733017377567,
"grad_norm": 10.766454601215523,
"learning_rate": 4.756526383120258e-06,
"loss": 1.6581356525421143,
"step": 2143
},
{
"epoch": 1.6935229067930488,
"grad_norm": 10.521084763687215,
"learning_rate": 4.751936279166767e-06,
"loss": 1.5447998046875,
"step": 2144
},
{
"epoch": 1.6943127962085307,
"grad_norm": 9.436881293717475,
"learning_rate": 4.747346384778325e-06,
"loss": 2.1874170303344727,
"step": 2145
},
{
"epoch": 1.6951026856240126,
"grad_norm": 9.945491755562474,
"learning_rate": 4.7427567038324884e-06,
"loss": 1.6179015636444092,
"step": 2146
},
{
"epoch": 1.6958925750394944,
"grad_norm": 20.026946174142626,
"learning_rate": 4.738167240206637e-06,
"loss": 1.4960978031158447,
"step": 2147
},
{
"epoch": 1.6966824644549763,
"grad_norm": 9.92767129752773,
"learning_rate": 4.733577997777963e-06,
"loss": 1.0159810781478882,
"step": 2148
},
{
"epoch": 1.6974723538704581,
"grad_norm": 8.419076351215592,
"learning_rate": 4.728988980423473e-06,
"loss": 1.4090895652770996,
"step": 2149
},
{
"epoch": 1.69826224328594,
"grad_norm": 8.367910062718474,
"learning_rate": 4.724400192019983e-06,
"loss": 1.2694896459579468,
"step": 2150
},
{
"epoch": 1.6990521327014219,
"grad_norm": 15.475743745207783,
"learning_rate": 4.71981163644412e-06,
"loss": 1.5766160488128662,
"step": 2151
},
{
"epoch": 1.6998420221169037,
"grad_norm": 8.610091370022774,
"learning_rate": 4.715223317572309e-06,
"loss": 1.2474552392959595,
"step": 2152
},
{
"epoch": 1.7006319115323856,
"grad_norm": 8.616644887482238,
"learning_rate": 4.7106352392807794e-06,
"loss": 1.4365007877349854,
"step": 2153
},
{
"epoch": 1.7014218009478674,
"grad_norm": 12.289373525187166,
"learning_rate": 4.70604740544555e-06,
"loss": 1.2263505458831787,
"step": 2154
},
{
"epoch": 1.7022116903633493,
"grad_norm": 14.493463927125523,
"learning_rate": 4.701459819942441e-06,
"loss": 1.1191456317901611,
"step": 2155
},
{
"epoch": 1.703001579778831,
"grad_norm": 11.091605026430587,
"learning_rate": 4.696872486647059e-06,
"loss": 1.8349318504333496,
"step": 2156
},
{
"epoch": 1.7037914691943128,
"grad_norm": 13.242011895533519,
"learning_rate": 4.692285409434797e-06,
"loss": 1.2665749788284302,
"step": 2157
},
{
"epoch": 1.7045813586097947,
"grad_norm": 12.97363928423666,
"learning_rate": 4.6876985921808365e-06,
"loss": 1.3644407987594604,
"step": 2158
},
{
"epoch": 1.7053712480252765,
"grad_norm": 9.838108998315606,
"learning_rate": 4.683112038760135e-06,
"loss": 1.3256113529205322,
"step": 2159
},
{
"epoch": 1.7061611374407581,
"grad_norm": 11.584707197962354,
"learning_rate": 4.6785257530474244e-06,
"loss": 1.4737862348556519,
"step": 2160
},
{
"epoch": 1.70695102685624,
"grad_norm": 13.735300921932108,
"learning_rate": 4.673939738917216e-06,
"loss": 1.6939847469329834,
"step": 2161
},
{
"epoch": 1.7077409162717219,
"grad_norm": 9.701613005896544,
"learning_rate": 4.669354000243791e-06,
"loss": 1.218980312347412,
"step": 2162
},
{
"epoch": 1.7085308056872037,
"grad_norm": 16.55205261891123,
"learning_rate": 4.664768540901194e-06,
"loss": 1.3491718769073486,
"step": 2163
},
{
"epoch": 1.7093206951026856,
"grad_norm": 10.899568751624019,
"learning_rate": 4.66018336476324e-06,
"loss": 1.0617191791534424,
"step": 2164
},
{
"epoch": 1.7101105845181674,
"grad_norm": 31.659285393659797,
"learning_rate": 4.655598475703498e-06,
"loss": 1.6565725803375244,
"step": 2165
},
{
"epoch": 1.7109004739336493,
"grad_norm": 14.401725841266487,
"learning_rate": 4.651013877595296e-06,
"loss": 1.4930999279022217,
"step": 2166
},
{
"epoch": 1.7116903633491312,
"grad_norm": 16.299266500455296,
"learning_rate": 4.64642957431172e-06,
"loss": 0.774669885635376,
"step": 2167
},
{
"epoch": 1.712480252764613,
"grad_norm": 19.156771040259898,
"learning_rate": 4.641845569725605e-06,
"loss": 2.0019631385803223,
"step": 2168
},
{
"epoch": 1.7132701421800949,
"grad_norm": 17.22778044854949,
"learning_rate": 4.63726186770953e-06,
"loss": 1.3190504312515259,
"step": 2169
},
{
"epoch": 1.7140600315955767,
"grad_norm": 9.853391429883748,
"learning_rate": 4.6326784721358255e-06,
"loss": 1.7607496976852417,
"step": 2170
},
{
"epoch": 1.7148499210110586,
"grad_norm": 11.600650539403235,
"learning_rate": 4.628095386876557e-06,
"loss": 1.8006988763809204,
"step": 2171
},
{
"epoch": 1.7156398104265402,
"grad_norm": 9.816615664918947,
"learning_rate": 4.623512615803531e-06,
"loss": 1.423611044883728,
"step": 2172
},
{
"epoch": 1.716429699842022,
"grad_norm": 11.105593098350882,
"learning_rate": 4.618930162788284e-06,
"loss": 1.4016926288604736,
"step": 2173
},
{
"epoch": 1.717219589257504,
"grad_norm": 9.31423831663281,
"learning_rate": 4.614348031702093e-06,
"loss": 1.490910291671753,
"step": 2174
},
{
"epoch": 1.7180094786729858,
"grad_norm": 9.65485402130286,
"learning_rate": 4.609766226415955e-06,
"loss": 1.4671694040298462,
"step": 2175
},
{
"epoch": 1.7187993680884674,
"grad_norm": 8.831389046526823,
"learning_rate": 4.605184750800594e-06,
"loss": 1.1502845287322998,
"step": 2176
},
{
"epoch": 1.7195892575039493,
"grad_norm": 11.97525173977403,
"learning_rate": 4.6006036087264544e-06,
"loss": 1.1983712911605835,
"step": 2177
},
{
"epoch": 1.7203791469194312,
"grad_norm": 7.344974809829897,
"learning_rate": 4.596022804063701e-06,
"loss": 1.7621641159057617,
"step": 2178
},
{
"epoch": 1.721169036334913,
"grad_norm": 8.826471148662039,
"learning_rate": 4.591442340682214e-06,
"loss": 1.350406289100647,
"step": 2179
},
{
"epoch": 1.7219589257503949,
"grad_norm": 14.464898797136621,
"learning_rate": 4.586862222451582e-06,
"loss": 1.644295334815979,
"step": 2180
},
{
"epoch": 1.7227488151658767,
"grad_norm": 14.126992669178419,
"learning_rate": 4.582282453241108e-06,
"loss": 1.1674833297729492,
"step": 2181
},
{
"epoch": 1.7235387045813586,
"grad_norm": 15.220420323290032,
"learning_rate": 4.5777030369197895e-06,
"loss": 1.2685773372650146,
"step": 2182
},
{
"epoch": 1.7243285939968405,
"grad_norm": 9.673306531164522,
"learning_rate": 4.573123977356337e-06,
"loss": 1.3816874027252197,
"step": 2183
},
{
"epoch": 1.7251184834123223,
"grad_norm": 10.109225928810197,
"learning_rate": 4.568545278419154e-06,
"loss": 0.9319192171096802,
"step": 2184
},
{
"epoch": 1.7259083728278042,
"grad_norm": 9.14802501607995,
"learning_rate": 4.56396694397634e-06,
"loss": 1.8452692031860352,
"step": 2185
},
{
"epoch": 1.726698262243286,
"grad_norm": 12.908130900260579,
"learning_rate": 4.55938897789569e-06,
"loss": 1.1169474124908447,
"step": 2186
},
{
"epoch": 1.727488151658768,
"grad_norm": 13.515379103637612,
"learning_rate": 4.554811384044681e-06,
"loss": 2.0045344829559326,
"step": 2187
},
{
"epoch": 1.7282780410742498,
"grad_norm": 14.12974423042282,
"learning_rate": 4.550234166290481e-06,
"loss": 1.453951358795166,
"step": 2188
},
{
"epoch": 1.7290679304897314,
"grad_norm": 11.23753254826144,
"learning_rate": 4.545657328499937e-06,
"loss": 1.3621933460235596,
"step": 2189
},
{
"epoch": 1.7298578199052133,
"grad_norm": 13.330762458315919,
"learning_rate": 4.541080874539579e-06,
"loss": 2.1850600242614746,
"step": 2190
},
{
"epoch": 1.7306477093206951,
"grad_norm": 15.273321036263173,
"learning_rate": 4.5365048082756095e-06,
"loss": 2.321899890899658,
"step": 2191
},
{
"epoch": 1.731437598736177,
"grad_norm": 10.112662361275033,
"learning_rate": 4.531929133573906e-06,
"loss": 1.4877285957336426,
"step": 2192
},
{
"epoch": 1.7322274881516586,
"grad_norm": 12.097152921819895,
"learning_rate": 4.5273538543000095e-06,
"loss": 1.2780163288116455,
"step": 2193
},
{
"epoch": 1.7330173775671405,
"grad_norm": 10.432317615119704,
"learning_rate": 4.522778974319133e-06,
"loss": 1.5664427280426025,
"step": 2194
},
{
"epoch": 1.7338072669826223,
"grad_norm": 7.912507954338899,
"learning_rate": 4.518204497496151e-06,
"loss": 1.6642968654632568,
"step": 2195
},
{
"epoch": 1.7345971563981042,
"grad_norm": 11.522163423849092,
"learning_rate": 4.513630427695597e-06,
"loss": 2.0236799716949463,
"step": 2196
},
{
"epoch": 1.735387045813586,
"grad_norm": 9.953485109546435,
"learning_rate": 4.50905676878166e-06,
"loss": 2.1658871173858643,
"step": 2197
},
{
"epoch": 1.736176935229068,
"grad_norm": 13.055500452513753,
"learning_rate": 4.504483524618179e-06,
"loss": 0.8655682802200317,
"step": 2198
},
{
"epoch": 1.7369668246445498,
"grad_norm": 10.348280690590556,
"learning_rate": 4.499910699068649e-06,
"loss": 1.5977658033370972,
"step": 2199
},
{
"epoch": 1.7377567140600316,
"grad_norm": 9.252649833434882,
"learning_rate": 4.495338295996208e-06,
"loss": 1.5957226753234863,
"step": 2200
},
{
"epoch": 1.7385466034755135,
"grad_norm": 11.994815495201612,
"learning_rate": 4.4907663192636345e-06,
"loss": 0.9464290738105774,
"step": 2201
},
{
"epoch": 1.7393364928909953,
"grad_norm": 8.819889689255204,
"learning_rate": 4.486194772733356e-06,
"loss": 1.736267328262329,
"step": 2202
},
{
"epoch": 1.7401263823064772,
"grad_norm": 10.89375250297425,
"learning_rate": 4.4816236602674204e-06,
"loss": 1.5301947593688965,
"step": 2203
},
{
"epoch": 1.740916271721959,
"grad_norm": 28.361460541379106,
"learning_rate": 4.477052985727525e-06,
"loss": 2.966822385787964,
"step": 2204
},
{
"epoch": 1.741706161137441,
"grad_norm": 16.527934269673235,
"learning_rate": 4.47248275297499e-06,
"loss": 1.2912685871124268,
"step": 2205
},
{
"epoch": 1.7424960505529226,
"grad_norm": 7.535472217731121,
"learning_rate": 4.467912965870761e-06,
"loss": 1.1335291862487793,
"step": 2206
},
{
"epoch": 1.7432859399684044,
"grad_norm": 8.2905986876192,
"learning_rate": 4.463343628275412e-06,
"loss": 1.4929556846618652,
"step": 2207
},
{
"epoch": 1.7440758293838863,
"grad_norm": 12.59336480489856,
"learning_rate": 4.458774744049134e-06,
"loss": 1.0215003490447998,
"step": 2208
},
{
"epoch": 1.7448657187993681,
"grad_norm": 10.41128966672044,
"learning_rate": 4.454206317051734e-06,
"loss": 1.46480393409729,
"step": 2209
},
{
"epoch": 1.7456556082148498,
"grad_norm": 10.615522798499182,
"learning_rate": 4.449638351142632e-06,
"loss": 1.465099811553955,
"step": 2210
},
{
"epoch": 1.7464454976303316,
"grad_norm": 9.897944735916337,
"learning_rate": 4.445070850180865e-06,
"loss": 1.993574857711792,
"step": 2211
},
{
"epoch": 1.7472353870458135,
"grad_norm": 11.844266382618313,
"learning_rate": 4.4405038180250715e-06,
"loss": 1.3619449138641357,
"step": 2212
},
{
"epoch": 1.7480252764612954,
"grad_norm": 8.226740624728182,
"learning_rate": 4.435937258533496e-06,
"loss": 1.619173288345337,
"step": 2213
},
{
"epoch": 1.7488151658767772,
"grad_norm": 11.343296287634017,
"learning_rate": 4.43137117556398e-06,
"loss": 1.3998527526855469,
"step": 2214
},
{
"epoch": 1.749605055292259,
"grad_norm": 13.493219686349496,
"learning_rate": 4.426805572973968e-06,
"loss": 1.625508427619934,
"step": 2215
},
{
"epoch": 1.750394944707741,
"grad_norm": 12.38010034938405,
"learning_rate": 4.422240454620496e-06,
"loss": 2.122622013092041,
"step": 2216
},
{
"epoch": 1.7511848341232228,
"grad_norm": 14.980457953037133,
"learning_rate": 4.4176758243601885e-06,
"loss": 1.5803240537643433,
"step": 2217
},
{
"epoch": 1.7519747235387046,
"grad_norm": 9.389237583408196,
"learning_rate": 4.413111686049264e-06,
"loss": 1.1918928623199463,
"step": 2218
},
{
"epoch": 1.7527646129541865,
"grad_norm": 10.030002557757358,
"learning_rate": 4.40854804354352e-06,
"loss": 0.9838066101074219,
"step": 2219
},
{
"epoch": 1.7535545023696684,
"grad_norm": 9.03857007330401,
"learning_rate": 4.403984900698336e-06,
"loss": 1.4722139835357666,
"step": 2220
},
{
"epoch": 1.7543443917851502,
"grad_norm": 13.96770687161923,
"learning_rate": 4.399422261368671e-06,
"loss": 1.4021885395050049,
"step": 2221
},
{
"epoch": 1.7551342812006319,
"grad_norm": 8.562842515589852,
"learning_rate": 4.394860129409056e-06,
"loss": 1.6055982112884521,
"step": 2222
},
{
"epoch": 1.7559241706161137,
"grad_norm": 16.556021076354195,
"learning_rate": 4.390298508673596e-06,
"loss": 1.7816779613494873,
"step": 2223
},
{
"epoch": 1.7567140600315956,
"grad_norm": 12.43470769219723,
"learning_rate": 4.385737403015967e-06,
"loss": 1.5340075492858887,
"step": 2224
},
{
"epoch": 1.7575039494470774,
"grad_norm": 24.066858353519663,
"learning_rate": 4.3811768162894e-06,
"loss": 1.3754091262817383,
"step": 2225
},
{
"epoch": 1.758293838862559,
"grad_norm": 11.278622345644811,
"learning_rate": 4.376616752346696e-06,
"loss": 1.6019980907440186,
"step": 2226
},
{
"epoch": 1.759083728278041,
"grad_norm": 9.564683161114333,
"learning_rate": 4.372057215040213e-06,
"loss": 1.9771608114242554,
"step": 2227
},
{
"epoch": 1.7598736176935228,
"grad_norm": 12.195360323247021,
"learning_rate": 4.367498208221863e-06,
"loss": 1.3690104484558105,
"step": 2228
},
{
"epoch": 1.7606635071090047,
"grad_norm": 13.65143464575961,
"learning_rate": 4.362939735743108e-06,
"loss": 1.7435321807861328,
"step": 2229
},
{
"epoch": 1.7614533965244865,
"grad_norm": 14.339262082940179,
"learning_rate": 4.358381801454966e-06,
"loss": 0.8703070878982544,
"step": 2230
},
{
"epoch": 1.7622432859399684,
"grad_norm": 9.781836635217395,
"learning_rate": 4.353824409207988e-06,
"loss": 2.1449623107910156,
"step": 2231
},
{
"epoch": 1.7630331753554502,
"grad_norm": 9.901685682067114,
"learning_rate": 4.349267562852276e-06,
"loss": 1.5492500066757202,
"step": 2232
},
{
"epoch": 1.763823064770932,
"grad_norm": 11.408200068592867,
"learning_rate": 4.34471126623747e-06,
"loss": 1.4743053913116455,
"step": 2233
},
{
"epoch": 1.764612954186414,
"grad_norm": 11.620081956175499,
"learning_rate": 4.340155523212746e-06,
"loss": 1.495714545249939,
"step": 2234
},
{
"epoch": 1.7654028436018958,
"grad_norm": 12.144719137720987,
"learning_rate": 4.335600337626809e-06,
"loss": 1.1870977878570557,
"step": 2235
},
{
"epoch": 1.7661927330173777,
"grad_norm": 10.073805141373832,
"learning_rate": 4.331045713327894e-06,
"loss": 1.2681677341461182,
"step": 2236
},
{
"epoch": 1.7669826224328595,
"grad_norm": 13.71543165458782,
"learning_rate": 4.326491654163762e-06,
"loss": 1.1395235061645508,
"step": 2237
},
{
"epoch": 1.7677725118483414,
"grad_norm": 13.755051603926155,
"learning_rate": 4.321938163981699e-06,
"loss": 1.4418907165527344,
"step": 2238
},
{
"epoch": 1.768562401263823,
"grad_norm": 12.213862776654832,
"learning_rate": 4.317385246628508e-06,
"loss": 1.602990746498108,
"step": 2239
},
{
"epoch": 1.7693522906793049,
"grad_norm": 13.831956811214068,
"learning_rate": 4.312832905950509e-06,
"loss": 1.280178427696228,
"step": 2240
},
{
"epoch": 1.7701421800947867,
"grad_norm": 12.777246523961239,
"learning_rate": 4.308281145793535e-06,
"loss": 1.7366316318511963,
"step": 2241
},
{
"epoch": 1.7709320695102686,
"grad_norm": 7.280612489287066,
"learning_rate": 4.303729970002924e-06,
"loss": 1.6962597370147705,
"step": 2242
},
{
"epoch": 1.7717219589257502,
"grad_norm": 13.907639824752652,
"learning_rate": 4.2991793824235286e-06,
"loss": 1.6384978294372559,
"step": 2243
},
{
"epoch": 1.772511848341232,
"grad_norm": 11.962864332754167,
"learning_rate": 4.294629386899699e-06,
"loss": 1.5645751953125,
"step": 2244
},
{
"epoch": 1.773301737756714,
"grad_norm": 10.40002219488319,
"learning_rate": 4.290079987275285e-06,
"loss": 1.8200668096542358,
"step": 2245
},
{
"epoch": 1.7740916271721958,
"grad_norm": 7.796350477300916,
"learning_rate": 4.285531187393639e-06,
"loss": 1.8340072631835938,
"step": 2246
},
{
"epoch": 1.7748815165876777,
"grad_norm": 14.305874479858872,
"learning_rate": 4.280982991097594e-06,
"loss": 1.3229985237121582,
"step": 2247
},
{
"epoch": 1.7756714060031595,
"grad_norm": 17.06836253359979,
"learning_rate": 4.276435402229488e-06,
"loss": 1.6926765441894531,
"step": 2248
},
{
"epoch": 1.7764612954186414,
"grad_norm": 14.834597218782198,
"learning_rate": 4.271888424631137e-06,
"loss": 1.4247950315475464,
"step": 2249
},
{
"epoch": 1.7772511848341233,
"grad_norm": 12.619750807648803,
"learning_rate": 4.267342062143841e-06,
"loss": 1.1141537427902222,
"step": 2250
},
{
"epoch": 1.7780410742496051,
"grad_norm": 12.049893792135833,
"learning_rate": 4.2627963186083844e-06,
"loss": 1.3787682056427002,
"step": 2251
},
{
"epoch": 1.778830963665087,
"grad_norm": 17.958477722896514,
"learning_rate": 4.258251197865028e-06,
"loss": 1.4096425771713257,
"step": 2252
},
{
"epoch": 1.7796208530805688,
"grad_norm": 10.777731590166498,
"learning_rate": 4.253706703753499e-06,
"loss": 1.2030799388885498,
"step": 2253
},
{
"epoch": 1.7804107424960507,
"grad_norm": 17.628412607375658,
"learning_rate": 4.249162840113005e-06,
"loss": 2.639885425567627,
"step": 2254
},
{
"epoch": 1.7812006319115326,
"grad_norm": 14.704071301126223,
"learning_rate": 4.244619610782216e-06,
"loss": 1.2214397192001343,
"step": 2255
},
{
"epoch": 1.7819905213270142,
"grad_norm": 12.269537431737808,
"learning_rate": 4.240077019599268e-06,
"loss": 0.9519909620285034,
"step": 2256
},
{
"epoch": 1.782780410742496,
"grad_norm": 7.752341256018781,
"learning_rate": 4.235535070401757e-06,
"loss": 1.37101149559021,
"step": 2257
},
{
"epoch": 1.783570300157978,
"grad_norm": 16.293151882011077,
"learning_rate": 4.2309937670267355e-06,
"loss": 1.9549283981323242,
"step": 2258
},
{
"epoch": 1.7843601895734598,
"grad_norm": 12.578113105487619,
"learning_rate": 4.22645311331071e-06,
"loss": 1.6407248973846436,
"step": 2259
},
{
"epoch": 1.7851500789889414,
"grad_norm": 11.529806537339917,
"learning_rate": 4.221913113089643e-06,
"loss": 1.6672661304473877,
"step": 2260
},
{
"epoch": 1.7859399684044233,
"grad_norm": 10.540110804631544,
"learning_rate": 4.217373770198939e-06,
"loss": 1.268946647644043,
"step": 2261
},
{
"epoch": 1.7867298578199051,
"grad_norm": 12.276750271201358,
"learning_rate": 4.212835088473455e-06,
"loss": 1.6153327226638794,
"step": 2262
},
{
"epoch": 1.787519747235387,
"grad_norm": 8.08704746323851,
"learning_rate": 4.208297071747476e-06,
"loss": 2.285081386566162,
"step": 2263
},
{
"epoch": 1.7883096366508688,
"grad_norm": 18.248267446477442,
"learning_rate": 4.20375972385474e-06,
"loss": 1.9024407863616943,
"step": 2264
},
{
"epoch": 1.7890995260663507,
"grad_norm": 9.775614166254378,
"learning_rate": 4.1992230486284105e-06,
"loss": 1.5444855690002441,
"step": 2265
},
{
"epoch": 1.7898894154818326,
"grad_norm": 13.515175274258306,
"learning_rate": 4.194687049901086e-06,
"loss": 1.2774243354797363,
"step": 2266
},
{
"epoch": 1.7906793048973144,
"grad_norm": 12.940831969941822,
"learning_rate": 4.190151731504795e-06,
"loss": 1.5125453472137451,
"step": 2267
},
{
"epoch": 1.7914691943127963,
"grad_norm": 9.973015494639506,
"learning_rate": 4.185617097270989e-06,
"loss": 1.5182291269302368,
"step": 2268
},
{
"epoch": 1.7922590837282781,
"grad_norm": 10.794888121500216,
"learning_rate": 4.181083151030539e-06,
"loss": 0.991271436214447,
"step": 2269
},
{
"epoch": 1.79304897314376,
"grad_norm": 23.58963526924935,
"learning_rate": 4.176549896613739e-06,
"loss": 1.3529629707336426,
"step": 2270
},
{
"epoch": 1.7938388625592419,
"grad_norm": 15.70406604388244,
"learning_rate": 4.1720173378503e-06,
"loss": 1.1336208581924438,
"step": 2271
},
{
"epoch": 1.7946287519747235,
"grad_norm": 8.10592167127099,
"learning_rate": 4.1674854785693395e-06,
"loss": 1.4463564157485962,
"step": 2272
},
{
"epoch": 1.7954186413902053,
"grad_norm": 16.247088268768564,
"learning_rate": 4.162954322599389e-06,
"loss": 1.5160022974014282,
"step": 2273
},
{
"epoch": 1.7962085308056872,
"grad_norm": 14.65045540543928,
"learning_rate": 4.158423873768382e-06,
"loss": 1.3699758052825928,
"step": 2274
},
{
"epoch": 1.796998420221169,
"grad_norm": 8.615488903760822,
"learning_rate": 4.153894135903658e-06,
"loss": 1.9635517597198486,
"step": 2275
},
{
"epoch": 1.7977883096366507,
"grad_norm": 7.489719358192559,
"learning_rate": 4.149365112831954e-06,
"loss": 1.8137181997299194,
"step": 2276
},
{
"epoch": 1.7985781990521326,
"grad_norm": 9.694581073663853,
"learning_rate": 4.144836808379404e-06,
"loss": 1.3416333198547363,
"step": 2277
},
{
"epoch": 1.7993680884676144,
"grad_norm": 12.289624728131038,
"learning_rate": 4.140309226371534e-06,
"loss": 1.53472900390625,
"step": 2278
},
{
"epoch": 1.8001579778830963,
"grad_norm": 11.175578971485294,
"learning_rate": 4.135782370633263e-06,
"loss": 1.6156749725341797,
"step": 2279
},
{
"epoch": 1.8009478672985781,
"grad_norm": 9.238967112564058,
"learning_rate": 4.131256244988888e-06,
"loss": 0.7965636253356934,
"step": 2280
},
{
"epoch": 1.80173775671406,
"grad_norm": 12.814521660266232,
"learning_rate": 4.1267308532621e-06,
"loss": 1.129547357559204,
"step": 2281
},
{
"epoch": 1.8025276461295419,
"grad_norm": 15.163564996840357,
"learning_rate": 4.122206199275961e-06,
"loss": 1.5986425876617432,
"step": 2282
},
{
"epoch": 1.8033175355450237,
"grad_norm": 12.370089121691372,
"learning_rate": 4.117682286852916e-06,
"loss": 1.1130129098892212,
"step": 2283
},
{
"epoch": 1.8041074249605056,
"grad_norm": 7.3337703985283325,
"learning_rate": 4.1131591198147825e-06,
"loss": 1.4250520467758179,
"step": 2284
},
{
"epoch": 1.8048973143759874,
"grad_norm": 15.555243696941096,
"learning_rate": 4.108636701982744e-06,
"loss": 1.0984294414520264,
"step": 2285
},
{
"epoch": 1.8056872037914693,
"grad_norm": 11.624250828595164,
"learning_rate": 4.104115037177354e-06,
"loss": 1.8641960620880127,
"step": 2286
},
{
"epoch": 1.8064770932069512,
"grad_norm": 8.641764907398887,
"learning_rate": 4.09959412921853e-06,
"loss": 2.202810287475586,
"step": 2287
},
{
"epoch": 1.807266982622433,
"grad_norm": 10.272414502220078,
"learning_rate": 4.0950739819255516e-06,
"loss": 1.1767101287841797,
"step": 2288
},
{
"epoch": 1.8080568720379147,
"grad_norm": 10.73523808838913,
"learning_rate": 4.090554599117053e-06,
"loss": 1.3976107835769653,
"step": 2289
},
{
"epoch": 1.8088467614533965,
"grad_norm": 9.029022789020436,
"learning_rate": 4.086035984611026e-06,
"loss": 1.4328737258911133,
"step": 2290
},
{
"epoch": 1.8096366508688784,
"grad_norm": 12.75831158267409,
"learning_rate": 4.081518142224806e-06,
"loss": 1.5775648355484009,
"step": 2291
},
{
"epoch": 1.8104265402843602,
"grad_norm": 8.797489371278768,
"learning_rate": 4.077001075775085e-06,
"loss": 1.8569629192352295,
"step": 2292
},
{
"epoch": 1.8112164296998419,
"grad_norm": 19.2818864408269,
"learning_rate": 4.072484789077894e-06,
"loss": 1.569921851158142,
"step": 2293
},
{
"epoch": 1.8120063191153237,
"grad_norm": 9.606639381333586,
"learning_rate": 4.0679692859486076e-06,
"loss": 1.5646262168884277,
"step": 2294
},
{
"epoch": 1.8127962085308056,
"grad_norm": 15.982700710777124,
"learning_rate": 4.06345457020194e-06,
"loss": 1.6539026498794556,
"step": 2295
},
{
"epoch": 1.8135860979462874,
"grad_norm": 11.758946943159149,
"learning_rate": 4.0589406456519335e-06,
"loss": 2.3593804836273193,
"step": 2296
},
{
"epoch": 1.8143759873617693,
"grad_norm": 10.648338644534252,
"learning_rate": 4.054427516111968e-06,
"loss": 1.4271035194396973,
"step": 2297
},
{
"epoch": 1.8151658767772512,
"grad_norm": 13.889260348670108,
"learning_rate": 4.049915185394751e-06,
"loss": 1.5787549018859863,
"step": 2298
},
{
"epoch": 1.815955766192733,
"grad_norm": 11.265998620973773,
"learning_rate": 4.045403657312313e-06,
"loss": 1.2817461490631104,
"step": 2299
},
{
"epoch": 1.8167456556082149,
"grad_norm": 15.757768299774062,
"learning_rate": 4.040892935676011e-06,
"loss": 1.6207523345947266,
"step": 2300
},
{
"epoch": 1.8175355450236967,
"grad_norm": 10.598445710266809,
"learning_rate": 4.036383024296515e-06,
"loss": 1.390448808670044,
"step": 2301
},
{
"epoch": 1.8183254344391786,
"grad_norm": 10.708487190144936,
"learning_rate": 4.031873926983813e-06,
"loss": 1.3507099151611328,
"step": 2302
},
{
"epoch": 1.8191153238546605,
"grad_norm": 13.04295029302765,
"learning_rate": 4.027365647547204e-06,
"loss": 1.3747905492782593,
"step": 2303
},
{
"epoch": 1.8199052132701423,
"grad_norm": 15.06243045733676,
"learning_rate": 4.0228581897953e-06,
"loss": 1.7040674686431885,
"step": 2304
},
{
"epoch": 1.8206951026856242,
"grad_norm": 8.112658972064233,
"learning_rate": 4.018351557536015e-06,
"loss": 1.1987684965133667,
"step": 2305
},
{
"epoch": 1.8214849921011058,
"grad_norm": 6.926031154500966,
"learning_rate": 4.013845754576567e-06,
"loss": 1.538601040840149,
"step": 2306
},
{
"epoch": 1.8222748815165877,
"grad_norm": 7.86014809309979,
"learning_rate": 4.0093407847234725e-06,
"loss": 1.5583560466766357,
"step": 2307
},
{
"epoch": 1.8230647709320695,
"grad_norm": 14.5747224324332,
"learning_rate": 4.004836651782545e-06,
"loss": 1.8234143257141113,
"step": 2308
},
{
"epoch": 1.8238546603475514,
"grad_norm": 13.904991074728608,
"learning_rate": 4.000333359558891e-06,
"loss": 1.4908664226531982,
"step": 2309
},
{
"epoch": 1.824644549763033,
"grad_norm": 10.205474451588882,
"learning_rate": 3.995830911856907e-06,
"loss": 1.1028133630752563,
"step": 2310
},
{
"epoch": 1.825434439178515,
"grad_norm": 10.206650398885225,
"learning_rate": 3.991329312480275e-06,
"loss": 0.9024048447608948,
"step": 2311
},
{
"epoch": 1.8262243285939967,
"grad_norm": 20.427132609768222,
"learning_rate": 3.986828565231963e-06,
"loss": 1.241244912147522,
"step": 2312
},
{
"epoch": 1.8270142180094786,
"grad_norm": 5.947375976678703,
"learning_rate": 3.982328673914215e-06,
"loss": 1.3205927610397339,
"step": 2313
},
{
"epoch": 1.8278041074249605,
"grad_norm": 11.631133599224034,
"learning_rate": 3.977829642328553e-06,
"loss": 1.45220947265625,
"step": 2314
},
{
"epoch": 1.8285939968404423,
"grad_norm": 30.658181570348717,
"learning_rate": 3.9733314742757755e-06,
"loss": 2.151265859603882,
"step": 2315
},
{
"epoch": 1.8293838862559242,
"grad_norm": 14.081842026874503,
"learning_rate": 3.96883417355595e-06,
"loss": 1.9974944591522217,
"step": 2316
},
{
"epoch": 1.830173775671406,
"grad_norm": 14.732530661028573,
"learning_rate": 3.964337743968411e-06,
"loss": 1.3346126079559326,
"step": 2317
},
{
"epoch": 1.830963665086888,
"grad_norm": 11.657064075884962,
"learning_rate": 3.959842189311756e-06,
"loss": 1.1636943817138672,
"step": 2318
},
{
"epoch": 1.8317535545023698,
"grad_norm": 21.842024915662858,
"learning_rate": 3.955347513383842e-06,
"loss": 2.715574264526367,
"step": 2319
},
{
"epoch": 1.8325434439178516,
"grad_norm": 8.945848850752133,
"learning_rate": 3.950853719981792e-06,
"loss": 1.8645424842834473,
"step": 2320
},
{
"epoch": 1.8333333333333335,
"grad_norm": 7.363041428428467,
"learning_rate": 3.946360812901973e-06,
"loss": 2.0045013427734375,
"step": 2321
},
{
"epoch": 1.8341232227488151,
"grad_norm": 14.121661847109614,
"learning_rate": 3.941868795940008e-06,
"loss": 1.6329420804977417,
"step": 2322
},
{
"epoch": 1.834913112164297,
"grad_norm": 12.076873368108538,
"learning_rate": 3.937377672890771e-06,
"loss": 1.9567821025848389,
"step": 2323
},
{
"epoch": 1.8357030015797788,
"grad_norm": 9.278188737515212,
"learning_rate": 3.932887447548373e-06,
"loss": 1.7651054859161377,
"step": 2324
},
{
"epoch": 1.8364928909952607,
"grad_norm": 7.783361269922115,
"learning_rate": 3.928398123706174e-06,
"loss": 1.2713581323623657,
"step": 2325
},
{
"epoch": 1.8372827804107423,
"grad_norm": 10.329079837290895,
"learning_rate": 3.923909705156768e-06,
"loss": 1.2148998975753784,
"step": 2326
},
{
"epoch": 1.8380726698262242,
"grad_norm": 8.376184643701944,
"learning_rate": 3.919422195691987e-06,
"loss": 1.1715630292892456,
"step": 2327
},
{
"epoch": 1.838862559241706,
"grad_norm": 9.805258629191533,
"learning_rate": 3.9149355991028955e-06,
"loss": 1.2779135704040527,
"step": 2328
},
{
"epoch": 1.839652448657188,
"grad_norm": 14.601491970830052,
"learning_rate": 3.910449919179782e-06,
"loss": 1.2939941883087158,
"step": 2329
},
{
"epoch": 1.8404423380726698,
"grad_norm": 9.430037846938918,
"learning_rate": 3.905965159712164e-06,
"loss": 1.3495118618011475,
"step": 2330
},
{
"epoch": 1.8412322274881516,
"grad_norm": 10.818117159129248,
"learning_rate": 3.901481324488781e-06,
"loss": 1.3297629356384277,
"step": 2331
},
{
"epoch": 1.8420221169036335,
"grad_norm": 11.805518524216797,
"learning_rate": 3.896998417297593e-06,
"loss": 1.4001942873001099,
"step": 2332
},
{
"epoch": 1.8428120063191153,
"grad_norm": 17.217093882822702,
"learning_rate": 3.892516441925776e-06,
"loss": 1.0463072061538696,
"step": 2333
},
{
"epoch": 1.8436018957345972,
"grad_norm": 10.365237443982673,
"learning_rate": 3.888035402159713e-06,
"loss": 0.9372630715370178,
"step": 2334
},
{
"epoch": 1.844391785150079,
"grad_norm": 14.0678966475704,
"learning_rate": 3.883555301785005e-06,
"loss": 1.43800950050354,
"step": 2335
},
{
"epoch": 1.845181674565561,
"grad_norm": 11.354835493412002,
"learning_rate": 3.879076144586455e-06,
"loss": 1.584133505821228,
"step": 2336
},
{
"epoch": 1.8459715639810428,
"grad_norm": 9.595921862598702,
"learning_rate": 3.874597934348069e-06,
"loss": 1.214270830154419,
"step": 2337
},
{
"epoch": 1.8467614533965246,
"grad_norm": 16.11156740518648,
"learning_rate": 3.870120674853053e-06,
"loss": 1.300584316253662,
"step": 2338
},
{
"epoch": 1.8475513428120063,
"grad_norm": 10.555883298615067,
"learning_rate": 3.865644369883814e-06,
"loss": 1.2705886363983154,
"step": 2339
},
{
"epoch": 1.8483412322274881,
"grad_norm": 13.361585040619424,
"learning_rate": 3.861169023221944e-06,
"loss": 1.4234365224838257,
"step": 2340
},
{
"epoch": 1.84913112164297,
"grad_norm": 8.97574341942341,
"learning_rate": 3.856694638648235e-06,
"loss": 2.0097668170928955,
"step": 2341
},
{
"epoch": 1.8499210110584519,
"grad_norm": 19.363394658374865,
"learning_rate": 3.85222121994266e-06,
"loss": 2.359208106994629,
"step": 2342
},
{
"epoch": 1.8507109004739335,
"grad_norm": 14.425059595509914,
"learning_rate": 3.847748770884376e-06,
"loss": 1.24526047706604,
"step": 2343
},
{
"epoch": 1.8515007898894154,
"grad_norm": 16.95844235432838,
"learning_rate": 3.8432772952517285e-06,
"loss": 1.2295589447021484,
"step": 2344
},
{
"epoch": 1.8522906793048972,
"grad_norm": 17.76856930002662,
"learning_rate": 3.8388067968222285e-06,
"loss": 1.4502266645431519,
"step": 2345
},
{
"epoch": 1.853080568720379,
"grad_norm": 9.344678461709814,
"learning_rate": 3.834337279372571e-06,
"loss": 1.0235224962234497,
"step": 2346
},
{
"epoch": 1.853870458135861,
"grad_norm": 7.771543247668636,
"learning_rate": 3.829868746678617e-06,
"loss": 0.9621250629425049,
"step": 2347
},
{
"epoch": 1.8546603475513428,
"grad_norm": 8.997193063535544,
"learning_rate": 3.825401202515401e-06,
"loss": 1.0980961322784424,
"step": 2348
},
{
"epoch": 1.8554502369668247,
"grad_norm": 11.396355571537933,
"learning_rate": 3.820934650657116e-06,
"loss": 1.376495599746704,
"step": 2349
},
{
"epoch": 1.8562401263823065,
"grad_norm": 12.015491646099614,
"learning_rate": 3.816469094877122e-06,
"loss": 1.1994984149932861,
"step": 2350
},
{
"epoch": 1.8570300157977884,
"grad_norm": 13.018195454364,
"learning_rate": 3.812004538947933e-06,
"loss": 1.5184617042541504,
"step": 2351
},
{
"epoch": 1.8578199052132702,
"grad_norm": 16.613834476479624,
"learning_rate": 3.807540986641221e-06,
"loss": 1.3802169561386108,
"step": 2352
},
{
"epoch": 1.858609794628752,
"grad_norm": 8.55272399415255,
"learning_rate": 3.803078441727811e-06,
"loss": 2.0620877742767334,
"step": 2353
},
{
"epoch": 1.859399684044234,
"grad_norm": 16.517593501003834,
"learning_rate": 3.7986169079776738e-06,
"loss": 1.021907925605774,
"step": 2354
},
{
"epoch": 1.8601895734597158,
"grad_norm": 12.612088978458775,
"learning_rate": 3.794156389159932e-06,
"loss": 1.9003463983535767,
"step": 2355
},
{
"epoch": 1.8609794628751974,
"grad_norm": 9.508374505836391,
"learning_rate": 3.78969688904284e-06,
"loss": 1.6055456399917603,
"step": 2356
},
{
"epoch": 1.8617693522906793,
"grad_norm": 13.25180074627195,
"learning_rate": 3.7852384113938033e-06,
"loss": 1.5654323101043701,
"step": 2357
},
{
"epoch": 1.8625592417061612,
"grad_norm": 7.960467731299903,
"learning_rate": 3.7807809599793564e-06,
"loss": 1.5566787719726562,
"step": 2358
},
{
"epoch": 1.863349131121643,
"grad_norm": 10.271738225651552,
"learning_rate": 3.7763245385651688e-06,
"loss": 0.9510334134101868,
"step": 2359
},
{
"epoch": 1.8641390205371247,
"grad_norm": 10.69224989126236,
"learning_rate": 3.771869150916041e-06,
"loss": 1.842404842376709,
"step": 2360
},
{
"epoch": 1.8649289099526065,
"grad_norm": 10.785184946501737,
"learning_rate": 3.7674148007959e-06,
"loss": 1.2416088581085205,
"step": 2361
},
{
"epoch": 1.8657187993680884,
"grad_norm": 8.752263843413806,
"learning_rate": 3.762961491967793e-06,
"loss": 2.3513264656066895,
"step": 2362
},
{
"epoch": 1.8665086887835702,
"grad_norm": 11.250525502071884,
"learning_rate": 3.7585092281938906e-06,
"loss": 1.8920762538909912,
"step": 2363
},
{
"epoch": 1.867298578199052,
"grad_norm": 8.990065479551909,
"learning_rate": 3.754058013235481e-06,
"loss": 1.415196180343628,
"step": 2364
},
{
"epoch": 1.868088467614534,
"grad_norm": 8.516130774368182,
"learning_rate": 3.7496078508529655e-06,
"loss": 1.542191743850708,
"step": 2365
},
{
"epoch": 1.8688783570300158,
"grad_norm": 9.072473337050232,
"learning_rate": 3.7451587448058553e-06,
"loss": 1.6856296062469482,
"step": 2366
},
{
"epoch": 1.8696682464454977,
"grad_norm": 12.708061491523079,
"learning_rate": 3.7407106988527687e-06,
"loss": 1.783271074295044,
"step": 2367
},
{
"epoch": 1.8704581358609795,
"grad_norm": 10.769206669854055,
"learning_rate": 3.7362637167514294e-06,
"loss": 1.5678787231445312,
"step": 2368
},
{
"epoch": 1.8712480252764614,
"grad_norm": 20.106127018365843,
"learning_rate": 3.731817802258665e-06,
"loss": 1.4182826280593872,
"step": 2369
},
{
"epoch": 1.8720379146919433,
"grad_norm": 14.613741216644682,
"learning_rate": 3.7273729591303977e-06,
"loss": 0.9464998245239258,
"step": 2370
},
{
"epoch": 1.872827804107425,
"grad_norm": 14.57353180320642,
"learning_rate": 3.7229291911216438e-06,
"loss": 1.2734510898590088,
"step": 2371
},
{
"epoch": 1.8736176935229067,
"grad_norm": 18.57604525014746,
"learning_rate": 3.718486501986517e-06,
"loss": 1.3280824422836304,
"step": 2372
},
{
"epoch": 1.8744075829383886,
"grad_norm": 10.655701944239475,
"learning_rate": 3.7140448954782118e-06,
"loss": 2.1533942222595215,
"step": 2373
},
{
"epoch": 1.8751974723538705,
"grad_norm": 10.436689103007193,
"learning_rate": 3.7096043753490135e-06,
"loss": 3.3665125370025635,
"step": 2374
},
{
"epoch": 1.8759873617693523,
"grad_norm": 11.030169703908209,
"learning_rate": 3.7051649453502874e-06,
"loss": 2.9506380558013916,
"step": 2375
},
{
"epoch": 1.876777251184834,
"grad_norm": 14.013016012791953,
"learning_rate": 3.7007266092324807e-06,
"loss": 1.3356175422668457,
"step": 2376
},
{
"epoch": 1.8775671406003158,
"grad_norm": 17.981966299258165,
"learning_rate": 3.6962893707451136e-06,
"loss": 0.6594985723495483,
"step": 2377
},
{
"epoch": 1.8783570300157977,
"grad_norm": 13.564577074220647,
"learning_rate": 3.6918532336367784e-06,
"loss": 2.0847041606903076,
"step": 2378
},
{
"epoch": 1.8791469194312795,
"grad_norm": 15.468644474324055,
"learning_rate": 3.6874182016551384e-06,
"loss": 1.3281214237213135,
"step": 2379
},
{
"epoch": 1.8799368088467614,
"grad_norm": 18.198119013305984,
"learning_rate": 3.6829842785469237e-06,
"loss": 2.035587787628174,
"step": 2380
},
{
"epoch": 1.8807266982622433,
"grad_norm": 7.022992331825169,
"learning_rate": 3.678551468057928e-06,
"loss": 1.31044602394104,
"step": 2381
},
{
"epoch": 1.8815165876777251,
"grad_norm": 11.785392947710033,
"learning_rate": 3.6741197739330036e-06,
"loss": 1.1300781965255737,
"step": 2382
},
{
"epoch": 1.882306477093207,
"grad_norm": 13.706145484472717,
"learning_rate": 3.6696891999160624e-06,
"loss": 2.2334394454956055,
"step": 2383
},
{
"epoch": 1.8830963665086888,
"grad_norm": 9.781655657096698,
"learning_rate": 3.6652597497500632e-06,
"loss": 1.2571077346801758,
"step": 2384
},
{
"epoch": 1.8838862559241707,
"grad_norm": 16.34961943071829,
"learning_rate": 3.660831427177023e-06,
"loss": 1.1918299198150635,
"step": 2385
},
{
"epoch": 1.8846761453396526,
"grad_norm": 9.661383436716706,
"learning_rate": 3.6564042359380035e-06,
"loss": 1.571300983428955,
"step": 2386
},
{
"epoch": 1.8854660347551344,
"grad_norm": 9.08149537153619,
"learning_rate": 3.651978179773109e-06,
"loss": 1.381611943244934,
"step": 2387
},
{
"epoch": 1.8862559241706163,
"grad_norm": 8.947483706023426,
"learning_rate": 3.647553262421489e-06,
"loss": 1.569331169128418,
"step": 2388
},
{
"epoch": 1.887045813586098,
"grad_norm": 15.643579814373975,
"learning_rate": 3.6431294876213256e-06,
"loss": 1.7174663543701172,
"step": 2389
},
{
"epoch": 1.8878357030015798,
"grad_norm": 11.74224852213168,
"learning_rate": 3.6387068591098394e-06,
"loss": 1.6020495891571045,
"step": 2390
},
{
"epoch": 1.8886255924170616,
"grad_norm": 17.71812535453601,
"learning_rate": 3.6342853806232803e-06,
"loss": 2.5341310501098633,
"step": 2391
},
{
"epoch": 1.8894154818325435,
"grad_norm": 19.738562508307506,
"learning_rate": 3.6298650558969297e-06,
"loss": 1.986846685409546,
"step": 2392
},
{
"epoch": 1.8902053712480251,
"grad_norm": 11.658929421377039,
"learning_rate": 3.6254458886650916e-06,
"loss": 1.847143530845642,
"step": 2393
},
{
"epoch": 1.890995260663507,
"grad_norm": 12.222377083494536,
"learning_rate": 3.6210278826610947e-06,
"loss": 1.7250714302062988,
"step": 2394
},
{
"epoch": 1.8917851500789888,
"grad_norm": 23.353812919786044,
"learning_rate": 3.6166110416172824e-06,
"loss": 1.1605632305145264,
"step": 2395
},
{
"epoch": 1.8925750394944707,
"grad_norm": 13.967775786929394,
"learning_rate": 3.612195369265016e-06,
"loss": 1.477918028831482,
"step": 2396
},
{
"epoch": 1.8933649289099526,
"grad_norm": 7.824641265572897,
"learning_rate": 3.607780869334673e-06,
"loss": 0.8736203908920288,
"step": 2397
},
{
"epoch": 1.8941548183254344,
"grad_norm": 15.896684803906586,
"learning_rate": 3.6033675455556362e-06,
"loss": 1.7004368305206299,
"step": 2398
},
{
"epoch": 1.8949447077409163,
"grad_norm": 12.602951334291856,
"learning_rate": 3.5989554016562973e-06,
"loss": 1.6466461420059204,
"step": 2399
},
{
"epoch": 1.8957345971563981,
"grad_norm": 23.183480711381492,
"learning_rate": 3.594544441364046e-06,
"loss": 2.251540184020996,
"step": 2400
},
{
"epoch": 1.89652448657188,
"grad_norm": 10.791019706405871,
"learning_rate": 3.590134668405281e-06,
"loss": 2.160658121109009,
"step": 2401
},
{
"epoch": 1.8973143759873619,
"grad_norm": 20.769430115923996,
"learning_rate": 3.5857260865053888e-06,
"loss": 2.002129077911377,
"step": 2402
},
{
"epoch": 1.8981042654028437,
"grad_norm": 11.654913849622542,
"learning_rate": 3.5813186993887564e-06,
"loss": 1.1885507106781006,
"step": 2403
},
{
"epoch": 1.8988941548183256,
"grad_norm": 11.018801992801427,
"learning_rate": 3.5769125107787615e-06,
"loss": 1.4551243782043457,
"step": 2404
},
{
"epoch": 1.8996840442338072,
"grad_norm": 13.81180119203942,
"learning_rate": 3.57250752439776e-06,
"loss": 1.8174326419830322,
"step": 2405
},
{
"epoch": 1.900473933649289,
"grad_norm": 8.538596914130633,
"learning_rate": 3.568103743967104e-06,
"loss": 2.0413808822631836,
"step": 2406
},
{
"epoch": 1.901263823064771,
"grad_norm": 13.208971367362476,
"learning_rate": 3.56370117320712e-06,
"loss": 1.130143642425537,
"step": 2407
},
{
"epoch": 1.9020537124802528,
"grad_norm": 20.133975799192914,
"learning_rate": 3.559299815837114e-06,
"loss": 2.2564547061920166,
"step": 2408
},
{
"epoch": 1.9028436018957346,
"grad_norm": 8.040080834857976,
"learning_rate": 3.5548996755753686e-06,
"loss": 1.9948700666427612,
"step": 2409
},
{
"epoch": 1.9036334913112163,
"grad_norm": 10.107125326201913,
"learning_rate": 3.550500756139137e-06,
"loss": 1.2073781490325928,
"step": 2410
},
{
"epoch": 1.9044233807266981,
"grad_norm": 12.074035310627842,
"learning_rate": 3.546103061244639e-06,
"loss": 1.4633095264434814,
"step": 2411
},
{
"epoch": 1.90521327014218,
"grad_norm": 8.748368688532468,
"learning_rate": 3.5417065946070616e-06,
"loss": 1.4983105659484863,
"step": 2412
},
{
"epoch": 1.9060031595576619,
"grad_norm": 8.119449198425269,
"learning_rate": 3.5373113599405558e-06,
"loss": 1.4577915668487549,
"step": 2413
},
{
"epoch": 1.9067930489731437,
"grad_norm": 8.602605817692487,
"learning_rate": 3.53291736095823e-06,
"loss": 1.7091364860534668,
"step": 2414
},
{
"epoch": 1.9075829383886256,
"grad_norm": 9.57318217061906,
"learning_rate": 3.52852460137215e-06,
"loss": 0.9629848003387451,
"step": 2415
},
{
"epoch": 1.9083728278041074,
"grad_norm": 12.401875854076282,
"learning_rate": 3.5241330848933297e-06,
"loss": 1.1933588981628418,
"step": 2416
},
{
"epoch": 1.9091627172195893,
"grad_norm": 19.330318737313394,
"learning_rate": 3.5197428152317405e-06,
"loss": 1.4718396663665771,
"step": 2417
},
{
"epoch": 1.9099526066350712,
"grad_norm": 10.724633948475557,
"learning_rate": 3.5153537960962953e-06,
"loss": 1.8858482837677002,
"step": 2418
},
{
"epoch": 1.910742496050553,
"grad_norm": 9.259587522429628,
"learning_rate": 3.510966031194851e-06,
"loss": 1.550106167793274,
"step": 2419
},
{
"epoch": 1.9115323854660349,
"grad_norm": 10.146358708655796,
"learning_rate": 3.5065795242342083e-06,
"loss": 1.5809369087219238,
"step": 2420
},
{
"epoch": 1.9123222748815167,
"grad_norm": 13.004346105846725,
"learning_rate": 3.5021942789201026e-06,
"loss": 1.3224411010742188,
"step": 2421
},
{
"epoch": 1.9131121642969984,
"grad_norm": 8.080997608907936,
"learning_rate": 3.4978102989572007e-06,
"loss": 1.6125473976135254,
"step": 2422
},
{
"epoch": 1.9139020537124802,
"grad_norm": 12.741289791550335,
"learning_rate": 3.4934275880491055e-06,
"loss": 1.4377524852752686,
"step": 2423
},
{
"epoch": 1.914691943127962,
"grad_norm": 7.712596175637102,
"learning_rate": 3.4890461498983436e-06,
"loss": 1.7991526126861572,
"step": 2424
},
{
"epoch": 1.915481832543444,
"grad_norm": 9.444274006754885,
"learning_rate": 3.4846659882063704e-06,
"loss": 1.237257957458496,
"step": 2425
},
{
"epoch": 1.9162717219589256,
"grad_norm": 12.543456884340403,
"learning_rate": 3.4802871066735623e-06,
"loss": 1.9164607524871826,
"step": 2426
},
{
"epoch": 1.9170616113744074,
"grad_norm": 13.74775394370082,
"learning_rate": 3.4759095089992094e-06,
"loss": 1.111218810081482,
"step": 2427
},
{
"epoch": 1.9178515007898893,
"grad_norm": 13.934197139537574,
"learning_rate": 3.4715331988815217e-06,
"loss": 1.1446185111999512,
"step": 2428
},
{
"epoch": 1.9186413902053712,
"grad_norm": 8.58653764163054,
"learning_rate": 3.4671581800176208e-06,
"loss": 1.4410523176193237,
"step": 2429
},
{
"epoch": 1.919431279620853,
"grad_norm": 13.847657929892328,
"learning_rate": 3.462784456103537e-06,
"loss": 1.9751472473144531,
"step": 2430
},
{
"epoch": 1.9202211690363349,
"grad_norm": 11.542004822189035,
"learning_rate": 3.4584120308342068e-06,
"loss": 0.9431929588317871,
"step": 2431
},
{
"epoch": 1.9210110584518167,
"grad_norm": 12.290366977682353,
"learning_rate": 3.454040907903472e-06,
"loss": 1.09321129322052,
"step": 2432
},
{
"epoch": 1.9218009478672986,
"grad_norm": 10.957843707967365,
"learning_rate": 3.4496710910040654e-06,
"loss": 2.014118194580078,
"step": 2433
},
{
"epoch": 1.9225908372827805,
"grad_norm": 30.636247581974626,
"learning_rate": 3.4453025838276283e-06,
"loss": 1.576662302017212,
"step": 2434
},
{
"epoch": 1.9233807266982623,
"grad_norm": 15.05397997539076,
"learning_rate": 3.4409353900646873e-06,
"loss": 1.2868478298187256,
"step": 2435
},
{
"epoch": 1.9241706161137442,
"grad_norm": 12.897559358848328,
"learning_rate": 3.4365695134046616e-06,
"loss": 1.1865384578704834,
"step": 2436
},
{
"epoch": 1.924960505529226,
"grad_norm": 15.253959007398343,
"learning_rate": 3.432204957535862e-06,
"loss": 1.626413106918335,
"step": 2437
},
{
"epoch": 1.925750394944708,
"grad_norm": 8.00956704186196,
"learning_rate": 3.4278417261454754e-06,
"loss": 1.630741834640503,
"step": 2438
},
{
"epoch": 1.9265402843601895,
"grad_norm": 17.812247659435748,
"learning_rate": 3.4234798229195764e-06,
"loss": 1.313347578048706,
"step": 2439
},
{
"epoch": 1.9273301737756714,
"grad_norm": 9.723288589416475,
"learning_rate": 3.4191192515431125e-06,
"loss": 1.5095974206924438,
"step": 2440
},
{
"epoch": 1.9281200631911533,
"grad_norm": 19.0000198211733,
"learning_rate": 3.414760015699913e-06,
"loss": 1.7037584781646729,
"step": 2441
},
{
"epoch": 1.9289099526066351,
"grad_norm": 7.4858175557613436,
"learning_rate": 3.410402119072671e-06,
"loss": 2.0781524181365967,
"step": 2442
},
{
"epoch": 1.9296998420221168,
"grad_norm": 9.98301676651302,
"learning_rate": 3.406045565342955e-06,
"loss": 1.491240382194519,
"step": 2443
},
{
"epoch": 1.9304897314375986,
"grad_norm": 10.15893764761394,
"learning_rate": 3.401690358191192e-06,
"loss": 1.3460967540740967,
"step": 2444
},
{
"epoch": 1.9312796208530805,
"grad_norm": 12.890492449150006,
"learning_rate": 3.397336501296679e-06,
"loss": 0.8602248430252075,
"step": 2445
},
{
"epoch": 1.9320695102685623,
"grad_norm": 12.533401638864804,
"learning_rate": 3.392983998337567e-06,
"loss": 0.8721238374710083,
"step": 2446
},
{
"epoch": 1.9328593996840442,
"grad_norm": 20.30153738152959,
"learning_rate": 3.388632852990864e-06,
"loss": 1.3083229064941406,
"step": 2447
},
{
"epoch": 1.933649289099526,
"grad_norm": 9.12776969619587,
"learning_rate": 3.3842830689324367e-06,
"loss": 1.5735386610031128,
"step": 2448
},
{
"epoch": 1.934439178515008,
"grad_norm": 20.700954625914637,
"learning_rate": 3.3799346498369895e-06,
"loss": 1.5870678424835205,
"step": 2449
},
{
"epoch": 1.9352290679304898,
"grad_norm": 9.831445546192326,
"learning_rate": 3.3755875993780862e-06,
"loss": 1.48130464553833,
"step": 2450
},
{
"epoch": 1.9360189573459716,
"grad_norm": 24.574710960838644,
"learning_rate": 3.3712419212281284e-06,
"loss": 1.7219302654266357,
"step": 2451
},
{
"epoch": 1.9368088467614535,
"grad_norm": 14.89660871560069,
"learning_rate": 3.366897619058358e-06,
"loss": 0.9834906458854675,
"step": 2452
},
{
"epoch": 1.9375987361769353,
"grad_norm": 10.712068685842123,
"learning_rate": 3.362554696538857e-06,
"loss": 2.1397347450256348,
"step": 2453
},
{
"epoch": 1.9383886255924172,
"grad_norm": 7.7667540070752485,
"learning_rate": 3.358213157338542e-06,
"loss": 1.1784471273422241,
"step": 2454
},
{
"epoch": 1.9391785150078988,
"grad_norm": 9.841175283941777,
"learning_rate": 3.3538730051251576e-06,
"loss": 1.503877878189087,
"step": 2455
},
{
"epoch": 1.9399684044233807,
"grad_norm": 12.542202389896184,
"learning_rate": 3.3495342435652777e-06,
"loss": 1.7189602851867676,
"step": 2456
},
{
"epoch": 1.9407582938388626,
"grad_norm": 11.391348598231419,
"learning_rate": 3.3451968763243046e-06,
"loss": 1.686078667640686,
"step": 2457
},
{
"epoch": 1.9415481832543444,
"grad_norm": 11.575710499478722,
"learning_rate": 3.34086090706646e-06,
"loss": 1.031868577003479,
"step": 2458
},
{
"epoch": 1.9423380726698263,
"grad_norm": 8.78322861407791,
"learning_rate": 3.3365263394547852e-06,
"loss": 1.1780157089233398,
"step": 2459
},
{
"epoch": 1.943127962085308,
"grad_norm": 12.608494943318862,
"learning_rate": 3.3321931771511363e-06,
"loss": 1.1709704399108887,
"step": 2460
},
{
"epoch": 1.9439178515007898,
"grad_norm": 12.683639129888297,
"learning_rate": 3.3278614238161818e-06,
"loss": 0.8888792991638184,
"step": 2461
},
{
"epoch": 1.9447077409162716,
"grad_norm": 11.443846674864774,
"learning_rate": 3.323531083109404e-06,
"loss": 1.3447215557098389,
"step": 2462
},
{
"epoch": 1.9454976303317535,
"grad_norm": 11.117047622365464,
"learning_rate": 3.3192021586890866e-06,
"loss": 1.329740047454834,
"step": 2463
},
{
"epoch": 1.9462875197472354,
"grad_norm": 14.02605723542626,
"learning_rate": 3.314874654212321e-06,
"loss": 1.4177271127700806,
"step": 2464
},
{
"epoch": 1.9470774091627172,
"grad_norm": 10.663308506333905,
"learning_rate": 3.3105485733349984e-06,
"loss": 1.0513715744018555,
"step": 2465
},
{
"epoch": 1.947867298578199,
"grad_norm": 12.042995971816161,
"learning_rate": 3.3062239197118027e-06,
"loss": 2.0589680671691895,
"step": 2466
},
{
"epoch": 1.948657187993681,
"grad_norm": 13.784435713074545,
"learning_rate": 3.301900696996218e-06,
"loss": 2.251110553741455,
"step": 2467
},
{
"epoch": 1.9494470774091628,
"grad_norm": 13.710270558589283,
"learning_rate": 3.297578908840515e-06,
"loss": 1.2197270393371582,
"step": 2468
},
{
"epoch": 1.9502369668246446,
"grad_norm": 10.246163702089847,
"learning_rate": 3.2932585588957565e-06,
"loss": 1.1383863687515259,
"step": 2469
},
{
"epoch": 1.9510268562401265,
"grad_norm": 12.115693006024177,
"learning_rate": 3.288939650811789e-06,
"loss": 1.6648939847946167,
"step": 2470
},
{
"epoch": 1.9518167456556084,
"grad_norm": 10.775832953436032,
"learning_rate": 3.2846221882372386e-06,
"loss": 1.417509913444519,
"step": 2471
},
{
"epoch": 1.95260663507109,
"grad_norm": 13.57822764736623,
"learning_rate": 3.280306174819511e-06,
"loss": 1.1139479875564575,
"step": 2472
},
{
"epoch": 1.9533965244865719,
"grad_norm": 14.390531948777902,
"learning_rate": 3.27599161420479e-06,
"loss": 1.3739941120147705,
"step": 2473
},
{
"epoch": 1.9541864139020537,
"grad_norm": 11.875606554603966,
"learning_rate": 3.271678510038031e-06,
"loss": 1.250982642173767,
"step": 2474
},
{
"epoch": 1.9549763033175356,
"grad_norm": 10.755099863929798,
"learning_rate": 3.2673668659629594e-06,
"loss": 1.8845748901367188,
"step": 2475
},
{
"epoch": 1.9557661927330172,
"grad_norm": 12.930480165235073,
"learning_rate": 3.2630566856220636e-06,
"loss": 1.489890456199646,
"step": 2476
},
{
"epoch": 1.956556082148499,
"grad_norm": 14.065755846735259,
"learning_rate": 3.2587479726565985e-06,
"loss": 1.648732304573059,
"step": 2477
},
{
"epoch": 1.957345971563981,
"grad_norm": 12.648739189286825,
"learning_rate": 3.2544407307065808e-06,
"loss": 1.7760826349258423,
"step": 2478
},
{
"epoch": 1.9581358609794628,
"grad_norm": 11.427887541540612,
"learning_rate": 3.2501349634107825e-06,
"loss": 0.8107354640960693,
"step": 2479
},
{
"epoch": 1.9589257503949447,
"grad_norm": 16.28308318755435,
"learning_rate": 3.245830674406728e-06,
"loss": 1.8128418922424316,
"step": 2480
},
{
"epoch": 1.9597156398104265,
"grad_norm": 13.404249494910019,
"learning_rate": 3.2415278673306984e-06,
"loss": 1.3533351421356201,
"step": 2481
},
{
"epoch": 1.9605055292259084,
"grad_norm": 26.577557541363184,
"learning_rate": 3.237226545817716e-06,
"loss": 1.6566579341888428,
"step": 2482
},
{
"epoch": 1.9612954186413902,
"grad_norm": 10.919680561043819,
"learning_rate": 3.2329267135015526e-06,
"loss": 1.7733817100524902,
"step": 2483
},
{
"epoch": 1.962085308056872,
"grad_norm": 22.667838737436107,
"learning_rate": 3.2286283740147194e-06,
"loss": 1.8271636962890625,
"step": 2484
},
{
"epoch": 1.962875197472354,
"grad_norm": 10.40163155926731,
"learning_rate": 3.2243315309884697e-06,
"loss": 1.3933049440383911,
"step": 2485
},
{
"epoch": 1.9636650868878358,
"grad_norm": 9.862448912563854,
"learning_rate": 3.2200361880527914e-06,
"loss": 2.3680734634399414,
"step": 2486
},
{
"epoch": 1.9644549763033177,
"grad_norm": 11.811513863223011,
"learning_rate": 3.2157423488364013e-06,
"loss": 1.6310195922851562,
"step": 2487
},
{
"epoch": 1.9652448657187995,
"grad_norm": 8.26150488888275,
"learning_rate": 3.2114500169667513e-06,
"loss": 1.2277730703353882,
"step": 2488
},
{
"epoch": 1.9660347551342812,
"grad_norm": 17.701718441376492,
"learning_rate": 3.207159196070015e-06,
"loss": 1.0331852436065674,
"step": 2489
},
{
"epoch": 1.966824644549763,
"grad_norm": 12.041291133088178,
"learning_rate": 3.2028698897710945e-06,
"loss": 1.8590238094329834,
"step": 2490
},
{
"epoch": 1.9676145339652449,
"grad_norm": 10.507710338817834,
"learning_rate": 3.198582101693608e-06,
"loss": 1.9933216571807861,
"step": 2491
},
{
"epoch": 1.9684044233807267,
"grad_norm": 8.15881705828358,
"learning_rate": 3.194295835459898e-06,
"loss": 1.6253411769866943,
"step": 2492
},
{
"epoch": 1.9691943127962084,
"grad_norm": 10.38474355259108,
"learning_rate": 3.1900110946910084e-06,
"loss": 1.3114337921142578,
"step": 2493
},
{
"epoch": 1.9699842022116902,
"grad_norm": 9.949738129316227,
"learning_rate": 3.1857278830067075e-06,
"loss": 1.256972074508667,
"step": 2494
},
{
"epoch": 1.970774091627172,
"grad_norm": 16.162021019463328,
"learning_rate": 3.1814462040254657e-06,
"loss": 1.8827450275421143,
"step": 2495
},
{
"epoch": 1.971563981042654,
"grad_norm": 22.149860191874843,
"learning_rate": 3.1771660613644582e-06,
"loss": 0.9394640922546387,
"step": 2496
},
{
"epoch": 1.9723538704581358,
"grad_norm": 8.539480886296017,
"learning_rate": 3.1728874586395677e-06,
"loss": 1.0433759689331055,
"step": 2497
},
{
"epoch": 1.9731437598736177,
"grad_norm": 11.67531560801483,
"learning_rate": 3.168610399465365e-06,
"loss": 0.7940645813941956,
"step": 2498
},
{
"epoch": 1.9739336492890995,
"grad_norm": 12.4425838328368,
"learning_rate": 3.1643348874551294e-06,
"loss": 1.4504199028015137,
"step": 2499
},
{
"epoch": 1.9747235387045814,
"grad_norm": 10.347989319044142,
"learning_rate": 3.1600609262208244e-06,
"loss": 1.3296544551849365,
"step": 2500
},
{
"epoch": 1.9755134281200633,
"grad_norm": 8.989622478769817,
"learning_rate": 3.1557885193731086e-06,
"loss": 1.6991509199142456,
"step": 2501
},
{
"epoch": 1.9763033175355451,
"grad_norm": 9.631551183343522,
"learning_rate": 3.151517670521325e-06,
"loss": 1.6035475730895996,
"step": 2502
},
{
"epoch": 1.977093206951027,
"grad_norm": 13.638367050748972,
"learning_rate": 3.1472483832735014e-06,
"loss": 1.8189468383789062,
"step": 2503
},
{
"epoch": 1.9778830963665088,
"grad_norm": 13.50296256744864,
"learning_rate": 3.1429806612363432e-06,
"loss": 1.6658248901367188,
"step": 2504
},
{
"epoch": 1.9786729857819905,
"grad_norm": 7.393153154801466,
"learning_rate": 3.138714508015237e-06,
"loss": 1.200107455253601,
"step": 2505
},
{
"epoch": 1.9794628751974723,
"grad_norm": 11.920956663917268,
"learning_rate": 3.1344499272142447e-06,
"loss": 0.791041910648346,
"step": 2506
},
{
"epoch": 1.9802527646129542,
"grad_norm": 17.66290191613317,
"learning_rate": 3.130186922436097e-06,
"loss": 1.5144439935684204,
"step": 2507
},
{
"epoch": 1.981042654028436,
"grad_norm": 9.017263658547026,
"learning_rate": 3.125925497282195e-06,
"loss": 1.9003779888153076,
"step": 2508
},
{
"epoch": 1.981832543443918,
"grad_norm": 16.75636258557487,
"learning_rate": 3.1216656553526026e-06,
"loss": 2.5085067749023438,
"step": 2509
},
{
"epoch": 1.9826224328593995,
"grad_norm": 14.890632169656481,
"learning_rate": 3.117407400246051e-06,
"loss": 1.7289268970489502,
"step": 2510
},
{
"epoch": 1.9834123222748814,
"grad_norm": 9.15166116321032,
"learning_rate": 3.1131507355599267e-06,
"loss": 1.61943781375885,
"step": 2511
},
{
"epoch": 1.9842022116903633,
"grad_norm": 6.859620699544047,
"learning_rate": 3.1088956648902735e-06,
"loss": 2.0489935874938965,
"step": 2512
},
{
"epoch": 1.9849921011058451,
"grad_norm": 14.799263181017864,
"learning_rate": 3.1046421918317916e-06,
"loss": 1.6825857162475586,
"step": 2513
},
{
"epoch": 1.985781990521327,
"grad_norm": 8.947142781131408,
"learning_rate": 3.1003903199778273e-06,
"loss": 1.6780674457550049,
"step": 2514
},
{
"epoch": 1.9865718799368088,
"grad_norm": 14.278954614357216,
"learning_rate": 3.096140052920376e-06,
"loss": 1.194378137588501,
"step": 2515
},
{
"epoch": 1.9873617693522907,
"grad_norm": 12.21887451937195,
"learning_rate": 3.091891394250077e-06,
"loss": 2.4973843097686768,
"step": 2516
},
{
"epoch": 1.9881516587677726,
"grad_norm": 13.216871045904774,
"learning_rate": 3.087644347556211e-06,
"loss": 1.668590784072876,
"step": 2517
},
{
"epoch": 1.9889415481832544,
"grad_norm": 14.625469207562114,
"learning_rate": 3.0833989164266974e-06,
"loss": 1.5314085483551025,
"step": 2518
},
{
"epoch": 1.9897314375987363,
"grad_norm": 11.732142027757016,
"learning_rate": 3.0791551044480926e-06,
"loss": 1.6780108213424683,
"step": 2519
},
{
"epoch": 1.9905213270142181,
"grad_norm": 9.710757091096028,
"learning_rate": 3.0749129152055795e-06,
"loss": 1.6987766027450562,
"step": 2520
},
{
"epoch": 1.9913112164297,
"grad_norm": 18.032480852526703,
"learning_rate": 3.070672352282974e-06,
"loss": 1.1783912181854248,
"step": 2521
},
{
"epoch": 1.9921011058451816,
"grad_norm": 11.162322784263365,
"learning_rate": 3.0664334192627197e-06,
"loss": 1.6210596561431885,
"step": 2522
},
{
"epoch": 1.9928909952606635,
"grad_norm": 9.526530451148624,
"learning_rate": 3.06219611972588e-06,
"loss": 0.9034061431884766,
"step": 2523
},
{
"epoch": 1.9936808846761453,
"grad_norm": 12.853469107604116,
"learning_rate": 3.0579604572521382e-06,
"loss": 1.8308205604553223,
"step": 2524
},
{
"epoch": 1.9944707740916272,
"grad_norm": 12.043619204725749,
"learning_rate": 3.0537264354198005e-06,
"loss": 1.336472749710083,
"step": 2525
},
{
"epoch": 1.9952606635071088,
"grad_norm": 24.68078723401722,
"learning_rate": 3.049494057805776e-06,
"loss": 1.3730854988098145,
"step": 2526
},
{
"epoch": 1.9960505529225907,
"grad_norm": 8.875683203972285,
"learning_rate": 3.045263327985595e-06,
"loss": 1.297802209854126,
"step": 2527
},
{
"epoch": 1.9968404423380726,
"grad_norm": 10.388495319632469,
"learning_rate": 3.0410342495333913e-06,
"loss": 1.585411548614502,
"step": 2528
},
{
"epoch": 1.9976303317535544,
"grad_norm": 9.15038105991676,
"learning_rate": 3.0368068260219054e-06,
"loss": 1.1837197542190552,
"step": 2529
},
{
"epoch": 1.9984202211690363,
"grad_norm": 10.39999784581774,
"learning_rate": 3.0325810610224783e-06,
"loss": 1.3715815544128418,
"step": 2530
},
{
"epoch": 1.9992101105845181,
"grad_norm": 20.945648961140414,
"learning_rate": 3.0283569581050486e-06,
"loss": 1.6476037502288818,
"step": 2531
},
{
"epoch": 2.0,
"grad_norm": 12.576094322325524,
"learning_rate": 3.0241345208381533e-06,
"loss": 1.4700212478637695,
"step": 2532
},
{
"epoch": 2.000789889415482,
"grad_norm": 10.359676749996417,
"learning_rate": 3.01991375278892e-06,
"loss": 0.46016550064086914,
"step": 2533
},
{
"epoch": 2.0015797788309637,
"grad_norm": 10.397265684651309,
"learning_rate": 3.01569465752307e-06,
"loss": 0.43968772888183594,
"step": 2534
},
{
"epoch": 2.0023696682464456,
"grad_norm": 10.588510736324265,
"learning_rate": 3.0114772386049087e-06,
"loss": 0.40929579734802246,
"step": 2535
},
{
"epoch": 2.0031595576619274,
"grad_norm": 12.058979157630162,
"learning_rate": 3.0072614995973236e-06,
"loss": 0.6055519580841064,
"step": 2536
},
{
"epoch": 2.0039494470774093,
"grad_norm": 9.905968562829903,
"learning_rate": 3.003047444061784e-06,
"loss": 0.33923691511154175,
"step": 2537
},
{
"epoch": 2.004739336492891,
"grad_norm": 11.848016978276616,
"learning_rate": 2.998835075558342e-06,
"loss": 0.5501883029937744,
"step": 2538
},
{
"epoch": 2.005529225908373,
"grad_norm": 7.660245787743295,
"learning_rate": 2.994624397645616e-06,
"loss": 0.3189247250556946,
"step": 2539
},
{
"epoch": 2.006319115323855,
"grad_norm": 7.718904002575967,
"learning_rate": 2.990415413880803e-06,
"loss": 0.8301833868026733,
"step": 2540
},
{
"epoch": 2.0071090047393363,
"grad_norm": 7.798220693220483,
"learning_rate": 2.9862081278196685e-06,
"loss": 0.34704911708831787,
"step": 2541
},
{
"epoch": 2.007898894154818,
"grad_norm": 8.488060658104867,
"learning_rate": 2.9820025430165358e-06,
"loss": 0.8342065215110779,
"step": 2542
},
{
"epoch": 2.0086887835703,
"grad_norm": 17.260191665775476,
"learning_rate": 2.977798663024302e-06,
"loss": 0.341113805770874,
"step": 2543
},
{
"epoch": 2.009478672985782,
"grad_norm": 9.539527306419366,
"learning_rate": 2.9735964913944153e-06,
"loss": 0.7216507792472839,
"step": 2544
},
{
"epoch": 2.0102685624012637,
"grad_norm": 6.040467200683253,
"learning_rate": 2.9693960316768856e-06,
"loss": 0.5428364276885986,
"step": 2545
},
{
"epoch": 2.0110584518167456,
"grad_norm": 10.079491641898473,
"learning_rate": 2.965197287420276e-06,
"loss": 0.49245187640190125,
"step": 2546
},
{
"epoch": 2.0118483412322274,
"grad_norm": 8.736640891813208,
"learning_rate": 2.9610002621716987e-06,
"loss": 0.37076878547668457,
"step": 2547
},
{
"epoch": 2.0126382306477093,
"grad_norm": 14.117126577405678,
"learning_rate": 2.956804959476814e-06,
"loss": 0.4397502541542053,
"step": 2548
},
{
"epoch": 2.013428120063191,
"grad_norm": 7.821072536076488,
"learning_rate": 2.9526113828798266e-06,
"loss": 0.3473249673843384,
"step": 2549
},
{
"epoch": 2.014218009478673,
"grad_norm": 12.898146258975322,
"learning_rate": 2.9484195359234855e-06,
"loss": 0.507472813129425,
"step": 2550
},
{
"epoch": 2.015007898894155,
"grad_norm": 16.615044423238285,
"learning_rate": 2.9442294221490773e-06,
"loss": 0.32390978932380676,
"step": 2551
},
{
"epoch": 2.0157977883096367,
"grad_norm": 10.491110312547283,
"learning_rate": 2.940041045096423e-06,
"loss": 0.5616019368171692,
"step": 2552
},
{
"epoch": 2.0165876777251186,
"grad_norm": 17.4266369509056,
"learning_rate": 2.9358544083038772e-06,
"loss": 0.5512704849243164,
"step": 2553
},
{
"epoch": 2.0173775671406005,
"grad_norm": 9.189915300922015,
"learning_rate": 2.931669515308323e-06,
"loss": 0.5548714399337769,
"step": 2554
},
{
"epoch": 2.0181674565560823,
"grad_norm": 12.471542857964309,
"learning_rate": 2.927486369645174e-06,
"loss": 0.5579742789268494,
"step": 2555
},
{
"epoch": 2.018957345971564,
"grad_norm": 10.57651269059222,
"learning_rate": 2.9233049748483637e-06,
"loss": 0.4655217230319977,
"step": 2556
},
{
"epoch": 2.0197472353870456,
"grad_norm": 16.404149783138585,
"learning_rate": 2.9191253344503512e-06,
"loss": 0.5133844017982483,
"step": 2557
},
{
"epoch": 2.0205371248025275,
"grad_norm": 8.739917453875979,
"learning_rate": 2.9149474519821073e-06,
"loss": 0.425764799118042,
"step": 2558
},
{
"epoch": 2.0213270142180093,
"grad_norm": 12.340461413890134,
"learning_rate": 2.910771330973123e-06,
"loss": 0.31363582611083984,
"step": 2559
},
{
"epoch": 2.022116903633491,
"grad_norm": 17.15887001035681,
"learning_rate": 2.9065969749513972e-06,
"loss": 0.3946301341056824,
"step": 2560
},
{
"epoch": 2.022906793048973,
"grad_norm": 12.235209650094014,
"learning_rate": 2.9024243874434412e-06,
"loss": 0.3221214711666107,
"step": 2561
},
{
"epoch": 2.023696682464455,
"grad_norm": 12.122441612475534,
"learning_rate": 2.898253571974267e-06,
"loss": 0.7128668427467346,
"step": 2562
},
{
"epoch": 2.0244865718799367,
"grad_norm": 11.253478945540717,
"learning_rate": 2.8940845320674003e-06,
"loss": 0.7156331539154053,
"step": 2563
},
{
"epoch": 2.0252764612954186,
"grad_norm": 11.411246914531567,
"learning_rate": 2.889917271244854e-06,
"loss": 0.38757872581481934,
"step": 2564
},
{
"epoch": 2.0260663507109005,
"grad_norm": 13.55858423904863,
"learning_rate": 2.885751793027146e-06,
"loss": 1.2920098304748535,
"step": 2565
},
{
"epoch": 2.0268562401263823,
"grad_norm": 7.218573790221784,
"learning_rate": 2.8815881009332847e-06,
"loss": 0.305652379989624,
"step": 2566
},
{
"epoch": 2.027646129541864,
"grad_norm": 21.733202425060888,
"learning_rate": 2.8774261984807705e-06,
"loss": 0.5940957069396973,
"step": 2567
},
{
"epoch": 2.028436018957346,
"grad_norm": 10.902544131148884,
"learning_rate": 2.873266089185597e-06,
"loss": 0.4019826352596283,
"step": 2568
},
{
"epoch": 2.029225908372828,
"grad_norm": 12.95751723968972,
"learning_rate": 2.869107776562232e-06,
"loss": 0.9044560790061951,
"step": 2569
},
{
"epoch": 2.0300157977883098,
"grad_norm": 12.066912492836503,
"learning_rate": 2.864951264123635e-06,
"loss": 0.9884500503540039,
"step": 2570
},
{
"epoch": 2.0308056872037916,
"grad_norm": 9.279060734945645,
"learning_rate": 2.860796555381238e-06,
"loss": 0.21672052145004272,
"step": 2571
},
{
"epoch": 2.0315955766192735,
"grad_norm": 12.515738686148927,
"learning_rate": 2.8566436538449583e-06,
"loss": 0.5464432239532471,
"step": 2572
},
{
"epoch": 2.0323854660347553,
"grad_norm": 5.072237401183682,
"learning_rate": 2.8524925630231774e-06,
"loss": 0.12266440689563751,
"step": 2573
},
{
"epoch": 2.0331753554502368,
"grad_norm": 13.049508774102216,
"learning_rate": 2.8483432864227533e-06,
"loss": 0.5199903249740601,
"step": 2574
},
{
"epoch": 2.0339652448657186,
"grad_norm": 26.009573976811115,
"learning_rate": 2.8441958275490044e-06,
"loss": 1.3708068132400513,
"step": 2575
},
{
"epoch": 2.0347551342812005,
"grad_norm": 8.306114337004733,
"learning_rate": 2.8400501899057164e-06,
"loss": 0.41701173782348633,
"step": 2576
},
{
"epoch": 2.0355450236966823,
"grad_norm": 19.310700207769223,
"learning_rate": 2.8359063769951435e-06,
"loss": 0.48340296745300293,
"step": 2577
},
{
"epoch": 2.036334913112164,
"grad_norm": 9.685038803587986,
"learning_rate": 2.8317643923179894e-06,
"loss": 0.2897825539112091,
"step": 2578
},
{
"epoch": 2.037124802527646,
"grad_norm": 7.494546906971376,
"learning_rate": 2.827624239373419e-06,
"loss": 0.3327743411064148,
"step": 2579
},
{
"epoch": 2.037914691943128,
"grad_norm": 14.776433298746518,
"learning_rate": 2.8234859216590406e-06,
"loss": 0.30218467116355896,
"step": 2580
},
{
"epoch": 2.0387045813586098,
"grad_norm": 9.947771139586356,
"learning_rate": 2.8193494426709245e-06,
"loss": 0.2601761519908905,
"step": 2581
},
{
"epoch": 2.0394944707740916,
"grad_norm": 14.37459477886007,
"learning_rate": 2.8152148059035804e-06,
"loss": 2.110447645187378,
"step": 2582
},
{
"epoch": 2.0402843601895735,
"grad_norm": 10.93265288740834,
"learning_rate": 2.811082014849963e-06,
"loss": 0.6686077117919922,
"step": 2583
},
{
"epoch": 2.0410742496050553,
"grad_norm": 10.24553214669215,
"learning_rate": 2.806951073001467e-06,
"loss": 0.33964627981185913,
"step": 2584
},
{
"epoch": 2.041864139020537,
"grad_norm": 10.359426539370531,
"learning_rate": 2.8028219838479265e-06,
"loss": 0.2946935296058655,
"step": 2585
},
{
"epoch": 2.042654028436019,
"grad_norm": 7.743470398158407,
"learning_rate": 2.798694750877609e-06,
"loss": 0.5350501537322998,
"step": 2586
},
{
"epoch": 2.043443917851501,
"grad_norm": 11.20913313781443,
"learning_rate": 2.7945693775772153e-06,
"loss": 0.3125535547733307,
"step": 2587
},
{
"epoch": 2.044233807266983,
"grad_norm": 8.18102147636904,
"learning_rate": 2.7904458674318733e-06,
"loss": 0.5411368012428284,
"step": 2588
},
{
"epoch": 2.0450236966824646,
"grad_norm": 9.35774408306311,
"learning_rate": 2.7863242239251385e-06,
"loss": 0.33201736211776733,
"step": 2589
},
{
"epoch": 2.0458135860979465,
"grad_norm": 8.673142738839285,
"learning_rate": 2.78220445053899e-06,
"loss": 0.4620051085948944,
"step": 2590
},
{
"epoch": 2.046603475513428,
"grad_norm": 9.55716346640722,
"learning_rate": 2.7780865507538236e-06,
"loss": 0.44175124168395996,
"step": 2591
},
{
"epoch": 2.0473933649289098,
"grad_norm": 9.80538220230876,
"learning_rate": 2.7739705280484565e-06,
"loss": 0.23179033398628235,
"step": 2592
},
{
"epoch": 2.0481832543443916,
"grad_norm": 7.778339221842325,
"learning_rate": 2.769856385900118e-06,
"loss": 0.2992667555809021,
"step": 2593
},
{
"epoch": 2.0489731437598735,
"grad_norm": 10.15304744757696,
"learning_rate": 2.7657441277844475e-06,
"loss": 0.8387447595596313,
"step": 2594
},
{
"epoch": 2.0497630331753554,
"grad_norm": 22.187673641215007,
"learning_rate": 2.7616337571754937e-06,
"loss": 0.6397115588188171,
"step": 2595
},
{
"epoch": 2.050552922590837,
"grad_norm": 11.077698692839348,
"learning_rate": 2.7575252775457175e-06,
"loss": 0.4148407280445099,
"step": 2596
},
{
"epoch": 2.051342812006319,
"grad_norm": 11.214710142878744,
"learning_rate": 2.753418692365968e-06,
"loss": 0.3338342607021332,
"step": 2597
},
{
"epoch": 2.052132701421801,
"grad_norm": 24.053873668111414,
"learning_rate": 2.7493140051055055e-06,
"loss": 0.19720637798309326,
"step": 2598
},
{
"epoch": 2.052922590837283,
"grad_norm": 7.360243899896904,
"learning_rate": 2.7452112192319813e-06,
"loss": 0.17611664533615112,
"step": 2599
},
{
"epoch": 2.0537124802527646,
"grad_norm": 7.902969436001227,
"learning_rate": 2.741110338211446e-06,
"loss": 0.18759757280349731,
"step": 2600
},
{
"epoch": 2.0545023696682465,
"grad_norm": 11.102771052686371,
"learning_rate": 2.7370113655083373e-06,
"loss": 0.5418599247932434,
"step": 2601
},
{
"epoch": 2.0552922590837284,
"grad_norm": 10.890805356373017,
"learning_rate": 2.732914304585478e-06,
"loss": 0.4259986877441406,
"step": 2602
},
{
"epoch": 2.0560821484992102,
"grad_norm": 11.586458157042319,
"learning_rate": 2.728819158904078e-06,
"loss": 0.36362117528915405,
"step": 2603
},
{
"epoch": 2.056872037914692,
"grad_norm": 18.320864664521476,
"learning_rate": 2.7247259319237306e-06,
"loss": 0.939771294593811,
"step": 2604
},
{
"epoch": 2.057661927330174,
"grad_norm": 11.501402001297102,
"learning_rate": 2.7206346271024103e-06,
"loss": 0.2737082839012146,
"step": 2605
},
{
"epoch": 2.058451816745656,
"grad_norm": 12.125160137594307,
"learning_rate": 2.716545247896465e-06,
"loss": 0.7849869728088379,
"step": 2606
},
{
"epoch": 2.0592417061611377,
"grad_norm": 11.093511362641284,
"learning_rate": 2.7124577977606114e-06,
"loss": 0.47068697214126587,
"step": 2607
},
{
"epoch": 2.060031595576619,
"grad_norm": 6.735093477058937,
"learning_rate": 2.7083722801479407e-06,
"loss": 0.30919548869132996,
"step": 2608
},
{
"epoch": 2.060821484992101,
"grad_norm": 11.012490104989881,
"learning_rate": 2.704288698509917e-06,
"loss": 0.5182478427886963,
"step": 2609
},
{
"epoch": 2.061611374407583,
"grad_norm": 19.951181172455126,
"learning_rate": 2.70020705629636e-06,
"loss": 0.30237090587615967,
"step": 2610
},
{
"epoch": 2.0624012638230647,
"grad_norm": 7.174756319853717,
"learning_rate": 2.696127356955455e-06,
"loss": 0.2660676836967468,
"step": 2611
},
{
"epoch": 2.0631911532385465,
"grad_norm": 12.385922369030384,
"learning_rate": 2.6920496039337474e-06,
"loss": 0.3617851734161377,
"step": 2612
},
{
"epoch": 2.0639810426540284,
"grad_norm": 11.015014488490415,
"learning_rate": 2.6879738006761303e-06,
"loss": 0.46704721450805664,
"step": 2613
},
{
"epoch": 2.0647709320695102,
"grad_norm": 16.919341385806266,
"learning_rate": 2.6838999506258623e-06,
"loss": 0.6928585767745972,
"step": 2614
},
{
"epoch": 2.065560821484992,
"grad_norm": 11.230526922983666,
"learning_rate": 2.6798280572245427e-06,
"loss": 0.47172248363494873,
"step": 2615
},
{
"epoch": 2.066350710900474,
"grad_norm": 6.544914274412647,
"learning_rate": 2.6757581239121203e-06,
"loss": 0.27590444684028625,
"step": 2616
},
{
"epoch": 2.067140600315956,
"grad_norm": 20.085203153128194,
"learning_rate": 2.671690154126889e-06,
"loss": 1.725602388381958,
"step": 2617
},
{
"epoch": 2.0679304897314377,
"grad_norm": 9.954561309572316,
"learning_rate": 2.667624151305482e-06,
"loss": 0.34551358222961426,
"step": 2618
},
{
"epoch": 2.0687203791469195,
"grad_norm": 9.55670352614984,
"learning_rate": 2.6635601188828736e-06,
"loss": 0.2692929804325104,
"step": 2619
},
{
"epoch": 2.0695102685624014,
"grad_norm": 19.36634128513723,
"learning_rate": 2.6594980602923702e-06,
"loss": 0.4668102264404297,
"step": 2620
},
{
"epoch": 2.0703001579778832,
"grad_norm": 10.139253556415792,
"learning_rate": 2.6554379789656124e-06,
"loss": 0.4466455578804016,
"step": 2621
},
{
"epoch": 2.071090047393365,
"grad_norm": 10.412158390774838,
"learning_rate": 2.6513798783325708e-06,
"loss": 0.3185625970363617,
"step": 2622
},
{
"epoch": 2.071879936808847,
"grad_norm": 16.193194717671236,
"learning_rate": 2.647323761821542e-06,
"loss": 1.1391959190368652,
"step": 2623
},
{
"epoch": 2.0726698262243284,
"grad_norm": 10.665895924665014,
"learning_rate": 2.643269632859146e-06,
"loss": 0.5236232876777649,
"step": 2624
},
{
"epoch": 2.0734597156398102,
"grad_norm": 9.152693760808571,
"learning_rate": 2.6392174948703253e-06,
"loss": 0.38468989729881287,
"step": 2625
},
{
"epoch": 2.074249605055292,
"grad_norm": 10.476888206104105,
"learning_rate": 2.635167351278339e-06,
"loss": 0.21812653541564941,
"step": 2626
},
{
"epoch": 2.075039494470774,
"grad_norm": 8.26594520989118,
"learning_rate": 2.63111920550476e-06,
"loss": 0.5767203569412231,
"step": 2627
},
{
"epoch": 2.075829383886256,
"grad_norm": 10.011641691561126,
"learning_rate": 2.62707306096948e-06,
"loss": 0.5115009546279907,
"step": 2628
},
{
"epoch": 2.0766192733017377,
"grad_norm": 9.073068879094404,
"learning_rate": 2.6230289210906904e-06,
"loss": 0.4996326267719269,
"step": 2629
},
{
"epoch": 2.0774091627172195,
"grad_norm": 11.435973854776599,
"learning_rate": 2.6189867892848962e-06,
"loss": 1.0042023658752441,
"step": 2630
},
{
"epoch": 2.0781990521327014,
"grad_norm": 16.103230479782546,
"learning_rate": 2.614946668966902e-06,
"loss": 0.5340977907180786,
"step": 2631
},
{
"epoch": 2.0789889415481833,
"grad_norm": 10.298294605139441,
"learning_rate": 2.610908563549815e-06,
"loss": 0.23495090007781982,
"step": 2632
},
{
"epoch": 2.079778830963665,
"grad_norm": 12.126262761621527,
"learning_rate": 2.606872476445042e-06,
"loss": 0.4852793216705322,
"step": 2633
},
{
"epoch": 2.080568720379147,
"grad_norm": 14.89375330624192,
"learning_rate": 2.602838411062284e-06,
"loss": 0.42270925641059875,
"step": 2634
},
{
"epoch": 2.081358609794629,
"grad_norm": 8.811379648381822,
"learning_rate": 2.598806370809528e-06,
"loss": 0.3555320203304291,
"step": 2635
},
{
"epoch": 2.0821484992101107,
"grad_norm": 9.627230128604296,
"learning_rate": 2.5947763590930542e-06,
"loss": 0.7813281416893005,
"step": 2636
},
{
"epoch": 2.0829383886255926,
"grad_norm": 12.811410790915238,
"learning_rate": 2.590748379317436e-06,
"loss": 0.5171550512313843,
"step": 2637
},
{
"epoch": 2.0837282780410744,
"grad_norm": 6.703660941737119,
"learning_rate": 2.586722434885519e-06,
"loss": 0.4919162392616272,
"step": 2638
},
{
"epoch": 2.0845181674565563,
"grad_norm": 8.360573561210712,
"learning_rate": 2.582698529198439e-06,
"loss": 0.28129732608795166,
"step": 2639
},
{
"epoch": 2.085308056872038,
"grad_norm": 10.528148772142709,
"learning_rate": 2.5786766656555996e-06,
"loss": 0.28879594802856445,
"step": 2640
},
{
"epoch": 2.0860979462875195,
"grad_norm": 9.079673356688698,
"learning_rate": 2.574656847654684e-06,
"loss": 0.5426896810531616,
"step": 2641
},
{
"epoch": 2.0868878357030014,
"grad_norm": 9.340991792914485,
"learning_rate": 2.5706390785916526e-06,
"loss": 0.7707650661468506,
"step": 2642
},
{
"epoch": 2.0876777251184833,
"grad_norm": 9.244986743162158,
"learning_rate": 2.5666233618607274e-06,
"loss": 0.34384623169898987,
"step": 2643
},
{
"epoch": 2.088467614533965,
"grad_norm": 16.744652694879548,
"learning_rate": 2.5626097008543995e-06,
"loss": 0.5586552619934082,
"step": 2644
},
{
"epoch": 2.089257503949447,
"grad_norm": 9.45648755694814,
"learning_rate": 2.5585980989634217e-06,
"loss": 0.20669318735599518,
"step": 2645
},
{
"epoch": 2.090047393364929,
"grad_norm": 10.927788414106475,
"learning_rate": 2.55458855957681e-06,
"loss": 0.3616315424442291,
"step": 2646
},
{
"epoch": 2.0908372827804107,
"grad_norm": 10.887569307564323,
"learning_rate": 2.5505810860818356e-06,
"loss": 0.4344330132007599,
"step": 2647
},
{
"epoch": 2.0916271721958926,
"grad_norm": 7.716338584985328,
"learning_rate": 2.5465756818640258e-06,
"loss": 0.7404396533966064,
"step": 2648
},
{
"epoch": 2.0924170616113744,
"grad_norm": 8.682244564927794,
"learning_rate": 2.5425723503071586e-06,
"loss": 0.7206960320472717,
"step": 2649
},
{
"epoch": 2.0932069510268563,
"grad_norm": 10.426071122202922,
"learning_rate": 2.538571094793263e-06,
"loss": 0.4914172887802124,
"step": 2650
},
{
"epoch": 2.093996840442338,
"grad_norm": 10.818446184348494,
"learning_rate": 2.534571918702611e-06,
"loss": 0.22247469425201416,
"step": 2651
},
{
"epoch": 2.09478672985782,
"grad_norm": 8.183773295547795,
"learning_rate": 2.530574825413722e-06,
"loss": 0.5524343252182007,
"step": 2652
},
{
"epoch": 2.095576619273302,
"grad_norm": 10.561001834128774,
"learning_rate": 2.526579818303351e-06,
"loss": 0.4442840814590454,
"step": 2653
},
{
"epoch": 2.0963665086887837,
"grad_norm": 12.36354051019539,
"learning_rate": 2.5225869007464953e-06,
"loss": 0.43416035175323486,
"step": 2654
},
{
"epoch": 2.0971563981042656,
"grad_norm": 10.720335557119547,
"learning_rate": 2.5185960761163816e-06,
"loss": 0.3992769718170166,
"step": 2655
},
{
"epoch": 2.0979462875197474,
"grad_norm": 12.529038317941579,
"learning_rate": 2.514607347784478e-06,
"loss": 0.5805951356887817,
"step": 2656
},
{
"epoch": 2.098736176935229,
"grad_norm": 8.702799415503215,
"learning_rate": 2.510620719120469e-06,
"loss": 0.4916655123233795,
"step": 2657
},
{
"epoch": 2.0995260663507107,
"grad_norm": 14.453659414269685,
"learning_rate": 2.5066361934922745e-06,
"loss": 0.3664616346359253,
"step": 2658
},
{
"epoch": 2.1003159557661926,
"grad_norm": 9.593268812077245,
"learning_rate": 2.502653774266034e-06,
"loss": 1.0257253646850586,
"step": 2659
},
{
"epoch": 2.1011058451816744,
"grad_norm": 10.7133353823578,
"learning_rate": 2.4986734648061066e-06,
"loss": 0.3108058273792267,
"step": 2660
},
{
"epoch": 2.1018957345971563,
"grad_norm": 9.400571761002238,
"learning_rate": 2.4946952684750773e-06,
"loss": 0.6172835826873779,
"step": 2661
},
{
"epoch": 2.102685624012638,
"grad_norm": 13.17142044952775,
"learning_rate": 2.4907191886337334e-06,
"loss": 0.7078754305839539,
"step": 2662
},
{
"epoch": 2.10347551342812,
"grad_norm": 9.62188121387961,
"learning_rate": 2.4867452286410815e-06,
"loss": 0.8794481754302979,
"step": 2663
},
{
"epoch": 2.104265402843602,
"grad_norm": 8.644663424593888,
"learning_rate": 2.482773391854335e-06,
"loss": 0.3982178866863251,
"step": 2664
},
{
"epoch": 2.1050552922590837,
"grad_norm": 8.326165450957228,
"learning_rate": 2.4788036816289177e-06,
"loss": 0.2630547881126404,
"step": 2665
},
{
"epoch": 2.1058451816745656,
"grad_norm": 8.952882981363153,
"learning_rate": 2.474836101318453e-06,
"loss": 0.5207113027572632,
"step": 2666
},
{
"epoch": 2.1066350710900474,
"grad_norm": 7.954230630151849,
"learning_rate": 2.470870654274767e-06,
"loss": 0.6573061943054199,
"step": 2667
},
{
"epoch": 2.1074249605055293,
"grad_norm": 9.631811512663736,
"learning_rate": 2.4669073438478784e-06,
"loss": 0.29487311840057373,
"step": 2668
},
{
"epoch": 2.108214849921011,
"grad_norm": 10.558058540717864,
"learning_rate": 2.4629461733860055e-06,
"loss": 0.24810953438282013,
"step": 2669
},
{
"epoch": 2.109004739336493,
"grad_norm": 7.799037056656612,
"learning_rate": 2.458987146235562e-06,
"loss": 0.20641303062438965,
"step": 2670
},
{
"epoch": 2.109794628751975,
"grad_norm": 9.478790048879866,
"learning_rate": 2.4550302657411447e-06,
"loss": 0.3360731303691864,
"step": 2671
},
{
"epoch": 2.1105845181674567,
"grad_norm": 6.220059265575532,
"learning_rate": 2.4510755352455413e-06,
"loss": 0.5347034931182861,
"step": 2672
},
{
"epoch": 2.1113744075829386,
"grad_norm": 12.272978606568286,
"learning_rate": 2.4471229580897155e-06,
"loss": 0.33499157428741455,
"step": 2673
},
{
"epoch": 2.11216429699842,
"grad_norm": 11.71613295930798,
"learning_rate": 2.443172537612823e-06,
"loss": 0.810235321521759,
"step": 2674
},
{
"epoch": 2.112954186413902,
"grad_norm": 10.155734244037252,
"learning_rate": 2.43922427715219e-06,
"loss": 0.7765663862228394,
"step": 2675
},
{
"epoch": 2.1137440758293837,
"grad_norm": 8.684157709345978,
"learning_rate": 2.4352781800433213e-06,
"loss": 0.2663138508796692,
"step": 2676
},
{
"epoch": 2.1145339652448656,
"grad_norm": 7.269900003866313,
"learning_rate": 2.431334249619891e-06,
"loss": 0.8140415549278259,
"step": 2677
},
{
"epoch": 2.1153238546603474,
"grad_norm": 11.872895668793475,
"learning_rate": 2.427392489213745e-06,
"loss": 0.7136765718460083,
"step": 2678
},
{
"epoch": 2.1161137440758293,
"grad_norm": 15.205862969567574,
"learning_rate": 2.4234529021548963e-06,
"loss": 0.581038236618042,
"step": 2679
},
{
"epoch": 2.116903633491311,
"grad_norm": 10.329194968162666,
"learning_rate": 2.4195154917715202e-06,
"loss": 0.313004732131958,
"step": 2680
},
{
"epoch": 2.117693522906793,
"grad_norm": 7.375552046117448,
"learning_rate": 2.4155802613899537e-06,
"loss": 0.41316336393356323,
"step": 2681
},
{
"epoch": 2.118483412322275,
"grad_norm": 12.51452481692,
"learning_rate": 2.4116472143346935e-06,
"loss": 1.1459236145019531,
"step": 2682
},
{
"epoch": 2.1192733017377567,
"grad_norm": 8.014729129098054,
"learning_rate": 2.40771635392839e-06,
"loss": 0.10683616995811462,
"step": 2683
},
{
"epoch": 2.1200631911532386,
"grad_norm": 16.715819051395663,
"learning_rate": 2.4037876834918467e-06,
"loss": 0.8776874542236328,
"step": 2684
},
{
"epoch": 2.1208530805687205,
"grad_norm": 15.222031604067872,
"learning_rate": 2.3998612063440175e-06,
"loss": 0.549107551574707,
"step": 2685
},
{
"epoch": 2.1216429699842023,
"grad_norm": 11.067349309197576,
"learning_rate": 2.3959369258020036e-06,
"loss": 0.3277229368686676,
"step": 2686
},
{
"epoch": 2.122432859399684,
"grad_norm": 10.422204153009583,
"learning_rate": 2.3920148451810504e-06,
"loss": 0.3309401869773865,
"step": 2687
},
{
"epoch": 2.123222748815166,
"grad_norm": 11.728679388557909,
"learning_rate": 2.3880949677945437e-06,
"loss": 0.6130886673927307,
"step": 2688
},
{
"epoch": 2.124012638230648,
"grad_norm": 7.010036521611688,
"learning_rate": 2.3841772969540088e-06,
"loss": 0.18529269099235535,
"step": 2689
},
{
"epoch": 2.1248025276461293,
"grad_norm": 21.13853477472658,
"learning_rate": 2.380261835969108e-06,
"loss": 1.7113615274429321,
"step": 2690
},
{
"epoch": 2.125592417061611,
"grad_norm": 11.881636165836428,
"learning_rate": 2.3763485881476345e-06,
"loss": 0.254605233669281,
"step": 2691
},
{
"epoch": 2.126382306477093,
"grad_norm": 8.99366986984141,
"learning_rate": 2.3724375567955115e-06,
"loss": 0.17952272295951843,
"step": 2692
},
{
"epoch": 2.127172195892575,
"grad_norm": 12.677526063207466,
"learning_rate": 2.368528745216795e-06,
"loss": 0.47902315855026245,
"step": 2693
},
{
"epoch": 2.1279620853080567,
"grad_norm": 17.055248079539123,
"learning_rate": 2.3646221567136618e-06,
"loss": 0.36781108379364014,
"step": 2694
},
{
"epoch": 2.1287519747235386,
"grad_norm": 12.63653018866412,
"learning_rate": 2.3607177945864073e-06,
"loss": 0.3109586834907532,
"step": 2695
},
{
"epoch": 2.1295418641390205,
"grad_norm": 7.61269588995347,
"learning_rate": 2.3568156621334508e-06,
"loss": 0.5835099220275879,
"step": 2696
},
{
"epoch": 2.1303317535545023,
"grad_norm": 9.309063388608303,
"learning_rate": 2.352915762651325e-06,
"loss": 0.6755543947219849,
"step": 2697
},
{
"epoch": 2.131121642969984,
"grad_norm": 12.180295054957952,
"learning_rate": 2.3490180994346816e-06,
"loss": 0.3916603624820709,
"step": 2698
},
{
"epoch": 2.131911532385466,
"grad_norm": 11.375922907331335,
"learning_rate": 2.3451226757762794e-06,
"loss": 0.3473902940750122,
"step": 2699
},
{
"epoch": 2.132701421800948,
"grad_norm": 11.141196729822012,
"learning_rate": 2.341229494966981e-06,
"loss": 0.48169010877609253,
"step": 2700
},
{
"epoch": 2.1334913112164298,
"grad_norm": 11.044250444422778,
"learning_rate": 2.3373385602957595e-06,
"loss": 0.5068016052246094,
"step": 2701
},
{
"epoch": 2.1342812006319116,
"grad_norm": 14.599024263747504,
"learning_rate": 2.3334498750496913e-06,
"loss": 0.5701960325241089,
"step": 2702
},
{
"epoch": 2.1350710900473935,
"grad_norm": 8.48416563722893,
"learning_rate": 2.3295634425139502e-06,
"loss": 0.7108640074729919,
"step": 2703
},
{
"epoch": 2.1358609794628753,
"grad_norm": 8.64074974867785,
"learning_rate": 2.3256792659718065e-06,
"loss": 0.5362042188644409,
"step": 2704
},
{
"epoch": 2.136650868878357,
"grad_norm": 13.030143439241625,
"learning_rate": 2.321797348704625e-06,
"loss": 0.6571926474571228,
"step": 2705
},
{
"epoch": 2.137440758293839,
"grad_norm": 11.69730983416275,
"learning_rate": 2.317917693991863e-06,
"loss": 0.4899098575115204,
"step": 2706
},
{
"epoch": 2.138230647709321,
"grad_norm": 14.65236719972843,
"learning_rate": 2.314040305111065e-06,
"loss": 1.4374269247055054,
"step": 2707
},
{
"epoch": 2.1390205371248023,
"grad_norm": 11.277026389499401,
"learning_rate": 2.310165185337862e-06,
"loss": 0.40772897005081177,
"step": 2708
},
{
"epoch": 2.139810426540284,
"grad_norm": 9.022193712896843,
"learning_rate": 2.3062923379459684e-06,
"loss": 0.2784253656864166,
"step": 2709
},
{
"epoch": 2.140600315955766,
"grad_norm": 9.676192450422299,
"learning_rate": 2.302421766207177e-06,
"loss": 0.34623268246650696,
"step": 2710
},
{
"epoch": 2.141390205371248,
"grad_norm": 8.652801319265459,
"learning_rate": 2.2985534733913623e-06,
"loss": 0.33302298188209534,
"step": 2711
},
{
"epoch": 2.1421800947867298,
"grad_norm": 8.990062562090557,
"learning_rate": 2.2946874627664677e-06,
"loss": 0.44455865025520325,
"step": 2712
},
{
"epoch": 2.1429699842022116,
"grad_norm": 21.04889344511468,
"learning_rate": 2.2908237375985137e-06,
"loss": 0.5782222747802734,
"step": 2713
},
{
"epoch": 2.1437598736176935,
"grad_norm": 9.050405036026557,
"learning_rate": 2.2869623011515874e-06,
"loss": 0.3401952385902405,
"step": 2714
},
{
"epoch": 2.1445497630331753,
"grad_norm": 15.891303126899643,
"learning_rate": 2.283103156687843e-06,
"loss": 0.5884231925010681,
"step": 2715
},
{
"epoch": 2.145339652448657,
"grad_norm": 10.482443238163478,
"learning_rate": 2.2792463074674987e-06,
"loss": 0.6347147226333618,
"step": 2716
},
{
"epoch": 2.146129541864139,
"grad_norm": 8.647383536000094,
"learning_rate": 2.275391756748833e-06,
"loss": 0.5545493960380554,
"step": 2717
},
{
"epoch": 2.146919431279621,
"grad_norm": 12.912399269663396,
"learning_rate": 2.2715395077881837e-06,
"loss": 0.639219343662262,
"step": 2718
},
{
"epoch": 2.147709320695103,
"grad_norm": 14.471210640975691,
"learning_rate": 2.2676895638399427e-06,
"loss": 0.5747156143188477,
"step": 2719
},
{
"epoch": 2.1484992101105846,
"grad_norm": 10.784235307725265,
"learning_rate": 2.2638419281565536e-06,
"loss": 1.0131480693817139,
"step": 2720
},
{
"epoch": 2.1492890995260665,
"grad_norm": 8.991887236619839,
"learning_rate": 2.259996603988518e-06,
"loss": 0.1766696721315384,
"step": 2721
},
{
"epoch": 2.1500789889415484,
"grad_norm": 8.434251974603205,
"learning_rate": 2.256153594584372e-06,
"loss": 0.4502110481262207,
"step": 2722
},
{
"epoch": 2.1508688783570302,
"grad_norm": 9.15347582657087,
"learning_rate": 2.2523129031907047e-06,
"loss": 0.17326998710632324,
"step": 2723
},
{
"epoch": 2.1516587677725116,
"grad_norm": 10.132042927830476,
"learning_rate": 2.248474533052145e-06,
"loss": 0.32104817032814026,
"step": 2724
},
{
"epoch": 2.1524486571879935,
"grad_norm": 22.126808261308526,
"learning_rate": 2.2446384874113586e-06,
"loss": 0.28873202204704285,
"step": 2725
},
{
"epoch": 2.1532385466034754,
"grad_norm": 9.874113810387273,
"learning_rate": 2.2408047695090533e-06,
"loss": 0.9483802318572998,
"step": 2726
},
{
"epoch": 2.154028436018957,
"grad_norm": 8.399149585433827,
"learning_rate": 2.2369733825839663e-06,
"loss": 0.4678645133972168,
"step": 2727
},
{
"epoch": 2.154818325434439,
"grad_norm": 8.149774853170221,
"learning_rate": 2.233144329872863e-06,
"loss": 0.4194965660572052,
"step": 2728
},
{
"epoch": 2.155608214849921,
"grad_norm": 10.007897566804706,
"learning_rate": 2.229317614610539e-06,
"loss": 0.18562570214271545,
"step": 2729
},
{
"epoch": 2.156398104265403,
"grad_norm": 11.593570889438942,
"learning_rate": 2.2254932400298195e-06,
"loss": 0.2921253442764282,
"step": 2730
},
{
"epoch": 2.1571879936808847,
"grad_norm": 9.592986733727226,
"learning_rate": 2.2216712093615474e-06,
"loss": 0.5794805288314819,
"step": 2731
},
{
"epoch": 2.1579778830963665,
"grad_norm": 18.709687315043503,
"learning_rate": 2.2178515258345885e-06,
"loss": 0.7258075475692749,
"step": 2732
},
{
"epoch": 2.1587677725118484,
"grad_norm": 15.272801270549794,
"learning_rate": 2.2140341926758186e-06,
"loss": 0.622929036617279,
"step": 2733
},
{
"epoch": 2.1595576619273302,
"grad_norm": 12.017018764788721,
"learning_rate": 2.2102192131101386e-06,
"loss": 0.656425952911377,
"step": 2734
},
{
"epoch": 2.160347551342812,
"grad_norm": 13.837307556949694,
"learning_rate": 2.2064065903604537e-06,
"loss": 0.3320290744304657,
"step": 2735
},
{
"epoch": 2.161137440758294,
"grad_norm": 9.400730803116534,
"learning_rate": 2.2025963276476814e-06,
"loss": 0.3217647075653076,
"step": 2736
},
{
"epoch": 2.161927330173776,
"grad_norm": 13.942726496241551,
"learning_rate": 2.1987884281907425e-06,
"loss": 0.5152993202209473,
"step": 2737
},
{
"epoch": 2.1627172195892577,
"grad_norm": 9.822554670458867,
"learning_rate": 2.1949828952065643e-06,
"loss": 0.23020845651626587,
"step": 2738
},
{
"epoch": 2.1635071090047395,
"grad_norm": 12.584615682614533,
"learning_rate": 2.191179731910073e-06,
"loss": 0.4029275178909302,
"step": 2739
},
{
"epoch": 2.1642969984202214,
"grad_norm": 11.17103259387213,
"learning_rate": 2.1873789415141932e-06,
"loss": 0.23068757355213165,
"step": 2740
},
{
"epoch": 2.165086887835703,
"grad_norm": 12.389468057157949,
"learning_rate": 2.183580527229846e-06,
"loss": 0.39349129796028137,
"step": 2741
},
{
"epoch": 2.1658767772511847,
"grad_norm": 8.11758263289566,
"learning_rate": 2.1797844922659437e-06,
"loss": 0.2747986912727356,
"step": 2742
},
{
"epoch": 2.1666666666666665,
"grad_norm": 12.530738023381977,
"learning_rate": 2.1759908398293896e-06,
"loss": 1.14316987991333,
"step": 2743
},
{
"epoch": 2.1674565560821484,
"grad_norm": 14.915166154269375,
"learning_rate": 2.1721995731250726e-06,
"loss": 0.517180323600769,
"step": 2744
},
{
"epoch": 2.1682464454976302,
"grad_norm": 13.701952179268636,
"learning_rate": 2.1684106953558693e-06,
"loss": 0.25364458560943604,
"step": 2745
},
{
"epoch": 2.169036334913112,
"grad_norm": 9.011375942635011,
"learning_rate": 2.1646242097226343e-06,
"loss": 0.34542322158813477,
"step": 2746
},
{
"epoch": 2.169826224328594,
"grad_norm": 9.781858990537309,
"learning_rate": 2.1608401194242035e-06,
"loss": 0.24171094596385956,
"step": 2747
},
{
"epoch": 2.170616113744076,
"grad_norm": 8.982304890802853,
"learning_rate": 2.1570584276573896e-06,
"loss": 0.609094500541687,
"step": 2748
},
{
"epoch": 2.1714060031595577,
"grad_norm": 9.10557044009763,
"learning_rate": 2.1532791376169778e-06,
"loss": 0.2869632840156555,
"step": 2749
},
{
"epoch": 2.1721958925750395,
"grad_norm": 9.939923288953379,
"learning_rate": 2.1495022524957244e-06,
"loss": 0.39264094829559326,
"step": 2750
},
{
"epoch": 2.1729857819905214,
"grad_norm": 9.585598670064135,
"learning_rate": 2.1457277754843547e-06,
"loss": 0.18833236396312714,
"step": 2751
},
{
"epoch": 2.1737756714060033,
"grad_norm": 11.521955780403779,
"learning_rate": 2.14195570977156e-06,
"loss": 0.9987523555755615,
"step": 2752
},
{
"epoch": 2.174565560821485,
"grad_norm": 19.37585048155451,
"learning_rate": 2.1381860585439906e-06,
"loss": 0.46828562021255493,
"step": 2753
},
{
"epoch": 2.175355450236967,
"grad_norm": 11.086157538641421,
"learning_rate": 2.134418824986267e-06,
"loss": 0.4600408673286438,
"step": 2754
},
{
"epoch": 2.176145339652449,
"grad_norm": 7.742526132754247,
"learning_rate": 2.1306540122809543e-06,
"loss": 0.27102628350257874,
"step": 2755
},
{
"epoch": 2.1769352290679307,
"grad_norm": 19.82779184404181,
"learning_rate": 2.1268916236085814e-06,
"loss": 0.5771945714950562,
"step": 2756
},
{
"epoch": 2.177725118483412,
"grad_norm": 14.081906680861074,
"learning_rate": 2.1231316621476234e-06,
"loss": 0.3828513026237488,
"step": 2757
},
{
"epoch": 2.178515007898894,
"grad_norm": 12.677665354894604,
"learning_rate": 2.1193741310745125e-06,
"loss": 1.0411120653152466,
"step": 2758
},
{
"epoch": 2.179304897314376,
"grad_norm": 8.072309116801922,
"learning_rate": 2.115619033563624e-06,
"loss": 0.1945279985666275,
"step": 2759
},
{
"epoch": 2.1800947867298577,
"grad_norm": 13.107958264184125,
"learning_rate": 2.111866372787272e-06,
"loss": 0.32704418897628784,
"step": 2760
},
{
"epoch": 2.1808846761453395,
"grad_norm": 10.8975500647945,
"learning_rate": 2.1081161519157168e-06,
"loss": 0.4165365695953369,
"step": 2761
},
{
"epoch": 2.1816745655608214,
"grad_norm": 15.891308461676624,
"learning_rate": 2.104368374117161e-06,
"loss": 0.7583081722259521,
"step": 2762
},
{
"epoch": 2.1824644549763033,
"grad_norm": 12.34192529563348,
"learning_rate": 2.100623042557739e-06,
"loss": 0.39596104621887207,
"step": 2763
},
{
"epoch": 2.183254344391785,
"grad_norm": 12.790970766784817,
"learning_rate": 2.0968801604015176e-06,
"loss": 0.33872219920158386,
"step": 2764
},
{
"epoch": 2.184044233807267,
"grad_norm": 8.883238538058585,
"learning_rate": 2.0931397308104986e-06,
"loss": 0.48357391357421875,
"step": 2765
},
{
"epoch": 2.184834123222749,
"grad_norm": 10.033364618014366,
"learning_rate": 2.0894017569446034e-06,
"loss": 0.2329304814338684,
"step": 2766
},
{
"epoch": 2.1856240126382307,
"grad_norm": 10.963546856917345,
"learning_rate": 2.0856662419616908e-06,
"loss": 0.38120099902153015,
"step": 2767
},
{
"epoch": 2.1864139020537126,
"grad_norm": 8.659173313868209,
"learning_rate": 2.081933189017533e-06,
"loss": 0.18225795030593872,
"step": 2768
},
{
"epoch": 2.1872037914691944,
"grad_norm": 12.579078453532555,
"learning_rate": 2.0782026012658268e-06,
"loss": 0.6677108407020569,
"step": 2769
},
{
"epoch": 2.1879936808846763,
"grad_norm": 10.496831262975576,
"learning_rate": 2.074474481858185e-06,
"loss": 0.7098184823989868,
"step": 2770
},
{
"epoch": 2.188783570300158,
"grad_norm": 15.314620074905251,
"learning_rate": 2.0707488339441338e-06,
"loss": 0.8517345786094666,
"step": 2771
},
{
"epoch": 2.18957345971564,
"grad_norm": 11.322801114701715,
"learning_rate": 2.067025660671114e-06,
"loss": 0.15628886222839355,
"step": 2772
},
{
"epoch": 2.190363349131122,
"grad_norm": 10.857872805242897,
"learning_rate": 2.0633049651844744e-06,
"loss": 0.23775914311408997,
"step": 2773
},
{
"epoch": 2.1911532385466037,
"grad_norm": 17.367672664117908,
"learning_rate": 2.0595867506274707e-06,
"loss": 1.0561261177062988,
"step": 2774
},
{
"epoch": 2.191943127962085,
"grad_norm": 10.613741381941095,
"learning_rate": 2.055871020141263e-06,
"loss": 0.7364283204078674,
"step": 2775
},
{
"epoch": 2.192733017377567,
"grad_norm": 11.141296214016752,
"learning_rate": 2.052157776864912e-06,
"loss": 0.5597008466720581,
"step": 2776
},
{
"epoch": 2.193522906793049,
"grad_norm": 16.153450357978947,
"learning_rate": 2.0484470239353786e-06,
"loss": 0.48830780386924744,
"step": 2777
},
{
"epoch": 2.1943127962085307,
"grad_norm": 13.351249645259788,
"learning_rate": 2.044738764487519e-06,
"loss": 1.164239764213562,
"step": 2778
},
{
"epoch": 2.1951026856240126,
"grad_norm": 9.298702632023925,
"learning_rate": 2.0410330016540824e-06,
"loss": 0.6914669275283813,
"step": 2779
},
{
"epoch": 2.1958925750394944,
"grad_norm": 19.26030750116404,
"learning_rate": 2.0373297385657105e-06,
"loss": 0.9622442722320557,
"step": 2780
},
{
"epoch": 2.1966824644549763,
"grad_norm": 10.373277943662284,
"learning_rate": 2.0336289783509306e-06,
"loss": 0.2636718153953552,
"step": 2781
},
{
"epoch": 2.197472353870458,
"grad_norm": 13.914270193422176,
"learning_rate": 2.0299307241361587e-06,
"loss": 0.5753885507583618,
"step": 2782
},
{
"epoch": 2.19826224328594,
"grad_norm": 11.117981102297946,
"learning_rate": 2.0262349790456908e-06,
"loss": 0.3335786461830139,
"step": 2783
},
{
"epoch": 2.199052132701422,
"grad_norm": 12.558104305781397,
"learning_rate": 2.0225417462017054e-06,
"loss": 0.348050594329834,
"step": 2784
},
{
"epoch": 2.1998420221169037,
"grad_norm": 11.355102467965308,
"learning_rate": 2.0188510287242564e-06,
"loss": 1.1008378267288208,
"step": 2785
},
{
"epoch": 2.2006319115323856,
"grad_norm": 7.680728739198105,
"learning_rate": 2.0151628297312765e-06,
"loss": 0.573356568813324,
"step": 2786
},
{
"epoch": 2.2014218009478674,
"grad_norm": 7.8089582056318525,
"learning_rate": 2.0114771523385682e-06,
"loss": 0.12702372670173645,
"step": 2787
},
{
"epoch": 2.2022116903633493,
"grad_norm": 9.904120419329733,
"learning_rate": 2.0077939996598023e-06,
"loss": 0.33679264783859253,
"step": 2788
},
{
"epoch": 2.203001579778831,
"grad_norm": 9.182229422297102,
"learning_rate": 2.004113374806516e-06,
"loss": 0.6996742486953735,
"step": 2789
},
{
"epoch": 2.2037914691943126,
"grad_norm": 19.940382025619883,
"learning_rate": 2.0004352808881183e-06,
"loss": 0.4126805067062378,
"step": 2790
},
{
"epoch": 2.2045813586097944,
"grad_norm": 8.37724044555483,
"learning_rate": 1.996759721011873e-06,
"loss": 0.43272721767425537,
"step": 2791
},
{
"epoch": 2.2053712480252763,
"grad_norm": 10.772306642802901,
"learning_rate": 1.9930866982829067e-06,
"loss": 0.31847819685935974,
"step": 2792
},
{
"epoch": 2.206161137440758,
"grad_norm": 8.767557779912506,
"learning_rate": 1.9894162158041972e-06,
"loss": 0.3612692654132843,
"step": 2793
},
{
"epoch": 2.20695102685624,
"grad_norm": 7.606502171835215,
"learning_rate": 1.9857482766765812e-06,
"loss": 0.21649795770645142,
"step": 2794
},
{
"epoch": 2.207740916271722,
"grad_norm": 13.255090811561212,
"learning_rate": 1.9820828839987483e-06,
"loss": 0.3744838535785675,
"step": 2795
},
{
"epoch": 2.2085308056872037,
"grad_norm": 8.919411850780316,
"learning_rate": 1.9784200408672332e-06,
"loss": 0.26096653938293457,
"step": 2796
},
{
"epoch": 2.2093206951026856,
"grad_norm": 9.147600079448718,
"learning_rate": 1.9747597503764177e-06,
"loss": 0.47567954659461975,
"step": 2797
},
{
"epoch": 2.2101105845181674,
"grad_norm": 9.819631053417764,
"learning_rate": 1.9711020156185266e-06,
"loss": 0.4913148880004883,
"step": 2798
},
{
"epoch": 2.2109004739336493,
"grad_norm": 11.328664528684907,
"learning_rate": 1.9674468396836273e-06,
"loss": 0.6570562720298767,
"step": 2799
},
{
"epoch": 2.211690363349131,
"grad_norm": 8.021273430434308,
"learning_rate": 1.963794225659624e-06,
"loss": 0.5904330015182495,
"step": 2800
},
{
"epoch": 2.212480252764613,
"grad_norm": 8.57411699896354,
"learning_rate": 1.960144176632257e-06,
"loss": 0.6471877098083496,
"step": 2801
},
{
"epoch": 2.213270142180095,
"grad_norm": 11.950981978395122,
"learning_rate": 1.9564966956850995e-06,
"loss": 0.5829299688339233,
"step": 2802
},
{
"epoch": 2.2140600315955767,
"grad_norm": 8.451163149947735,
"learning_rate": 1.952851785899556e-06,
"loss": 0.35751497745513916,
"step": 2803
},
{
"epoch": 2.2148499210110586,
"grad_norm": 10.138877718616586,
"learning_rate": 1.949209450354858e-06,
"loss": 0.4311722218990326,
"step": 2804
},
{
"epoch": 2.2156398104265405,
"grad_norm": 12.683217627573496,
"learning_rate": 1.9455696921280627e-06,
"loss": 0.7441365718841553,
"step": 2805
},
{
"epoch": 2.2164296998420223,
"grad_norm": 11.694728091873205,
"learning_rate": 1.941932514294049e-06,
"loss": 0.49206316471099854,
"step": 2806
},
{
"epoch": 2.217219589257504,
"grad_norm": 11.855236520924457,
"learning_rate": 1.938297919925518e-06,
"loss": 0.5097864866256714,
"step": 2807
},
{
"epoch": 2.2180094786729856,
"grad_norm": 12.318836475278742,
"learning_rate": 1.9346659120929867e-06,
"loss": 0.6787229776382446,
"step": 2808
},
{
"epoch": 2.2187993680884674,
"grad_norm": 11.391591651407479,
"learning_rate": 1.9310364938647863e-06,
"loss": 0.3987847864627838,
"step": 2809
},
{
"epoch": 2.2195892575039493,
"grad_norm": 10.502147605835487,
"learning_rate": 1.9274096683070625e-06,
"loss": 0.5808389782905579,
"step": 2810
},
{
"epoch": 2.220379146919431,
"grad_norm": 15.889506259074903,
"learning_rate": 1.9237854384837685e-06,
"loss": 0.6458317041397095,
"step": 2811
},
{
"epoch": 2.221169036334913,
"grad_norm": 10.72441475075481,
"learning_rate": 1.920163807456666e-06,
"loss": 0.22764013707637787,
"step": 2812
},
{
"epoch": 2.221958925750395,
"grad_norm": 9.406804023326472,
"learning_rate": 1.916544778285319e-06,
"loss": 0.3760148286819458,
"step": 2813
},
{
"epoch": 2.2227488151658767,
"grad_norm": 12.999087321799392,
"learning_rate": 1.912928354027101e-06,
"loss": 0.9230769872665405,
"step": 2814
},
{
"epoch": 2.2235387045813586,
"grad_norm": 5.261146266935435,
"learning_rate": 1.9093145377371734e-06,
"loss": 0.22567349672317505,
"step": 2815
},
{
"epoch": 2.2243285939968405,
"grad_norm": 9.418952716249674,
"learning_rate": 1.9057033324685014e-06,
"loss": 0.3293178677558899,
"step": 2816
},
{
"epoch": 2.2251184834123223,
"grad_norm": 7.627242194894803,
"learning_rate": 1.9020947412718428e-06,
"loss": 0.3232775926589966,
"step": 2817
},
{
"epoch": 2.225908372827804,
"grad_norm": 6.888057264096037,
"learning_rate": 1.8984887671957492e-06,
"loss": 0.3350941240787506,
"step": 2818
},
{
"epoch": 2.226698262243286,
"grad_norm": 9.593533038856789,
"learning_rate": 1.8948854132865601e-06,
"loss": 0.4311079978942871,
"step": 2819
},
{
"epoch": 2.227488151658768,
"grad_norm": 13.454745411900346,
"learning_rate": 1.8912846825883973e-06,
"loss": 1.0597552061080933,
"step": 2820
},
{
"epoch": 2.2282780410742498,
"grad_norm": 11.058976812584511,
"learning_rate": 1.8876865781431714e-06,
"loss": 0.6466134786605835,
"step": 2821
},
{
"epoch": 2.2290679304897316,
"grad_norm": 9.06033016129208,
"learning_rate": 1.8840911029905718e-06,
"loss": 0.2965662479400635,
"step": 2822
},
{
"epoch": 2.229857819905213,
"grad_norm": 10.351708737498239,
"learning_rate": 1.8804982601680705e-06,
"loss": 0.367464542388916,
"step": 2823
},
{
"epoch": 2.230647709320695,
"grad_norm": 9.6500211642715,
"learning_rate": 1.8769080527109113e-06,
"loss": 0.48031240701675415,
"step": 2824
},
{
"epoch": 2.2314375987361768,
"grad_norm": 10.833034804804822,
"learning_rate": 1.8733204836521156e-06,
"loss": 0.40878814458847046,
"step": 2825
},
{
"epoch": 2.2322274881516586,
"grad_norm": 7.26374801360143,
"learning_rate": 1.8697355560224679e-06,
"loss": 0.5350329279899597,
"step": 2826
},
{
"epoch": 2.2330173775671405,
"grad_norm": 7.132708625630138,
"learning_rate": 1.8661532728505315e-06,
"loss": 0.12886342406272888,
"step": 2827
},
{
"epoch": 2.2338072669826223,
"grad_norm": 17.58232884596969,
"learning_rate": 1.8625736371626307e-06,
"loss": 0.37391525506973267,
"step": 2828
},
{
"epoch": 2.234597156398104,
"grad_norm": 9.783002261543379,
"learning_rate": 1.8589966519828523e-06,
"loss": 0.47310611605644226,
"step": 2829
},
{
"epoch": 2.235387045813586,
"grad_norm": 6.751501626334738,
"learning_rate": 1.8554223203330473e-06,
"loss": 0.15924470126628876,
"step": 2830
},
{
"epoch": 2.236176935229068,
"grad_norm": 11.510963325216071,
"learning_rate": 1.8518506452328182e-06,
"loss": 0.589087188243866,
"step": 2831
},
{
"epoch": 2.2369668246445498,
"grad_norm": 11.300997192020064,
"learning_rate": 1.848281629699532e-06,
"loss": 0.29100021719932556,
"step": 2832
},
{
"epoch": 2.2377567140600316,
"grad_norm": 11.201992942679986,
"learning_rate": 1.8447152767483039e-06,
"loss": 0.4221479296684265,
"step": 2833
},
{
"epoch": 2.2385466034755135,
"grad_norm": 13.129885124330743,
"learning_rate": 1.8411515893920002e-06,
"loss": 0.5831780433654785,
"step": 2834
},
{
"epoch": 2.2393364928909953,
"grad_norm": 8.6407107464604,
"learning_rate": 1.8375905706412362e-06,
"loss": 0.1891106814146042,
"step": 2835
},
{
"epoch": 2.240126382306477,
"grad_norm": 11.674843768645822,
"learning_rate": 1.834032223504371e-06,
"loss": 0.7293417453765869,
"step": 2836
},
{
"epoch": 2.240916271721959,
"grad_norm": 13.500251067265657,
"learning_rate": 1.83047655098751e-06,
"loss": 0.6112027168273926,
"step": 2837
},
{
"epoch": 2.241706161137441,
"grad_norm": 10.949569000417599,
"learning_rate": 1.8269235560944954e-06,
"loss": 0.7212045192718506,
"step": 2838
},
{
"epoch": 2.242496050552923,
"grad_norm": 15.306834691971645,
"learning_rate": 1.823373241826909e-06,
"loss": 0.3620745539665222,
"step": 2839
},
{
"epoch": 2.2432859399684046,
"grad_norm": 11.812488548682289,
"learning_rate": 1.819825611184069e-06,
"loss": 0.6836791038513184,
"step": 2840
},
{
"epoch": 2.244075829383886,
"grad_norm": 12.12932684339931,
"learning_rate": 1.8162806671630252e-06,
"loss": 0.6339531540870667,
"step": 2841
},
{
"epoch": 2.244865718799368,
"grad_norm": 17.039607010297708,
"learning_rate": 1.8127384127585574e-06,
"loss": 0.6890298128128052,
"step": 2842
},
{
"epoch": 2.2456556082148498,
"grad_norm": 9.659644016427855,
"learning_rate": 1.8091988509631741e-06,
"loss": 0.7656424641609192,
"step": 2843
},
{
"epoch": 2.2464454976303316,
"grad_norm": 11.125303849970079,
"learning_rate": 1.8056619847671087e-06,
"loss": 0.4176521897315979,
"step": 2844
},
{
"epoch": 2.2472353870458135,
"grad_norm": 10.295484759083562,
"learning_rate": 1.8021278171583163e-06,
"loss": 0.4512486457824707,
"step": 2845
},
{
"epoch": 2.2480252764612954,
"grad_norm": 14.130813395934341,
"learning_rate": 1.7985963511224757e-06,
"loss": 0.2794567346572876,
"step": 2846
},
{
"epoch": 2.248815165876777,
"grad_norm": 7.986427853036559,
"learning_rate": 1.7950675896429815e-06,
"loss": 0.5515426993370056,
"step": 2847
},
{
"epoch": 2.249605055292259,
"grad_norm": 10.914377369413538,
"learning_rate": 1.7915415357009398e-06,
"loss": 0.5858911275863647,
"step": 2848
},
{
"epoch": 2.250394944707741,
"grad_norm": 9.057658354905428,
"learning_rate": 1.7880181922751743e-06,
"loss": 0.4116283357143402,
"step": 2849
},
{
"epoch": 2.251184834123223,
"grad_norm": 7.630261787099066,
"learning_rate": 1.7844975623422151e-06,
"loss": 0.11056342720985413,
"step": 2850
},
{
"epoch": 2.2519747235387046,
"grad_norm": 10.503693991312414,
"learning_rate": 1.7809796488763058e-06,
"loss": 0.3872639834880829,
"step": 2851
},
{
"epoch": 2.2527646129541865,
"grad_norm": 12.035802077065455,
"learning_rate": 1.7774644548493908e-06,
"loss": 0.6129223108291626,
"step": 2852
},
{
"epoch": 2.2535545023696684,
"grad_norm": 9.46607219708113,
"learning_rate": 1.7739519832311147e-06,
"loss": 0.7763924598693848,
"step": 2853
},
{
"epoch": 2.2543443917851502,
"grad_norm": 6.681381059967548,
"learning_rate": 1.770442236988824e-06,
"loss": 0.14335396885871887,
"step": 2854
},
{
"epoch": 2.255134281200632,
"grad_norm": 18.45511369704355,
"learning_rate": 1.7669352190875687e-06,
"loss": 0.8193086385726929,
"step": 2855
},
{
"epoch": 2.2559241706161135,
"grad_norm": 13.554836834181051,
"learning_rate": 1.7634309324900872e-06,
"loss": 0.43514060974121094,
"step": 2856
},
{
"epoch": 2.2567140600315954,
"grad_norm": 7.454792364779305,
"learning_rate": 1.7599293801568114e-06,
"loss": 0.2814521789550781,
"step": 2857
},
{
"epoch": 2.257503949447077,
"grad_norm": 10.368313203853804,
"learning_rate": 1.7564305650458662e-06,
"loss": 0.41086602210998535,
"step": 2858
},
{
"epoch": 2.258293838862559,
"grad_norm": 13.204982194940747,
"learning_rate": 1.7529344901130579e-06,
"loss": 0.8798295259475708,
"step": 2859
},
{
"epoch": 2.259083728278041,
"grad_norm": 7.7989794475969925,
"learning_rate": 1.749441158311886e-06,
"loss": 0.5868876576423645,
"step": 2860
},
{
"epoch": 2.259873617693523,
"grad_norm": 11.119432491809754,
"learning_rate": 1.7459505725935282e-06,
"loss": 0.737459123134613,
"step": 2861
},
{
"epoch": 2.2606635071090047,
"grad_norm": 8.550440775085379,
"learning_rate": 1.7424627359068418e-06,
"loss": 0.5273802280426025,
"step": 2862
},
{
"epoch": 2.2614533965244865,
"grad_norm": 17.591370161982017,
"learning_rate": 1.738977651198363e-06,
"loss": 0.33475935459136963,
"step": 2863
},
{
"epoch": 2.2622432859399684,
"grad_norm": 18.24717692489189,
"learning_rate": 1.7354953214123033e-06,
"loss": 0.6660683751106262,
"step": 2864
},
{
"epoch": 2.2630331753554502,
"grad_norm": 18.92375097618639,
"learning_rate": 1.732015749490546e-06,
"loss": 0.5045152902603149,
"step": 2865
},
{
"epoch": 2.263823064770932,
"grad_norm": 11.503325523793645,
"learning_rate": 1.7285389383726448e-06,
"loss": 0.4045574367046356,
"step": 2866
},
{
"epoch": 2.264612954186414,
"grad_norm": 6.49265889702697,
"learning_rate": 1.7250648909958218e-06,
"loss": 0.15654566884040833,
"step": 2867
},
{
"epoch": 2.265402843601896,
"grad_norm": 21.090448949983475,
"learning_rate": 1.7215936102949626e-06,
"loss": 0.48715391755104065,
"step": 2868
},
{
"epoch": 2.2661927330173777,
"grad_norm": 16.416122390825365,
"learning_rate": 1.7181250992026177e-06,
"loss": 0.43794485926628113,
"step": 2869
},
{
"epoch": 2.2669826224328595,
"grad_norm": 9.994750391150157,
"learning_rate": 1.7146593606489958e-06,
"loss": 0.24812400341033936,
"step": 2870
},
{
"epoch": 2.2677725118483414,
"grad_norm": 12.269825935627136,
"learning_rate": 1.7111963975619644e-06,
"loss": 0.3631921410560608,
"step": 2871
},
{
"epoch": 2.2685624012638232,
"grad_norm": 9.202619856134907,
"learning_rate": 1.7077362128670471e-06,
"loss": 0.402981162071228,
"step": 2872
},
{
"epoch": 2.269352290679305,
"grad_norm": 15.450749001959284,
"learning_rate": 1.7042788094874162e-06,
"loss": 0.4597552418708801,
"step": 2873
},
{
"epoch": 2.270142180094787,
"grad_norm": 14.523705053291398,
"learning_rate": 1.7008241903439032e-06,
"loss": 0.8005387783050537,
"step": 2874
},
{
"epoch": 2.2709320695102684,
"grad_norm": 15.590177412312109,
"learning_rate": 1.6973723583549773e-06,
"loss": 0.9203214645385742,
"step": 2875
},
{
"epoch": 2.2717219589257502,
"grad_norm": 10.511158290122776,
"learning_rate": 1.693923316436758e-06,
"loss": 0.4327046573162079,
"step": 2876
},
{
"epoch": 2.272511848341232,
"grad_norm": 8.438221638698291,
"learning_rate": 1.690477067503008e-06,
"loss": 0.5756062269210815,
"step": 2877
},
{
"epoch": 2.273301737756714,
"grad_norm": 10.785045606484534,
"learning_rate": 1.6870336144651279e-06,
"loss": 0.4346384108066559,
"step": 2878
},
{
"epoch": 2.274091627172196,
"grad_norm": 23.099471736106224,
"learning_rate": 1.6835929602321632e-06,
"loss": 1.036636233329773,
"step": 2879
},
{
"epoch": 2.2748815165876777,
"grad_norm": 9.814952316628542,
"learning_rate": 1.6801551077107846e-06,
"loss": 0.35150665044784546,
"step": 2880
},
{
"epoch": 2.2756714060031595,
"grad_norm": 8.217909844120667,
"learning_rate": 1.676720059805304e-06,
"loss": 0.39682185649871826,
"step": 2881
},
{
"epoch": 2.2764612954186414,
"grad_norm": 10.897341078417234,
"learning_rate": 1.6732878194176583e-06,
"loss": 0.7163082361221313,
"step": 2882
},
{
"epoch": 2.2772511848341233,
"grad_norm": 9.957341822543633,
"learning_rate": 1.6698583894474191e-06,
"loss": 0.30133479833602905,
"step": 2883
},
{
"epoch": 2.278041074249605,
"grad_norm": 13.035107378741438,
"learning_rate": 1.6664317727917783e-06,
"loss": 0.35737180709838867,
"step": 2884
},
{
"epoch": 2.278830963665087,
"grad_norm": 7.354777374068423,
"learning_rate": 1.6630079723455555e-06,
"loss": 0.44452938437461853,
"step": 2885
},
{
"epoch": 2.279620853080569,
"grad_norm": 15.178155573408157,
"learning_rate": 1.6595869910011847e-06,
"loss": 0.4684889614582062,
"step": 2886
},
{
"epoch": 2.2804107424960507,
"grad_norm": 13.101785923414612,
"learning_rate": 1.6561688316487218e-06,
"loss": 0.5309923887252808,
"step": 2887
},
{
"epoch": 2.2812006319115326,
"grad_norm": 18.864692186332565,
"learning_rate": 1.652753497175843e-06,
"loss": 0.44382137060165405,
"step": 2888
},
{
"epoch": 2.2819905213270144,
"grad_norm": 11.720692846924454,
"learning_rate": 1.649340990467832e-06,
"loss": 0.5143547058105469,
"step": 2889
},
{
"epoch": 2.282780410742496,
"grad_norm": 7.264023340638442,
"learning_rate": 1.6459313144075879e-06,
"loss": 0.5263423323631287,
"step": 2890
},
{
"epoch": 2.2835703001579777,
"grad_norm": 10.926449502075325,
"learning_rate": 1.642524471875611e-06,
"loss": 0.39519965648651123,
"step": 2891
},
{
"epoch": 2.2843601895734595,
"grad_norm": 6.753518451586659,
"learning_rate": 1.6391204657500175e-06,
"loss": 0.2852955758571625,
"step": 2892
},
{
"epoch": 2.2851500789889414,
"grad_norm": 17.425208641245916,
"learning_rate": 1.6357192989065224e-06,
"loss": 0.6628504991531372,
"step": 2893
},
{
"epoch": 2.2859399684044233,
"grad_norm": 8.821318692301833,
"learning_rate": 1.632320974218442e-06,
"loss": 0.3219633102416992,
"step": 2894
},
{
"epoch": 2.286729857819905,
"grad_norm": 9.0269356351622,
"learning_rate": 1.6289254945566924e-06,
"loss": 0.4846106171607971,
"step": 2895
},
{
"epoch": 2.287519747235387,
"grad_norm": 13.754083021538255,
"learning_rate": 1.625532862789786e-06,
"loss": 0.6081640720367432,
"step": 2896
},
{
"epoch": 2.288309636650869,
"grad_norm": 6.98421162588746,
"learning_rate": 1.622143081783829e-06,
"loss": 0.3637077510356903,
"step": 2897
},
{
"epoch": 2.2890995260663507,
"grad_norm": 8.83511177282514,
"learning_rate": 1.6187561544025198e-06,
"loss": 0.9372393488883972,
"step": 2898
},
{
"epoch": 2.2898894154818326,
"grad_norm": 14.596564979975735,
"learning_rate": 1.6153720835071456e-06,
"loss": 0.5858408212661743,
"step": 2899
},
{
"epoch": 2.2906793048973144,
"grad_norm": 8.954576114923759,
"learning_rate": 1.6119908719565808e-06,
"loss": 0.6942223310470581,
"step": 2900
},
{
"epoch": 2.2914691943127963,
"grad_norm": 4.994809407224409,
"learning_rate": 1.6086125226072841e-06,
"loss": 0.10130809992551804,
"step": 2901
},
{
"epoch": 2.292259083728278,
"grad_norm": 13.657220031311276,
"learning_rate": 1.6052370383132959e-06,
"loss": 0.8644411563873291,
"step": 2902
},
{
"epoch": 2.29304897314376,
"grad_norm": 8.017431818886724,
"learning_rate": 1.601864421926237e-06,
"loss": 0.2293100655078888,
"step": 2903
},
{
"epoch": 2.293838862559242,
"grad_norm": 8.496986168821188,
"learning_rate": 1.5984946762953047e-06,
"loss": 0.29841262102127075,
"step": 2904
},
{
"epoch": 2.2946287519747237,
"grad_norm": 11.503821635042993,
"learning_rate": 1.5951278042672703e-06,
"loss": 0.2735556662082672,
"step": 2905
},
{
"epoch": 2.2954186413902056,
"grad_norm": 12.231463222308765,
"learning_rate": 1.5917638086864778e-06,
"loss": 0.22586968541145325,
"step": 2906
},
{
"epoch": 2.2962085308056874,
"grad_norm": 15.045555109566415,
"learning_rate": 1.5884026923948465e-06,
"loss": 0.6831101179122925,
"step": 2907
},
{
"epoch": 2.296998420221169,
"grad_norm": 9.987448369916713,
"learning_rate": 1.585044458231853e-06,
"loss": 0.4950708746910095,
"step": 2908
},
{
"epoch": 2.2977883096366507,
"grad_norm": 12.921655770502397,
"learning_rate": 1.5816891090345465e-06,
"loss": 0.4142148494720459,
"step": 2909
},
{
"epoch": 2.2985781990521326,
"grad_norm": 11.818337429892228,
"learning_rate": 1.5783366476375355e-06,
"loss": 0.7360016703605652,
"step": 2910
},
{
"epoch": 2.2993680884676144,
"grad_norm": 11.946060281453493,
"learning_rate": 1.5749870768729942e-06,
"loss": 0.43478429317474365,
"step": 2911
},
{
"epoch": 2.3001579778830963,
"grad_norm": 12.943633330633444,
"learning_rate": 1.5716403995706504e-06,
"loss": 0.3442850112915039,
"step": 2912
},
{
"epoch": 2.300947867298578,
"grad_norm": 13.412914057941535,
"learning_rate": 1.5682966185577846e-06,
"loss": 0.4648604989051819,
"step": 2913
},
{
"epoch": 2.30173775671406,
"grad_norm": 7.870032588866806,
"learning_rate": 1.564955736659236e-06,
"loss": 0.25151491165161133,
"step": 2914
},
{
"epoch": 2.302527646129542,
"grad_norm": 12.706146933959786,
"learning_rate": 1.561617756697391e-06,
"loss": 0.7445260882377625,
"step": 2915
},
{
"epoch": 2.3033175355450237,
"grad_norm": 14.267775682697732,
"learning_rate": 1.5582826814921898e-06,
"loss": 0.4260343313217163,
"step": 2916
},
{
"epoch": 2.3041074249605056,
"grad_norm": 12.34278465900189,
"learning_rate": 1.5549505138611126e-06,
"loss": 0.27285605669021606,
"step": 2917
},
{
"epoch": 2.3048973143759874,
"grad_norm": 8.37869237695817,
"learning_rate": 1.5516212566191874e-06,
"loss": 0.24186520278453827,
"step": 2918
},
{
"epoch": 2.3056872037914693,
"grad_norm": 16.50332542092082,
"learning_rate": 1.5482949125789765e-06,
"loss": 0.7350625395774841,
"step": 2919
},
{
"epoch": 2.306477093206951,
"grad_norm": 9.451218272417687,
"learning_rate": 1.5449714845505919e-06,
"loss": 0.48754340410232544,
"step": 2920
},
{
"epoch": 2.307266982622433,
"grad_norm": 8.596522267263161,
"learning_rate": 1.5416509753416742e-06,
"loss": 0.4277498126029968,
"step": 2921
},
{
"epoch": 2.308056872037915,
"grad_norm": 9.543021614361946,
"learning_rate": 1.5383333877574014e-06,
"loss": 0.7987942099571228,
"step": 2922
},
{
"epoch": 2.3088467614533963,
"grad_norm": 8.956197097997833,
"learning_rate": 1.5350187246004827e-06,
"loss": 0.3484036922454834,
"step": 2923
},
{
"epoch": 2.309636650868878,
"grad_norm": 14.554160691726453,
"learning_rate": 1.5317069886711527e-06,
"loss": 0.262870728969574,
"step": 2924
},
{
"epoch": 2.31042654028436,
"grad_norm": 10.028179610842129,
"learning_rate": 1.5283981827671817e-06,
"loss": 0.4316098093986511,
"step": 2925
},
{
"epoch": 2.311216429699842,
"grad_norm": 8.395938728397983,
"learning_rate": 1.525092309683857e-06,
"loss": 0.29740965366363525,
"step": 2926
},
{
"epoch": 2.3120063191153237,
"grad_norm": 13.787916527860657,
"learning_rate": 1.5217893722139927e-06,
"loss": 0.3893413841724396,
"step": 2927
},
{
"epoch": 2.3127962085308056,
"grad_norm": 9.912191066457186,
"learning_rate": 1.51848937314792e-06,
"loss": 0.32815465331077576,
"step": 2928
},
{
"epoch": 2.3135860979462874,
"grad_norm": 9.266231242165011,
"learning_rate": 1.5151923152734899e-06,
"loss": 0.3238118290901184,
"step": 2929
},
{
"epoch": 2.3143759873617693,
"grad_norm": 13.526154892749354,
"learning_rate": 1.511898201376067e-06,
"loss": 0.47903206944465637,
"step": 2930
},
{
"epoch": 2.315165876777251,
"grad_norm": 7.3178550699031915,
"learning_rate": 1.5086070342385301e-06,
"loss": 0.5913638472557068,
"step": 2931
},
{
"epoch": 2.315955766192733,
"grad_norm": 10.488519945078615,
"learning_rate": 1.5053188166412675e-06,
"loss": 0.4839520752429962,
"step": 2932
},
{
"epoch": 2.316745655608215,
"grad_norm": 8.255353237433672,
"learning_rate": 1.5020335513621765e-06,
"loss": 0.44816941022872925,
"step": 2933
},
{
"epoch": 2.3175355450236967,
"grad_norm": 9.243776086558636,
"learning_rate": 1.4987512411766597e-06,
"loss": 0.2721986472606659,
"step": 2934
},
{
"epoch": 2.3183254344391786,
"grad_norm": 10.746251103777091,
"learning_rate": 1.4954718888576247e-06,
"loss": 0.3570769131183624,
"step": 2935
},
{
"epoch": 2.3191153238546605,
"grad_norm": 8.639831387338281,
"learning_rate": 1.4921954971754783e-06,
"loss": 0.3641059994697571,
"step": 2936
},
{
"epoch": 2.3199052132701423,
"grad_norm": 11.092618448863458,
"learning_rate": 1.4889220688981265e-06,
"loss": 0.2935643196105957,
"step": 2937
},
{
"epoch": 2.320695102685624,
"grad_norm": 12.23607825667408,
"learning_rate": 1.4856516067909715e-06,
"loss": 0.6410992741584778,
"step": 2938
},
{
"epoch": 2.321484992101106,
"grad_norm": 16.623306809469454,
"learning_rate": 1.4823841136169132e-06,
"loss": 1.6279677152633667,
"step": 2939
},
{
"epoch": 2.322274881516588,
"grad_norm": 12.421816386616667,
"learning_rate": 1.479119592136341e-06,
"loss": 0.5754894018173218,
"step": 2940
},
{
"epoch": 2.3230647709320698,
"grad_norm": 8.991638214444569,
"learning_rate": 1.4758580451071303e-06,
"loss": 0.5960466861724854,
"step": 2941
},
{
"epoch": 2.323854660347551,
"grad_norm": 12.162545193825485,
"learning_rate": 1.4725994752846473e-06,
"loss": 0.3044765889644623,
"step": 2942
},
{
"epoch": 2.324644549763033,
"grad_norm": 19.440244075455958,
"learning_rate": 1.4693438854217423e-06,
"loss": 0.34671056270599365,
"step": 2943
},
{
"epoch": 2.325434439178515,
"grad_norm": 11.430703439045205,
"learning_rate": 1.4660912782687508e-06,
"loss": 0.43449515104293823,
"step": 2944
},
{
"epoch": 2.3262243285939967,
"grad_norm": 10.157383176145405,
"learning_rate": 1.4628416565734859e-06,
"loss": 0.3315383791923523,
"step": 2945
},
{
"epoch": 2.3270142180094786,
"grad_norm": 17.3305490576594,
"learning_rate": 1.4595950230812362e-06,
"loss": 0.888778567314148,
"step": 2946
},
{
"epoch": 2.3278041074249605,
"grad_norm": 16.32977159230478,
"learning_rate": 1.4563513805347672e-06,
"loss": 0.8644918203353882,
"step": 2947
},
{
"epoch": 2.3285939968404423,
"grad_norm": 14.638072535644602,
"learning_rate": 1.4531107316743231e-06,
"loss": 0.47349095344543457,
"step": 2948
},
{
"epoch": 2.329383886255924,
"grad_norm": 9.49689056502259,
"learning_rate": 1.4498730792376126e-06,
"loss": 0.5828062295913696,
"step": 2949
},
{
"epoch": 2.330173775671406,
"grad_norm": 11.520314966686117,
"learning_rate": 1.446638425959817e-06,
"loss": 0.3956340551376343,
"step": 2950
},
{
"epoch": 2.330963665086888,
"grad_norm": 8.269907045653584,
"learning_rate": 1.4434067745735792e-06,
"loss": 0.2273710072040558,
"step": 2951
},
{
"epoch": 2.3317535545023698,
"grad_norm": 19.223537730273925,
"learning_rate": 1.4401781278090077e-06,
"loss": 0.2646605968475342,
"step": 2952
},
{
"epoch": 2.3325434439178516,
"grad_norm": 7.308582717188896,
"learning_rate": 1.436952488393678e-06,
"loss": 0.25829195976257324,
"step": 2953
},
{
"epoch": 2.3333333333333335,
"grad_norm": 9.018243729685395,
"learning_rate": 1.4337298590526193e-06,
"loss": 0.6489487290382385,
"step": 2954
},
{
"epoch": 2.3341232227488153,
"grad_norm": 11.471038810226908,
"learning_rate": 1.4305102425083195e-06,
"loss": 0.23926126956939697,
"step": 2955
},
{
"epoch": 2.3349131121642968,
"grad_norm": 8.516369253631089,
"learning_rate": 1.4272936414807215e-06,
"loss": 0.7570828199386597,
"step": 2956
},
{
"epoch": 2.3357030015797786,
"grad_norm": 9.3142967522863,
"learning_rate": 1.4240800586872194e-06,
"loss": 0.9574933052062988,
"step": 2957
},
{
"epoch": 2.3364928909952605,
"grad_norm": 18.891161628854636,
"learning_rate": 1.4208694968426594e-06,
"loss": 1.0689260959625244,
"step": 2958
},
{
"epoch": 2.3372827804107423,
"grad_norm": 11.717390658258914,
"learning_rate": 1.4176619586593343e-06,
"loss": 0.5372721552848816,
"step": 2959
},
{
"epoch": 2.338072669826224,
"grad_norm": 12.175691953414255,
"learning_rate": 1.4144574468469836e-06,
"loss": 1.1094560623168945,
"step": 2960
},
{
"epoch": 2.338862559241706,
"grad_norm": 10.010738282134813,
"learning_rate": 1.4112559641127882e-06,
"loss": 0.3446424603462219,
"step": 2961
},
{
"epoch": 2.339652448657188,
"grad_norm": 13.582629438685526,
"learning_rate": 1.4080575131613727e-06,
"loss": 0.3784876763820648,
"step": 2962
},
{
"epoch": 2.3404423380726698,
"grad_norm": 12.572883408640878,
"learning_rate": 1.4048620966947979e-06,
"loss": 0.206849604845047,
"step": 2963
},
{
"epoch": 2.3412322274881516,
"grad_norm": 11.77936634392298,
"learning_rate": 1.4016697174125627e-06,
"loss": 0.41492050886154175,
"step": 2964
},
{
"epoch": 2.3420221169036335,
"grad_norm": 13.508111308050823,
"learning_rate": 1.3984803780115997e-06,
"loss": 0.5502463579177856,
"step": 2965
},
{
"epoch": 2.3428120063191153,
"grad_norm": 9.14917705724563,
"learning_rate": 1.3952940811862715e-06,
"loss": 0.31409573554992676,
"step": 2966
},
{
"epoch": 2.343601895734597,
"grad_norm": 12.299355989140265,
"learning_rate": 1.3921108296283765e-06,
"loss": 0.562318742275238,
"step": 2967
},
{
"epoch": 2.344391785150079,
"grad_norm": 10.934243215627218,
"learning_rate": 1.3889306260271318e-06,
"loss": 0.4536178708076477,
"step": 2968
},
{
"epoch": 2.345181674565561,
"grad_norm": 8.937500050751812,
"learning_rate": 1.3857534730691857e-06,
"loss": 0.34480780363082886,
"step": 2969
},
{
"epoch": 2.345971563981043,
"grad_norm": 14.281522624319098,
"learning_rate": 1.3825793734386074e-06,
"loss": 1.0019593238830566,
"step": 2970
},
{
"epoch": 2.3467614533965246,
"grad_norm": 12.232172084092497,
"learning_rate": 1.3794083298168848e-06,
"loss": 0.8148961663246155,
"step": 2971
},
{
"epoch": 2.3475513428120065,
"grad_norm": 14.943915817149255,
"learning_rate": 1.376240344882931e-06,
"loss": 0.294752836227417,
"step": 2972
},
{
"epoch": 2.3483412322274884,
"grad_norm": 10.941172869384477,
"learning_rate": 1.3730754213130648e-06,
"loss": 0.793616533279419,
"step": 2973
},
{
"epoch": 2.34913112164297,
"grad_norm": 19.66512269635939,
"learning_rate": 1.3699135617810266e-06,
"loss": 0.858728289604187,
"step": 2974
},
{
"epoch": 2.3499210110584516,
"grad_norm": 12.311189719186588,
"learning_rate": 1.3667547689579636e-06,
"loss": 0.5237981677055359,
"step": 2975
},
{
"epoch": 2.3507109004739335,
"grad_norm": 27.368898959383774,
"learning_rate": 1.3635990455124371e-06,
"loss": 0.45691031217575073,
"step": 2976
},
{
"epoch": 2.3515007898894154,
"grad_norm": 12.118450870594108,
"learning_rate": 1.3604463941104119e-06,
"loss": 0.3164505660533905,
"step": 2977
},
{
"epoch": 2.352290679304897,
"grad_norm": 10.789370739328524,
"learning_rate": 1.3572968174152595e-06,
"loss": 0.6960369348526001,
"step": 2978
},
{
"epoch": 2.353080568720379,
"grad_norm": 8.417646546087006,
"learning_rate": 1.3541503180877497e-06,
"loss": 0.45459938049316406,
"step": 2979
},
{
"epoch": 2.353870458135861,
"grad_norm": 10.279601142928406,
"learning_rate": 1.3510068987860554e-06,
"loss": 0.38531941175460815,
"step": 2980
},
{
"epoch": 2.354660347551343,
"grad_norm": 10.475719668500435,
"learning_rate": 1.3478665621657505e-06,
"loss": 0.3190561532974243,
"step": 2981
},
{
"epoch": 2.3554502369668247,
"grad_norm": 12.300957016080215,
"learning_rate": 1.3447293108798005e-06,
"loss": 0.6847068667411804,
"step": 2982
},
{
"epoch": 2.3562401263823065,
"grad_norm": 18.557938231221407,
"learning_rate": 1.3415951475785666e-06,
"loss": 0.6536878347396851,
"step": 2983
},
{
"epoch": 2.3570300157977884,
"grad_norm": 9.51452055614094,
"learning_rate": 1.338464074909796e-06,
"loss": 1.2326545715332031,
"step": 2984
},
{
"epoch": 2.3578199052132702,
"grad_norm": 7.117469162686578,
"learning_rate": 1.3353360955186346e-06,
"loss": 0.2746868431568146,
"step": 2985
},
{
"epoch": 2.358609794628752,
"grad_norm": 8.473831273234493,
"learning_rate": 1.3322112120476066e-06,
"loss": 0.7424836754798889,
"step": 2986
},
{
"epoch": 2.359399684044234,
"grad_norm": 8.174865295399881,
"learning_rate": 1.3290894271366251e-06,
"loss": 0.26962894201278687,
"step": 2987
},
{
"epoch": 2.360189573459716,
"grad_norm": 9.674308066012069,
"learning_rate": 1.3259707434229834e-06,
"loss": 0.24160385131835938,
"step": 2988
},
{
"epoch": 2.3609794628751977,
"grad_norm": 15.484285096363958,
"learning_rate": 1.3228551635413567e-06,
"loss": 0.2914643883705139,
"step": 2989
},
{
"epoch": 2.361769352290679,
"grad_norm": 6.8344064852954425,
"learning_rate": 1.3197426901237965e-06,
"loss": 0.6412686109542847,
"step": 2990
},
{
"epoch": 2.362559241706161,
"grad_norm": 10.510405239917262,
"learning_rate": 1.3166333257997305e-06,
"loss": 0.23676377534866333,
"step": 2991
},
{
"epoch": 2.363349131121643,
"grad_norm": 18.178644105487813,
"learning_rate": 1.3135270731959599e-06,
"loss": 0.6876839995384216,
"step": 2992
},
{
"epoch": 2.3641390205371247,
"grad_norm": 14.269241469381564,
"learning_rate": 1.3104239349366577e-06,
"loss": 0.4241482615470886,
"step": 2993
},
{
"epoch": 2.3649289099526065,
"grad_norm": 12.211602110609782,
"learning_rate": 1.3073239136433651e-06,
"loss": 0.5298961997032166,
"step": 2994
},
{
"epoch": 2.3657187993680884,
"grad_norm": 8.26732985074683,
"learning_rate": 1.3042270119349903e-06,
"loss": 0.5399200320243835,
"step": 2995
},
{
"epoch": 2.3665086887835702,
"grad_norm": 12.593489559566338,
"learning_rate": 1.3011332324278054e-06,
"loss": 0.28332608938217163,
"step": 2996
},
{
"epoch": 2.367298578199052,
"grad_norm": 13.460677136621092,
"learning_rate": 1.2980425777354466e-06,
"loss": 0.8239960670471191,
"step": 2997
},
{
"epoch": 2.368088467614534,
"grad_norm": 11.353725107540342,
"learning_rate": 1.2949550504689084e-06,
"loss": 0.3334651589393616,
"step": 2998
},
{
"epoch": 2.368878357030016,
"grad_norm": 10.254492835832023,
"learning_rate": 1.2918706532365427e-06,
"loss": 0.3909056782722473,
"step": 2999
},
{
"epoch": 2.3696682464454977,
"grad_norm": 14.050213628495184,
"learning_rate": 1.2887893886440634e-06,
"loss": 0.42461708188056946,
"step": 3000
},
{
"epoch": 2.3704581358609795,
"grad_norm": 9.514180124670807,
"learning_rate": 1.2857112592945277e-06,
"loss": 0.3221348822116852,
"step": 3001
},
{
"epoch": 2.3712480252764614,
"grad_norm": 13.534122595465016,
"learning_rate": 1.2826362677883509e-06,
"loss": 0.31083306670188904,
"step": 3002
},
{
"epoch": 2.3720379146919433,
"grad_norm": 10.83312836661239,
"learning_rate": 1.279564416723295e-06,
"loss": 0.6842789649963379,
"step": 3003
},
{
"epoch": 2.372827804107425,
"grad_norm": 13.613162216619145,
"learning_rate": 1.2764957086944729e-06,
"loss": 0.2826170325279236,
"step": 3004
},
{
"epoch": 2.373617693522907,
"grad_norm": 7.054250032981525,
"learning_rate": 1.2734301462943393e-06,
"loss": 0.3503812253475189,
"step": 3005
},
{
"epoch": 2.374407582938389,
"grad_norm": 23.530290090188814,
"learning_rate": 1.2703677321126878e-06,
"loss": 0.9787487983703613,
"step": 3006
},
{
"epoch": 2.3751974723538707,
"grad_norm": 9.116900547516696,
"learning_rate": 1.2673084687366577e-06,
"loss": 0.5961554050445557,
"step": 3007
},
{
"epoch": 2.375987361769352,
"grad_norm": 12.370457479829778,
"learning_rate": 1.264252358750724e-06,
"loss": 0.4279392957687378,
"step": 3008
},
{
"epoch": 2.376777251184834,
"grad_norm": 16.664525922372817,
"learning_rate": 1.2611994047367004e-06,
"loss": 2.003368854522705,
"step": 3009
},
{
"epoch": 2.377567140600316,
"grad_norm": 9.89227361650676,
"learning_rate": 1.2581496092737315e-06,
"loss": 0.36015745997428894,
"step": 3010
},
{
"epoch": 2.3783570300157977,
"grad_norm": 7.456303625403205,
"learning_rate": 1.2551029749382966e-06,
"loss": 0.38664817810058594,
"step": 3011
},
{
"epoch": 2.3791469194312795,
"grad_norm": 10.16486281921813,
"learning_rate": 1.2520595043041967e-06,
"loss": 0.1998920887708664,
"step": 3012
},
{
"epoch": 2.3799368088467614,
"grad_norm": 12.477795753447527,
"learning_rate": 1.249019199942571e-06,
"loss": 0.5245856046676636,
"step": 3013
},
{
"epoch": 2.3807266982622433,
"grad_norm": 15.497688569467634,
"learning_rate": 1.2459820644218772e-06,
"loss": 0.5648780465126038,
"step": 3014
},
{
"epoch": 2.381516587677725,
"grad_norm": 9.766778610490318,
"learning_rate": 1.242948100307898e-06,
"loss": 0.3348008990287781,
"step": 3015
},
{
"epoch": 2.382306477093207,
"grad_norm": 9.459152445457095,
"learning_rate": 1.2399173101637362e-06,
"loss": 0.28159070014953613,
"step": 3016
},
{
"epoch": 2.383096366508689,
"grad_norm": 10.550046979381259,
"learning_rate": 1.23688969654981e-06,
"loss": 0.5117573738098145,
"step": 3017
},
{
"epoch": 2.3838862559241707,
"grad_norm": 15.689874055565582,
"learning_rate": 1.2338652620238617e-06,
"loss": 0.40390482544898987,
"step": 3018
},
{
"epoch": 2.3846761453396526,
"grad_norm": 13.629010440932046,
"learning_rate": 1.2308440091409418e-06,
"loss": 0.255649209022522,
"step": 3019
},
{
"epoch": 2.3854660347551344,
"grad_norm": 12.962233890538123,
"learning_rate": 1.2278259404534148e-06,
"loss": 0.7356714010238647,
"step": 3020
},
{
"epoch": 2.3862559241706163,
"grad_norm": 12.685586645489847,
"learning_rate": 1.2248110585109546e-06,
"loss": 0.30903008580207825,
"step": 3021
},
{
"epoch": 2.387045813586098,
"grad_norm": 9.857261027285547,
"learning_rate": 1.2217993658605442e-06,
"loss": 0.19482699036598206,
"step": 3022
},
{
"epoch": 2.3878357030015795,
"grad_norm": 10.203386869299976,
"learning_rate": 1.2187908650464713e-06,
"loss": 0.777467668056488,
"step": 3023
},
{
"epoch": 2.3886255924170614,
"grad_norm": 13.960859010293253,
"learning_rate": 1.2157855586103268e-06,
"loss": 0.42737501859664917,
"step": 3024
},
{
"epoch": 2.3894154818325433,
"grad_norm": 9.02890660772045,
"learning_rate": 1.212783449091004e-06,
"loss": 0.19785018265247345,
"step": 3025
},
{
"epoch": 2.390205371248025,
"grad_norm": 16.20027560463624,
"learning_rate": 1.2097845390246944e-06,
"loss": 0.4012932777404785,
"step": 3026
},
{
"epoch": 2.390995260663507,
"grad_norm": 13.935718162104166,
"learning_rate": 1.2067888309448872e-06,
"loss": 1.0133466720581055,
"step": 3027
},
{
"epoch": 2.391785150078989,
"grad_norm": 17.86275453891239,
"learning_rate": 1.2037963273823667e-06,
"loss": 0.7729724049568176,
"step": 3028
},
{
"epoch": 2.3925750394944707,
"grad_norm": 19.729430525861485,
"learning_rate": 1.2008070308652097e-06,
"loss": 0.6262521147727966,
"step": 3029
},
{
"epoch": 2.3933649289099526,
"grad_norm": 9.043064178522485,
"learning_rate": 1.1978209439187843e-06,
"loss": 0.33055561780929565,
"step": 3030
},
{
"epoch": 2.3941548183254344,
"grad_norm": 10.08632808389374,
"learning_rate": 1.194838069065744e-06,
"loss": 0.3754257559776306,
"step": 3031
},
{
"epoch": 2.3949447077409163,
"grad_norm": 16.377512511153483,
"learning_rate": 1.191858408826036e-06,
"loss": 0.7418103218078613,
"step": 3032
},
{
"epoch": 2.395734597156398,
"grad_norm": 13.891209536245656,
"learning_rate": 1.1888819657168832e-06,
"loss": 0.6630780100822449,
"step": 3033
},
{
"epoch": 2.39652448657188,
"grad_norm": 9.70934891051542,
"learning_rate": 1.185908742252796e-06,
"loss": 0.6031774878501892,
"step": 3034
},
{
"epoch": 2.397314375987362,
"grad_norm": 7.781623949513264,
"learning_rate": 1.1829387409455628e-06,
"loss": 0.3950078785419464,
"step": 3035
},
{
"epoch": 2.3981042654028437,
"grad_norm": 14.63680014242443,
"learning_rate": 1.1799719643042494e-06,
"loss": 1.1075937747955322,
"step": 3036
},
{
"epoch": 2.3988941548183256,
"grad_norm": 9.35495088180136,
"learning_rate": 1.1770084148352013e-06,
"loss": 0.5169080495834351,
"step": 3037
},
{
"epoch": 2.3996840442338074,
"grad_norm": 12.0333389179042,
"learning_rate": 1.1740480950420346e-06,
"loss": 0.6824804544448853,
"step": 3038
},
{
"epoch": 2.4004739336492893,
"grad_norm": 10.918655037249678,
"learning_rate": 1.1710910074256353e-06,
"loss": 0.21143901348114014,
"step": 3039
},
{
"epoch": 2.401263823064771,
"grad_norm": 12.024208404396846,
"learning_rate": 1.1681371544841596e-06,
"loss": 0.22546377778053284,
"step": 3040
},
{
"epoch": 2.402053712480253,
"grad_norm": 7.956119102031276,
"learning_rate": 1.165186538713035e-06,
"loss": 0.4426957070827484,
"step": 3041
},
{
"epoch": 2.4028436018957344,
"grad_norm": 14.526088548416308,
"learning_rate": 1.1622391626049512e-06,
"loss": 0.46725398302078247,
"step": 3042
},
{
"epoch": 2.4036334913112163,
"grad_norm": 15.015374102906868,
"learning_rate": 1.1592950286498617e-06,
"loss": 0.3960593044757843,
"step": 3043
},
{
"epoch": 2.404423380726698,
"grad_norm": 8.595894150813733,
"learning_rate": 1.156354139334978e-06,
"loss": 0.34023603796958923,
"step": 3044
},
{
"epoch": 2.40521327014218,
"grad_norm": 10.668250389515402,
"learning_rate": 1.153416497144773e-06,
"loss": 0.29296159744262695,
"step": 3045
},
{
"epoch": 2.406003159557662,
"grad_norm": 12.422225116517867,
"learning_rate": 1.1504821045609793e-06,
"loss": 0.3671707510948181,
"step": 3046
},
{
"epoch": 2.4067930489731437,
"grad_norm": 10.146132230382701,
"learning_rate": 1.1475509640625803e-06,
"loss": 0.3341038227081299,
"step": 3047
},
{
"epoch": 2.4075829383886256,
"grad_norm": 15.595249640220638,
"learning_rate": 1.1446230781258126e-06,
"loss": 0.4406832456588745,
"step": 3048
},
{
"epoch": 2.4083728278041074,
"grad_norm": 15.105385899136605,
"learning_rate": 1.1416984492241651e-06,
"loss": 0.3876189589500427,
"step": 3049
},
{
"epoch": 2.4091627172195893,
"grad_norm": 12.938838417300977,
"learning_rate": 1.138777079828372e-06,
"loss": 0.3145609498023987,
"step": 3050
},
{
"epoch": 2.409952606635071,
"grad_norm": 8.358585121545957,
"learning_rate": 1.1358589724064172e-06,
"loss": 0.3923751711845398,
"step": 3051
},
{
"epoch": 2.410742496050553,
"grad_norm": 12.197191053772348,
"learning_rate": 1.1329441294235271e-06,
"loss": 0.4876922369003296,
"step": 3052
},
{
"epoch": 2.411532385466035,
"grad_norm": 22.74826926426158,
"learning_rate": 1.1300325533421708e-06,
"loss": 1.1759089231491089,
"step": 3053
},
{
"epoch": 2.4123222748815167,
"grad_norm": 9.507426051564869,
"learning_rate": 1.1271242466220566e-06,
"loss": 0.7912863492965698,
"step": 3054
},
{
"epoch": 2.4131121642969986,
"grad_norm": 28.164313247406366,
"learning_rate": 1.1242192117201329e-06,
"loss": 2.359377384185791,
"step": 3055
},
{
"epoch": 2.41390205371248,
"grad_norm": 6.939852525178187,
"learning_rate": 1.1213174510905818e-06,
"loss": 0.1508610099554062,
"step": 3056
},
{
"epoch": 2.414691943127962,
"grad_norm": 8.079273130567692,
"learning_rate": 1.1184189671848205e-06,
"loss": 0.26179447770118713,
"step": 3057
},
{
"epoch": 2.4154818325434437,
"grad_norm": 8.63717491884411,
"learning_rate": 1.1155237624514975e-06,
"loss": 0.3997868001461029,
"step": 3058
},
{
"epoch": 2.4162717219589256,
"grad_norm": 12.57415659403395,
"learning_rate": 1.1126318393364905e-06,
"loss": 0.31383663415908813,
"step": 3059
},
{
"epoch": 2.4170616113744074,
"grad_norm": 10.339245467585227,
"learning_rate": 1.109743200282909e-06,
"loss": 0.3091076612472534,
"step": 3060
},
{
"epoch": 2.4178515007898893,
"grad_norm": 9.227956180667832,
"learning_rate": 1.106857847731081e-06,
"loss": 0.28240686655044556,
"step": 3061
},
{
"epoch": 2.418641390205371,
"grad_norm": 14.195925477576337,
"learning_rate": 1.1039757841185638e-06,
"loss": 0.58428555727005,
"step": 3062
},
{
"epoch": 2.419431279620853,
"grad_norm": 12.117805874205489,
"learning_rate": 1.1010970118801335e-06,
"loss": 0.36473608016967773,
"step": 3063
},
{
"epoch": 2.420221169036335,
"grad_norm": 8.135434340440062,
"learning_rate": 1.0982215334477852e-06,
"loss": 0.5204439163208008,
"step": 3064
},
{
"epoch": 2.4210110584518167,
"grad_norm": 11.478951662361853,
"learning_rate": 1.0953493512507369e-06,
"loss": 0.7073631882667542,
"step": 3065
},
{
"epoch": 2.4218009478672986,
"grad_norm": 8.874708226579822,
"learning_rate": 1.0924804677154132e-06,
"loss": 0.3390922546386719,
"step": 3066
},
{
"epoch": 2.4225908372827805,
"grad_norm": 8.771829165557099,
"learning_rate": 1.0896148852654576e-06,
"loss": 0.28562629222869873,
"step": 3067
},
{
"epoch": 2.4233807266982623,
"grad_norm": 12.196152581610507,
"learning_rate": 1.0867526063217225e-06,
"loss": 0.7520745396614075,
"step": 3068
},
{
"epoch": 2.424170616113744,
"grad_norm": 15.941013046283597,
"learning_rate": 1.0838936333022732e-06,
"loss": 0.7924416661262512,
"step": 3069
},
{
"epoch": 2.424960505529226,
"grad_norm": 16.208712282712163,
"learning_rate": 1.0810379686223782e-06,
"loss": 0.4997054934501648,
"step": 3070
},
{
"epoch": 2.425750394944708,
"grad_norm": 12.626153196177519,
"learning_rate": 1.0781856146945135e-06,
"loss": 0.31034407019615173,
"step": 3071
},
{
"epoch": 2.4265402843601898,
"grad_norm": 13.13529853421417,
"learning_rate": 1.075336573928355e-06,
"loss": 1.2663923501968384,
"step": 3072
},
{
"epoch": 2.4273301737756716,
"grad_norm": 11.619655409461624,
"learning_rate": 1.0724908487307812e-06,
"loss": 0.4666603207588196,
"step": 3073
},
{
"epoch": 2.4281200631911535,
"grad_norm": 11.29095734565805,
"learning_rate": 1.0696484415058732e-06,
"loss": 0.6091010570526123,
"step": 3074
},
{
"epoch": 2.428909952606635,
"grad_norm": 12.496992069841568,
"learning_rate": 1.066809354654904e-06,
"loss": 0.3018745183944702,
"step": 3075
},
{
"epoch": 2.4296998420221168,
"grad_norm": 10.57772764970064,
"learning_rate": 1.063973590576346e-06,
"loss": 0.7717863917350769,
"step": 3076
},
{
"epoch": 2.4304897314375986,
"grad_norm": 14.741370815856945,
"learning_rate": 1.0611411516658566e-06,
"loss": 0.47512930631637573,
"step": 3077
},
{
"epoch": 2.4312796208530805,
"grad_norm": 14.21167289494127,
"learning_rate": 1.0583120403162943e-06,
"loss": 0.6081647872924805,
"step": 3078
},
{
"epoch": 2.4320695102685623,
"grad_norm": 9.193770485318598,
"learning_rate": 1.0554862589177007e-06,
"loss": 0.34148019552230835,
"step": 3079
},
{
"epoch": 2.432859399684044,
"grad_norm": 9.062778818641565,
"learning_rate": 1.0526638098573045e-06,
"loss": 0.3882153630256653,
"step": 3080
},
{
"epoch": 2.433649289099526,
"grad_norm": 18.929160213629444,
"learning_rate": 1.0498446955195202e-06,
"loss": 0.3846644163131714,
"step": 3081
},
{
"epoch": 2.434439178515008,
"grad_norm": 8.998677826427556,
"learning_rate": 1.047028918285945e-06,
"loss": 0.3000609576702118,
"step": 3082
},
{
"epoch": 2.4352290679304898,
"grad_norm": 10.101377476509365,
"learning_rate": 1.0442164805353565e-06,
"loss": 0.3316442370414734,
"step": 3083
},
{
"epoch": 2.4360189573459716,
"grad_norm": 15.80218064139609,
"learning_rate": 1.0414073846437122e-06,
"loss": 0.15290002524852753,
"step": 3084
},
{
"epoch": 2.4368088467614535,
"grad_norm": 9.603248142881998,
"learning_rate": 1.0386016329841448e-06,
"loss": 0.30186790227890015,
"step": 3085
},
{
"epoch": 2.4375987361769353,
"grad_norm": 14.04175823933125,
"learning_rate": 1.0357992279269623e-06,
"loss": 0.4339219033718109,
"step": 3086
},
{
"epoch": 2.438388625592417,
"grad_norm": 18.009198275639264,
"learning_rate": 1.033000171839646e-06,
"loss": 1.9742562770843506,
"step": 3087
},
{
"epoch": 2.439178515007899,
"grad_norm": 22.24916433433492,
"learning_rate": 1.0302044670868483e-06,
"loss": 0.504414439201355,
"step": 3088
},
{
"epoch": 2.4399684044233805,
"grad_norm": 10.361343950481873,
"learning_rate": 1.027412116030389e-06,
"loss": 1.0488673448562622,
"step": 3089
},
{
"epoch": 2.4407582938388623,
"grad_norm": 10.797759458558541,
"learning_rate": 1.0246231210292557e-06,
"loss": 0.3521267771720886,
"step": 3090
},
{
"epoch": 2.441548183254344,
"grad_norm": 12.14235046863475,
"learning_rate": 1.0218374844396011e-06,
"loss": 0.3208717703819275,
"step": 3091
},
{
"epoch": 2.442338072669826,
"grad_norm": 14.3011282222263,
"learning_rate": 1.0190552086147393e-06,
"loss": 0.7246259450912476,
"step": 3092
},
{
"epoch": 2.443127962085308,
"grad_norm": 14.472676250973395,
"learning_rate": 1.0162762959051464e-06,
"loss": 0.3144262433052063,
"step": 3093
},
{
"epoch": 2.4439178515007898,
"grad_norm": 8.531330890483545,
"learning_rate": 1.013500748658457e-06,
"loss": 0.4461020231246948,
"step": 3094
},
{
"epoch": 2.4447077409162716,
"grad_norm": 14.749179854381705,
"learning_rate": 1.010728569219463e-06,
"loss": 0.37278565764427185,
"step": 3095
},
{
"epoch": 2.4454976303317535,
"grad_norm": 9.553911314657903,
"learning_rate": 1.0079597599301088e-06,
"loss": 0.5785685777664185,
"step": 3096
},
{
"epoch": 2.4462875197472354,
"grad_norm": 11.059506181609464,
"learning_rate": 1.0051943231294965e-06,
"loss": 1.0506287813186646,
"step": 3097
},
{
"epoch": 2.447077409162717,
"grad_norm": 15.548120320030591,
"learning_rate": 1.0024322611538762e-06,
"loss": 0.4273751974105835,
"step": 3098
},
{
"epoch": 2.447867298578199,
"grad_norm": 9.759199951892318,
"learning_rate": 9.996735763366444e-07,
"loss": 0.37685832381248474,
"step": 3099
},
{
"epoch": 2.448657187993681,
"grad_norm": 11.701020741919134,
"learning_rate": 9.96918271008348e-07,
"loss": 0.4007868766784668,
"step": 3100
},
{
"epoch": 2.449447077409163,
"grad_norm": 11.350172953468489,
"learning_rate": 9.941663474966772e-07,
"loss": 0.5525588989257812,
"step": 3101
},
{
"epoch": 2.4502369668246446,
"grad_norm": 15.30001249449009,
"learning_rate": 9.914178081264685e-07,
"loss": 0.6951796412467957,
"step": 3102
},
{
"epoch": 2.4510268562401265,
"grad_norm": 7.995616395973255,
"learning_rate": 9.886726552196974e-07,
"loss": 0.47238144278526306,
"step": 3103
},
{
"epoch": 2.4518167456556084,
"grad_norm": 10.984193552146008,
"learning_rate": 9.859308910954745e-07,
"loss": 0.32252442836761475,
"step": 3104
},
{
"epoch": 2.4526066350710902,
"grad_norm": 8.04105675908032,
"learning_rate": 9.831925180700525e-07,
"loss": 0.21019114553928375,
"step": 3105
},
{
"epoch": 2.453396524486572,
"grad_norm": 27.322091310870565,
"learning_rate": 9.804575384568194e-07,
"loss": 1.3946754932403564,
"step": 3106
},
{
"epoch": 2.454186413902054,
"grad_norm": 8.839342829542206,
"learning_rate": 9.777259545662944e-07,
"loss": 0.5038160085678101,
"step": 3107
},
{
"epoch": 2.4549763033175354,
"grad_norm": 21.324783217152557,
"learning_rate": 9.749977687061279e-07,
"loss": 0.518517017364502,
"step": 3108
},
{
"epoch": 2.455766192733017,
"grad_norm": 8.481164678205802,
"learning_rate": 9.722729831811007e-07,
"loss": 0.8147182464599609,
"step": 3109
},
{
"epoch": 2.456556082148499,
"grad_norm": 6.780024339611966,
"learning_rate": 9.695516002931204e-07,
"loss": 0.3741002380847931,
"step": 3110
},
{
"epoch": 2.457345971563981,
"grad_norm": 11.801344448984926,
"learning_rate": 9.668336223412207e-07,
"loss": 0.3355671763420105,
"step": 3111
},
{
"epoch": 2.458135860979463,
"grad_norm": 18.426072152940076,
"learning_rate": 9.641190516215583e-07,
"loss": 0.5405136346817017,
"step": 3112
},
{
"epoch": 2.4589257503949447,
"grad_norm": 8.59802906534028,
"learning_rate": 9.614078904274105e-07,
"loss": 0.4377972483634949,
"step": 3113
},
{
"epoch": 2.4597156398104265,
"grad_norm": 14.687076152913516,
"learning_rate": 9.587001410491764e-07,
"loss": 0.46500271558761597,
"step": 3114
},
{
"epoch": 2.4605055292259084,
"grad_norm": 11.107539453005398,
"learning_rate": 9.559958057743712e-07,
"loss": 0.5390537977218628,
"step": 3115
},
{
"epoch": 2.4612954186413902,
"grad_norm": 16.500533704000823,
"learning_rate": 9.532948868876258e-07,
"loss": 0.21158595383167267,
"step": 3116
},
{
"epoch": 2.462085308056872,
"grad_norm": 13.41292359415877,
"learning_rate": 9.505973866706858e-07,
"loss": 0.5755499601364136,
"step": 3117
},
{
"epoch": 2.462875197472354,
"grad_norm": 21.715368481608397,
"learning_rate": 9.47903307402408e-07,
"loss": 1.7295933961868286,
"step": 3118
},
{
"epoch": 2.463665086887836,
"grad_norm": 16.12344079053901,
"learning_rate": 9.452126513587601e-07,
"loss": 0.6120498180389404,
"step": 3119
},
{
"epoch": 2.4644549763033177,
"grad_norm": 6.872808233122644,
"learning_rate": 9.42525420812816e-07,
"loss": 0.20094534754753113,
"step": 3120
},
{
"epoch": 2.4652448657187995,
"grad_norm": 13.205846064827124,
"learning_rate": 9.398416180347581e-07,
"loss": 0.3877865672111511,
"step": 3121
},
{
"epoch": 2.4660347551342814,
"grad_norm": 32.63764306543489,
"learning_rate": 9.371612452918711e-07,
"loss": 1.0538610219955444,
"step": 3122
},
{
"epoch": 2.466824644549763,
"grad_norm": 10.156083117031175,
"learning_rate": 9.34484304848543e-07,
"loss": 0.6839322447776794,
"step": 3123
},
{
"epoch": 2.4676145339652447,
"grad_norm": 10.388055079887849,
"learning_rate": 9.318107989662611e-07,
"loss": 0.19889391958713531,
"step": 3124
},
{
"epoch": 2.4684044233807265,
"grad_norm": 7.682004289712716,
"learning_rate": 9.291407299036148e-07,
"loss": 0.29008305072784424,
"step": 3125
},
{
"epoch": 2.4691943127962084,
"grad_norm": 8.701364307694792,
"learning_rate": 9.264740999162836e-07,
"loss": 0.28695201873779297,
"step": 3126
},
{
"epoch": 2.4699842022116902,
"grad_norm": 17.465630884728238,
"learning_rate": 9.238109112570475e-07,
"loss": 0.29782527685165405,
"step": 3127
},
{
"epoch": 2.470774091627172,
"grad_norm": 8.305085250067776,
"learning_rate": 9.21151166175776e-07,
"loss": 0.21786969900131226,
"step": 3128
},
{
"epoch": 2.471563981042654,
"grad_norm": 9.419126772200544,
"learning_rate": 9.184948669194299e-07,
"loss": 0.6029007434844971,
"step": 3129
},
{
"epoch": 2.472353870458136,
"grad_norm": 20.242949144706834,
"learning_rate": 9.158420157320613e-07,
"loss": 0.539847731590271,
"step": 3130
},
{
"epoch": 2.4731437598736177,
"grad_norm": 14.865152633768327,
"learning_rate": 9.131926148548087e-07,
"loss": 0.6652476787567139,
"step": 3131
},
{
"epoch": 2.4739336492890995,
"grad_norm": 10.195973961976483,
"learning_rate": 9.105466665258916e-07,
"loss": 0.6767930388450623,
"step": 3132
},
{
"epoch": 2.4747235387045814,
"grad_norm": 12.913760166214779,
"learning_rate": 9.07904172980616e-07,
"loss": 0.30187326669692993,
"step": 3133
},
{
"epoch": 2.4755134281200633,
"grad_norm": 7.210724493670894,
"learning_rate": 9.052651364513709e-07,
"loss": 0.2819286584854126,
"step": 3134
},
{
"epoch": 2.476303317535545,
"grad_norm": 9.694770018852635,
"learning_rate": 9.026295591676232e-07,
"loss": 1.3492515087127686,
"step": 3135
},
{
"epoch": 2.477093206951027,
"grad_norm": 10.663258311074385,
"learning_rate": 8.999974433559172e-07,
"loss": 0.6058721542358398,
"step": 3136
},
{
"epoch": 2.477883096366509,
"grad_norm": 8.609940369345201,
"learning_rate": 8.973687912398698e-07,
"loss": 0.3588424324989319,
"step": 3137
},
{
"epoch": 2.4786729857819907,
"grad_norm": 10.99741800368398,
"learning_rate": 8.947436050401792e-07,
"loss": 0.9377810955047607,
"step": 3138
},
{
"epoch": 2.4794628751974725,
"grad_norm": 13.297554259312957,
"learning_rate": 8.92121886974609e-07,
"loss": 0.3509410619735718,
"step": 3139
},
{
"epoch": 2.4802527646129544,
"grad_norm": 25.874105611022717,
"learning_rate": 8.895036392579965e-07,
"loss": 1.956540822982788,
"step": 3140
},
{
"epoch": 2.4810426540284363,
"grad_norm": 13.02959939002725,
"learning_rate": 8.868888641022449e-07,
"loss": 0.32808127999305725,
"step": 3141
},
{
"epoch": 2.4818325434439177,
"grad_norm": 22.382659848118195,
"learning_rate": 8.842775637163259e-07,
"loss": 1.4966964721679688,
"step": 3142
},
{
"epoch": 2.4826224328593995,
"grad_norm": 6.597640303742646,
"learning_rate": 8.816697403062736e-07,
"loss": 0.2421848326921463,
"step": 3143
},
{
"epoch": 2.4834123222748814,
"grad_norm": 10.171804451221856,
"learning_rate": 8.790653960751861e-07,
"loss": 0.3181907534599304,
"step": 3144
},
{
"epoch": 2.4842022116903633,
"grad_norm": 12.678509259303947,
"learning_rate": 8.764645332232225e-07,
"loss": 2.06329345703125,
"step": 3145
},
{
"epoch": 2.484992101105845,
"grad_norm": 8.062598004826272,
"learning_rate": 8.738671539476001e-07,
"loss": 0.40570273995399475,
"step": 3146
},
{
"epoch": 2.485781990521327,
"grad_norm": 8.067583181720805,
"learning_rate": 8.712732604425933e-07,
"loss": 0.2615022361278534,
"step": 3147
},
{
"epoch": 2.486571879936809,
"grad_norm": 11.514174208308086,
"learning_rate": 8.686828548995318e-07,
"loss": 0.3940156102180481,
"step": 3148
},
{
"epoch": 2.4873617693522907,
"grad_norm": 13.106691868280555,
"learning_rate": 8.660959395067991e-07,
"loss": 0.33123183250427246,
"step": 3149
},
{
"epoch": 2.4881516587677726,
"grad_norm": 6.521694530585903,
"learning_rate": 8.635125164498293e-07,
"loss": 0.1495874524116516,
"step": 3150
},
{
"epoch": 2.4889415481832544,
"grad_norm": 8.547476041571496,
"learning_rate": 8.609325879111069e-07,
"loss": 0.5456651449203491,
"step": 3151
},
{
"epoch": 2.4897314375987363,
"grad_norm": 10.933870809001716,
"learning_rate": 8.583561560701647e-07,
"loss": 0.3192444145679474,
"step": 3152
},
{
"epoch": 2.490521327014218,
"grad_norm": 28.363445895171516,
"learning_rate": 8.557832231035796e-07,
"loss": 0.5993216037750244,
"step": 3153
},
{
"epoch": 2.4913112164297,
"grad_norm": 10.933363522689742,
"learning_rate": 8.532137911849747e-07,
"loss": 0.3621593117713928,
"step": 3154
},
{
"epoch": 2.492101105845182,
"grad_norm": 7.744021564422991,
"learning_rate": 8.506478624850145e-07,
"loss": 0.20899435877799988,
"step": 3155
},
{
"epoch": 2.4928909952606633,
"grad_norm": 16.455158224717778,
"learning_rate": 8.480854391714039e-07,
"loss": 0.5033141374588013,
"step": 3156
},
{
"epoch": 2.493680884676145,
"grad_norm": 8.898300886169093,
"learning_rate": 8.45526523408885e-07,
"loss": 0.4480706751346588,
"step": 3157
},
{
"epoch": 2.494470774091627,
"grad_norm": 7.0747670856799045,
"learning_rate": 8.42971117359242e-07,
"loss": 0.21309760212898254,
"step": 3158
},
{
"epoch": 2.495260663507109,
"grad_norm": 9.196654852828132,
"learning_rate": 8.404192231812875e-07,
"loss": 0.3876749873161316,
"step": 3159
},
{
"epoch": 2.4960505529225907,
"grad_norm": 8.41438123486234,
"learning_rate": 8.378708430308702e-07,
"loss": 0.3482446074485779,
"step": 3160
},
{
"epoch": 2.4968404423380726,
"grad_norm": 9.952515369779197,
"learning_rate": 8.353259790608698e-07,
"loss": 0.19175337255001068,
"step": 3161
},
{
"epoch": 2.4976303317535544,
"grad_norm": 15.820391137731752,
"learning_rate": 8.32784633421197e-07,
"loss": 1.1132574081420898,
"step": 3162
},
{
"epoch": 2.4984202211690363,
"grad_norm": 9.122774774351079,
"learning_rate": 8.302468082587906e-07,
"loss": 0.39598995447158813,
"step": 3163
},
{
"epoch": 2.499210110584518,
"grad_norm": 10.71112514476097,
"learning_rate": 8.277125057176095e-07,
"loss": 0.37614136934280396,
"step": 3164
},
{
"epoch": 2.5,
"grad_norm": 15.433700062046594,
"learning_rate": 8.25181727938642e-07,
"loss": 0.4549494981765747,
"step": 3165
},
{
"epoch": 2.500789889415482,
"grad_norm": 6.775255209163935,
"learning_rate": 8.226544770598993e-07,
"loss": 0.38162803649902344,
"step": 3166
},
{
"epoch": 2.5015797788309637,
"grad_norm": 9.07190974562414,
"learning_rate": 8.201307552164106e-07,
"loss": 0.4735422730445862,
"step": 3167
},
{
"epoch": 2.5023696682464456,
"grad_norm": 9.713947704476643,
"learning_rate": 8.176105645402244e-07,
"loss": 0.3699314594268799,
"step": 3168
},
{
"epoch": 2.5031595576619274,
"grad_norm": 11.905314094509981,
"learning_rate": 8.150939071604069e-07,
"loss": 0.2857322692871094,
"step": 3169
},
{
"epoch": 2.5039494470774093,
"grad_norm": 11.738198518074968,
"learning_rate": 8.125807852030349e-07,
"loss": 0.8294199109077454,
"step": 3170
},
{
"epoch": 2.504739336492891,
"grad_norm": 23.694230717994056,
"learning_rate": 8.10071200791206e-07,
"loss": 0.8081961870193481,
"step": 3171
},
{
"epoch": 2.505529225908373,
"grad_norm": 11.356117033254058,
"learning_rate": 8.075651560450237e-07,
"loss": 0.45390763878822327,
"step": 3172
},
{
"epoch": 2.506319115323855,
"grad_norm": 9.254162743946639,
"learning_rate": 8.050626530816036e-07,
"loss": 0.39210453629493713,
"step": 3173
},
{
"epoch": 2.5071090047393367,
"grad_norm": 14.960564989579739,
"learning_rate": 8.025636940150677e-07,
"loss": 0.4236029386520386,
"step": 3174
},
{
"epoch": 2.5078988941548186,
"grad_norm": 9.28804391772842,
"learning_rate": 8.000682809565457e-07,
"loss": 0.38558968901634216,
"step": 3175
},
{
"epoch": 2.5086887835703,
"grad_norm": 9.760069575355349,
"learning_rate": 7.975764160141702e-07,
"loss": 0.197739839553833,
"step": 3176
},
{
"epoch": 2.509478672985782,
"grad_norm": 8.394775554288064,
"learning_rate": 7.950881012930766e-07,
"loss": 0.29299861192703247,
"step": 3177
},
{
"epoch": 2.5102685624012637,
"grad_norm": 13.432882997987198,
"learning_rate": 7.926033388954019e-07,
"loss": 0.6216456294059753,
"step": 3178
},
{
"epoch": 2.5110584518167456,
"grad_norm": 11.340453335100433,
"learning_rate": 7.901221309202817e-07,
"loss": 0.4832969307899475,
"step": 3179
},
{
"epoch": 2.5118483412322274,
"grad_norm": 12.980596557657627,
"learning_rate": 7.876444794638477e-07,
"loss": 0.3053157329559326,
"step": 3180
},
{
"epoch": 2.5126382306477093,
"grad_norm": 10.678528762002763,
"learning_rate": 7.851703866192295e-07,
"loss": 0.5949424505233765,
"step": 3181
},
{
"epoch": 2.513428120063191,
"grad_norm": 8.615764477300809,
"learning_rate": 7.826998544765473e-07,
"loss": 0.3822169005870819,
"step": 3182
},
{
"epoch": 2.514218009478673,
"grad_norm": 12.219849039480954,
"learning_rate": 7.802328851229163e-07,
"loss": 0.35568392276763916,
"step": 3183
},
{
"epoch": 2.515007898894155,
"grad_norm": 12.387025000660175,
"learning_rate": 7.77769480642439e-07,
"loss": 0.386673241853714,
"step": 3184
},
{
"epoch": 2.5157977883096367,
"grad_norm": 8.52886409021774,
"learning_rate": 7.753096431162083e-07,
"loss": 0.24550145864486694,
"step": 3185
},
{
"epoch": 2.5165876777251186,
"grad_norm": 8.816997119450642,
"learning_rate": 7.728533746223032e-07,
"loss": 0.3060890734195709,
"step": 3186
},
{
"epoch": 2.5173775671406005,
"grad_norm": 12.72993681147683,
"learning_rate": 7.70400677235787e-07,
"loss": 0.4496222734451294,
"step": 3187
},
{
"epoch": 2.518167456556082,
"grad_norm": 14.450644987050909,
"learning_rate": 7.679515530287068e-07,
"loss": 0.3027800917625427,
"step": 3188
},
{
"epoch": 2.5189573459715637,
"grad_norm": 6.8289260496500335,
"learning_rate": 7.655060040700895e-07,
"loss": 0.21853289008140564,
"step": 3189
},
{
"epoch": 2.5197472353870456,
"grad_norm": 16.18749221673546,
"learning_rate": 7.630640324259453e-07,
"loss": 0.3517822027206421,
"step": 3190
},
{
"epoch": 2.5205371248025275,
"grad_norm": 7.317050751605334,
"learning_rate": 7.606256401592599e-07,
"loss": 0.20418161153793335,
"step": 3191
},
{
"epoch": 2.5213270142180093,
"grad_norm": 11.575307062369431,
"learning_rate": 7.581908293299923e-07,
"loss": 0.29537758231163025,
"step": 3192
},
{
"epoch": 2.522116903633491,
"grad_norm": 13.330694266343132,
"learning_rate": 7.557596019950797e-07,
"loss": 0.24356764554977417,
"step": 3193
},
{
"epoch": 2.522906793048973,
"grad_norm": 12.770819836355157,
"learning_rate": 7.533319602084321e-07,
"loss": 0.9563419818878174,
"step": 3194
},
{
"epoch": 2.523696682464455,
"grad_norm": 9.323681305876523,
"learning_rate": 7.50907906020929e-07,
"loss": 0.4368267059326172,
"step": 3195
},
{
"epoch": 2.5244865718799367,
"grad_norm": 12.737056250718048,
"learning_rate": 7.484874414804206e-07,
"loss": 0.8397213816642761,
"step": 3196
},
{
"epoch": 2.5252764612954186,
"grad_norm": 11.13269574815902,
"learning_rate": 7.460705686317205e-07,
"loss": 0.6406710743904114,
"step": 3197
},
{
"epoch": 2.5260663507109005,
"grad_norm": 15.377479065191766,
"learning_rate": 7.43657289516611e-07,
"loss": 0.5515921115875244,
"step": 3198
},
{
"epoch": 2.5268562401263823,
"grad_norm": 10.942333674945784,
"learning_rate": 7.412476061738405e-07,
"loss": 0.47541213035583496,
"step": 3199
},
{
"epoch": 2.527646129541864,
"grad_norm": 11.44586983157694,
"learning_rate": 7.388415206391164e-07,
"loss": 0.2634442150592804,
"step": 3200
},
{
"epoch": 2.528436018957346,
"grad_norm": 12.935860176811287,
"learning_rate": 7.364390349451073e-07,
"loss": 1.6421760320663452,
"step": 3201
},
{
"epoch": 2.529225908372828,
"grad_norm": 6.618453794857769,
"learning_rate": 7.340401511214418e-07,
"loss": 0.18729032576084137,
"step": 3202
},
{
"epoch": 2.5300157977883098,
"grad_norm": 21.158575104978276,
"learning_rate": 7.316448711947038e-07,
"loss": 0.6561132073402405,
"step": 3203
},
{
"epoch": 2.5308056872037916,
"grad_norm": 17.41414326406974,
"learning_rate": 7.292531971884348e-07,
"loss": 1.311091423034668,
"step": 3204
},
{
"epoch": 2.5315955766192735,
"grad_norm": 9.054118102670634,
"learning_rate": 7.268651311231278e-07,
"loss": 0.37653642892837524,
"step": 3205
},
{
"epoch": 2.5323854660347553,
"grad_norm": 7.875373579401623,
"learning_rate": 7.244806750162298e-07,
"loss": 0.13674522936344147,
"step": 3206
},
{
"epoch": 2.533175355450237,
"grad_norm": 7.790372139492247,
"learning_rate": 7.22099830882137e-07,
"loss": 0.37409287691116333,
"step": 3207
},
{
"epoch": 2.533965244865719,
"grad_norm": 21.5175970885638,
"learning_rate": 7.197226007321939e-07,
"loss": 0.5270158052444458,
"step": 3208
},
{
"epoch": 2.5347551342812005,
"grad_norm": 9.240238834016377,
"learning_rate": 7.173489865746924e-07,
"loss": 0.57960045337677,
"step": 3209
},
{
"epoch": 2.5355450236966823,
"grad_norm": 17.71089132438801,
"learning_rate": 7.149789904148696e-07,
"loss": 0.9655189514160156,
"step": 3210
},
{
"epoch": 2.536334913112164,
"grad_norm": 7.9060126053659685,
"learning_rate": 7.126126142549067e-07,
"loss": 0.8784974217414856,
"step": 3211
},
{
"epoch": 2.537124802527646,
"grad_norm": 15.460935801679469,
"learning_rate": 7.102498600939256e-07,
"loss": 0.9071961641311646,
"step": 3212
},
{
"epoch": 2.537914691943128,
"grad_norm": 9.491362341174666,
"learning_rate": 7.078907299279886e-07,
"loss": 0.99891197681427,
"step": 3213
},
{
"epoch": 2.5387045813586098,
"grad_norm": 13.553645359898077,
"learning_rate": 7.05535225750097e-07,
"loss": 0.4606255292892456,
"step": 3214
},
{
"epoch": 2.5394944707740916,
"grad_norm": 11.434760827242846,
"learning_rate": 7.031833495501878e-07,
"loss": 0.38194912672042847,
"step": 3215
},
{
"epoch": 2.5402843601895735,
"grad_norm": 8.816989170036669,
"learning_rate": 7.008351033151345e-07,
"loss": 0.5898439884185791,
"step": 3216
},
{
"epoch": 2.5410742496050553,
"grad_norm": 8.941678431636664,
"learning_rate": 6.984904890287419e-07,
"loss": 0.6821322441101074,
"step": 3217
},
{
"epoch": 2.541864139020537,
"grad_norm": 11.879397851452923,
"learning_rate": 6.961495086717518e-07,
"loss": 0.30271491408348083,
"step": 3218
},
{
"epoch": 2.542654028436019,
"grad_norm": 11.904426225856726,
"learning_rate": 6.938121642218277e-07,
"loss": 0.8798356056213379,
"step": 3219
},
{
"epoch": 2.543443917851501,
"grad_norm": 9.833714180030546,
"learning_rate": 6.914784576535671e-07,
"loss": 0.4576849341392517,
"step": 3220
},
{
"epoch": 2.544233807266983,
"grad_norm": 7.60568754755486,
"learning_rate": 6.891483909384927e-07,
"loss": 0.39021506905555725,
"step": 3221
},
{
"epoch": 2.545023696682464,
"grad_norm": 13.385710625425459,
"learning_rate": 6.868219660450542e-07,
"loss": 0.6809737682342529,
"step": 3222
},
{
"epoch": 2.545813586097946,
"grad_norm": 14.296223483394565,
"learning_rate": 6.844991849386234e-07,
"loss": 0.354898601770401,
"step": 3223
},
{
"epoch": 2.546603475513428,
"grad_norm": 7.507430954335532,
"learning_rate": 6.821800495814906e-07,
"loss": 0.3235066831111908,
"step": 3224
},
{
"epoch": 2.5473933649289098,
"grad_norm": 8.831847194569248,
"learning_rate": 6.798645619328709e-07,
"loss": 0.6501250267028809,
"step": 3225
},
{
"epoch": 2.5481832543443916,
"grad_norm": 11.08888965436733,
"learning_rate": 6.775527239488939e-07,
"loss": 0.3581928312778473,
"step": 3226
},
{
"epoch": 2.5489731437598735,
"grad_norm": 14.993261176629105,
"learning_rate": 6.752445375826111e-07,
"loss": 0.47264599800109863,
"step": 3227
},
{
"epoch": 2.5497630331753554,
"grad_norm": 12.12552691044683,
"learning_rate": 6.729400047839834e-07,
"loss": 1.0418339967727661,
"step": 3228
},
{
"epoch": 2.550552922590837,
"grad_norm": 9.816976412231398,
"learning_rate": 6.706391274998908e-07,
"loss": 0.40765923261642456,
"step": 3229
},
{
"epoch": 2.551342812006319,
"grad_norm": 8.814268750339222,
"learning_rate": 6.683419076741166e-07,
"loss": 0.659870982170105,
"step": 3230
},
{
"epoch": 2.552132701421801,
"grad_norm": 12.387930895767527,
"learning_rate": 6.660483472473644e-07,
"loss": 0.6081492900848389,
"step": 3231
},
{
"epoch": 2.552922590837283,
"grad_norm": 17.021655475649457,
"learning_rate": 6.637584481572407e-07,
"loss": 0.8503941297531128,
"step": 3232
},
{
"epoch": 2.5537124802527646,
"grad_norm": 10.612469129748503,
"learning_rate": 6.614722123382583e-07,
"loss": 0.5053238868713379,
"step": 3233
},
{
"epoch": 2.5545023696682465,
"grad_norm": 11.199796923866582,
"learning_rate": 6.591896417218391e-07,
"loss": 0.5718584656715393,
"step": 3234
},
{
"epoch": 2.5552922590837284,
"grad_norm": 24.003473547596283,
"learning_rate": 6.569107382363027e-07,
"loss": 0.8553175330162048,
"step": 3235
},
{
"epoch": 2.5560821484992102,
"grad_norm": 8.724785188614117,
"learning_rate": 6.546355038068774e-07,
"loss": 0.2981413006782532,
"step": 3236
},
{
"epoch": 2.556872037914692,
"grad_norm": 8.108209067959447,
"learning_rate": 6.523639403556875e-07,
"loss": 0.27729976177215576,
"step": 3237
},
{
"epoch": 2.557661927330174,
"grad_norm": 7.579505693226836,
"learning_rate": 6.500960498017578e-07,
"loss": 0.3648611903190613,
"step": 3238
},
{
"epoch": 2.558451816745656,
"grad_norm": 13.670618984625548,
"learning_rate": 6.478318340610091e-07,
"loss": 0.4756515324115753,
"step": 3239
},
{
"epoch": 2.5592417061611377,
"grad_norm": 16.165515463439487,
"learning_rate": 6.45571295046259e-07,
"loss": 0.6007115840911865,
"step": 3240
},
{
"epoch": 2.5600315955766195,
"grad_norm": 9.184975673193794,
"learning_rate": 6.433144346672177e-07,
"loss": 0.22796334326267242,
"step": 3241
},
{
"epoch": 2.5608214849921014,
"grad_norm": 11.242675742774068,
"learning_rate": 6.410612548304884e-07,
"loss": 0.5558523535728455,
"step": 3242
},
{
"epoch": 2.561611374407583,
"grad_norm": 17.912335926244754,
"learning_rate": 6.388117574395652e-07,
"loss": 0.45684516429901123,
"step": 3243
},
{
"epoch": 2.5624012638230647,
"grad_norm": 6.42831978379293,
"learning_rate": 6.365659443948307e-07,
"loss": 0.37593698501586914,
"step": 3244
},
{
"epoch": 2.5631911532385465,
"grad_norm": 14.380203287837352,
"learning_rate": 6.343238175935551e-07,
"loss": 1.0076820850372314,
"step": 3245
},
{
"epoch": 2.5639810426540284,
"grad_norm": 11.649489711574187,
"learning_rate": 6.320853789298942e-07,
"loss": 0.6352476477622986,
"step": 3246
},
{
"epoch": 2.5647709320695102,
"grad_norm": 11.141609677532234,
"learning_rate": 6.298506302948886e-07,
"loss": 0.4481988549232483,
"step": 3247
},
{
"epoch": 2.565560821484992,
"grad_norm": 11.697367831752132,
"learning_rate": 6.276195735764617e-07,
"loss": 0.5129117965698242,
"step": 3248
},
{
"epoch": 2.566350710900474,
"grad_norm": 10.308816622041837,
"learning_rate": 6.253922106594162e-07,
"loss": 0.6752769351005554,
"step": 3249
},
{
"epoch": 2.567140600315956,
"grad_norm": 10.354709362911104,
"learning_rate": 6.231685434254375e-07,
"loss": 0.22525343298912048,
"step": 3250
},
{
"epoch": 2.5679304897314377,
"grad_norm": 11.05644990012399,
"learning_rate": 6.209485737530873e-07,
"loss": 0.35317540168762207,
"step": 3251
},
{
"epoch": 2.5687203791469195,
"grad_norm": 6.477124764043402,
"learning_rate": 6.187323035178012e-07,
"loss": 0.22543203830718994,
"step": 3252
},
{
"epoch": 2.5695102685624014,
"grad_norm": 14.30255153474572,
"learning_rate": 6.165197345918927e-07,
"loss": 0.46911317110061646,
"step": 3253
},
{
"epoch": 2.5703001579778832,
"grad_norm": 9.552175646905189,
"learning_rate": 6.14310868844546e-07,
"loss": 0.33642643690109253,
"step": 3254
},
{
"epoch": 2.5710900473933647,
"grad_norm": 6.838188468089331,
"learning_rate": 6.121057081418202e-07,
"loss": 0.33152109384536743,
"step": 3255
},
{
"epoch": 2.5718799368088465,
"grad_norm": 10.0723559433558,
"learning_rate": 6.099042543466427e-07,
"loss": 0.4825100898742676,
"step": 3256
},
{
"epoch": 2.5726698262243284,
"grad_norm": 9.82952412822834,
"learning_rate": 6.077065093188062e-07,
"loss": 0.3012405037879944,
"step": 3257
},
{
"epoch": 2.5734597156398102,
"grad_norm": 9.586040497432494,
"learning_rate": 6.055124749149738e-07,
"loss": 0.23892341554164886,
"step": 3258
},
{
"epoch": 2.574249605055292,
"grad_norm": 12.603578521658129,
"learning_rate": 6.033221529886745e-07,
"loss": 0.6413030624389648,
"step": 3259
},
{
"epoch": 2.575039494470774,
"grad_norm": 8.672198769457234,
"learning_rate": 6.011355453902984e-07,
"loss": 0.3947061598300934,
"step": 3260
},
{
"epoch": 2.575829383886256,
"grad_norm": 15.44081680860732,
"learning_rate": 5.989526539670992e-07,
"loss": 1.9373308420181274,
"step": 3261
},
{
"epoch": 2.5766192733017377,
"grad_norm": 15.004738901323284,
"learning_rate": 5.967734805631913e-07,
"loss": 0.90089350938797,
"step": 3262
},
{
"epoch": 2.5774091627172195,
"grad_norm": 11.506465084275762,
"learning_rate": 5.945980270195451e-07,
"loss": 0.742828369140625,
"step": 3263
},
{
"epoch": 2.5781990521327014,
"grad_norm": 13.32230582597808,
"learning_rate": 5.924262951739929e-07,
"loss": 0.24065065383911133,
"step": 3264
},
{
"epoch": 2.5789889415481833,
"grad_norm": 9.779711772862912,
"learning_rate": 5.902582868612211e-07,
"loss": 0.5692986249923706,
"step": 3265
},
{
"epoch": 2.579778830963665,
"grad_norm": 8.091630298353223,
"learning_rate": 5.880940039127703e-07,
"loss": 0.3704443573951721,
"step": 3266
},
{
"epoch": 2.580568720379147,
"grad_norm": 10.939765166542545,
"learning_rate": 5.859334481570328e-07,
"loss": 0.5513951778411865,
"step": 3267
},
{
"epoch": 2.581358609794629,
"grad_norm": 12.859593353523778,
"learning_rate": 5.837766214192536e-07,
"loss": 0.3680616319179535,
"step": 3268
},
{
"epoch": 2.5821484992101107,
"grad_norm": 13.927295492601075,
"learning_rate": 5.816235255215275e-07,
"loss": 0.25186580419540405,
"step": 3269
},
{
"epoch": 2.5829383886255926,
"grad_norm": 9.056287052188528,
"learning_rate": 5.794741622827966e-07,
"loss": 0.361020565032959,
"step": 3270
},
{
"epoch": 2.5837282780410744,
"grad_norm": 9.11489390301563,
"learning_rate": 5.773285335188499e-07,
"loss": 0.567996084690094,
"step": 3271
},
{
"epoch": 2.5845181674565563,
"grad_norm": 7.932039650484051,
"learning_rate": 5.751866410423224e-07,
"loss": 0.4094735085964203,
"step": 3272
},
{
"epoch": 2.585308056872038,
"grad_norm": 11.984895953206886,
"learning_rate": 5.730484866626912e-07,
"loss": 0.30167537927627563,
"step": 3273
},
{
"epoch": 2.58609794628752,
"grad_norm": 9.289484227401946,
"learning_rate": 5.70914072186276e-07,
"loss": 0.32061922550201416,
"step": 3274
},
{
"epoch": 2.586887835703002,
"grad_norm": 9.622010065894505,
"learning_rate": 5.687833994162378e-07,
"loss": 0.32422295212745667,
"step": 3275
},
{
"epoch": 2.5876777251184833,
"grad_norm": 14.227211464394562,
"learning_rate": 5.666564701525762e-07,
"loss": 0.27812737226486206,
"step": 3276
},
{
"epoch": 2.588467614533965,
"grad_norm": 10.998826927820959,
"learning_rate": 5.64533286192126e-07,
"loss": 0.4728177785873413,
"step": 3277
},
{
"epoch": 2.589257503949447,
"grad_norm": 13.428356160162433,
"learning_rate": 5.624138493285636e-07,
"loss": 0.36791884899139404,
"step": 3278
},
{
"epoch": 2.590047393364929,
"grad_norm": 10.44768694966881,
"learning_rate": 5.602981613523933e-07,
"loss": 0.5437135100364685,
"step": 3279
},
{
"epoch": 2.5908372827804107,
"grad_norm": 8.729760092755804,
"learning_rate": 5.581862240509561e-07,
"loss": 0.3248332440853119,
"step": 3280
},
{
"epoch": 2.5916271721958926,
"grad_norm": 7.7718729951563725,
"learning_rate": 5.560780392084236e-07,
"loss": 0.30842357873916626,
"step": 3281
},
{
"epoch": 2.5924170616113744,
"grad_norm": 8.64870834436161,
"learning_rate": 5.539736086057968e-07,
"loss": 0.5332027673721313,
"step": 3282
},
{
"epoch": 2.5932069510268563,
"grad_norm": 12.471905432522792,
"learning_rate": 5.518729340209067e-07,
"loss": 0.3546110689640045,
"step": 3283
},
{
"epoch": 2.593996840442338,
"grad_norm": 10.520884370675867,
"learning_rate": 5.497760172284105e-07,
"loss": 0.5727818012237549,
"step": 3284
},
{
"epoch": 2.59478672985782,
"grad_norm": 21.842408865407872,
"learning_rate": 5.476828599997891e-07,
"loss": 0.9324047565460205,
"step": 3285
},
{
"epoch": 2.595576619273302,
"grad_norm": 15.695330799147337,
"learning_rate": 5.455934641033473e-07,
"loss": 0.7432706952095032,
"step": 3286
},
{
"epoch": 2.5963665086887837,
"grad_norm": 8.672936031824056,
"learning_rate": 5.43507831304217e-07,
"loss": 0.32435929775238037,
"step": 3287
},
{
"epoch": 2.597156398104265,
"grad_norm": 12.82117590286232,
"learning_rate": 5.414259633643454e-07,
"loss": 0.31739306449890137,
"step": 3288
},
{
"epoch": 2.597946287519747,
"grad_norm": 8.894198702503063,
"learning_rate": 5.39347862042503e-07,
"loss": 0.23482373356819153,
"step": 3289
},
{
"epoch": 2.598736176935229,
"grad_norm": 9.107176351218104,
"learning_rate": 5.372735290942749e-07,
"loss": 0.37216585874557495,
"step": 3290
},
{
"epoch": 2.5995260663507107,
"grad_norm": 12.065946085010948,
"learning_rate": 5.352029662720643e-07,
"loss": 0.45027846097946167,
"step": 3291
},
{
"epoch": 2.6003159557661926,
"grad_norm": 9.353380690591916,
"learning_rate": 5.331361753250908e-07,
"loss": 0.5409231781959534,
"step": 3292
},
{
"epoch": 2.6011058451816744,
"grad_norm": 9.465254647955879,
"learning_rate": 5.31073157999386e-07,
"loss": 0.30727618932724,
"step": 3293
},
{
"epoch": 2.6018957345971563,
"grad_norm": 9.607201625715247,
"learning_rate": 5.290139160377944e-07,
"loss": 0.31384027004241943,
"step": 3294
},
{
"epoch": 2.602685624012638,
"grad_norm": 14.736505951040836,
"learning_rate": 5.269584511799674e-07,
"loss": 0.9769009351730347,
"step": 3295
},
{
"epoch": 2.60347551342812,
"grad_norm": 11.551220516709076,
"learning_rate": 5.249067651623713e-07,
"loss": 0.4081469178199768,
"step": 3296
},
{
"epoch": 2.604265402843602,
"grad_norm": 11.149694766419422,
"learning_rate": 5.228588597182771e-07,
"loss": 0.31222042441368103,
"step": 3297
},
{
"epoch": 2.6050552922590837,
"grad_norm": 12.284974464013962,
"learning_rate": 5.208147365777605e-07,
"loss": 0.3729371428489685,
"step": 3298
},
{
"epoch": 2.6058451816745656,
"grad_norm": 6.66143671120458,
"learning_rate": 5.187743974677051e-07,
"loss": 0.23765571415424347,
"step": 3299
},
{
"epoch": 2.6066350710900474,
"grad_norm": 10.189036379966337,
"learning_rate": 5.167378441117948e-07,
"loss": 0.5407176613807678,
"step": 3300
},
{
"epoch": 2.6074249605055293,
"grad_norm": 10.508990210906765,
"learning_rate": 5.147050782305174e-07,
"loss": 0.4038906693458557,
"step": 3301
},
{
"epoch": 2.608214849921011,
"grad_norm": 10.671951482144292,
"learning_rate": 5.126761015411602e-07,
"loss": 0.3577304482460022,
"step": 3302
},
{
"epoch": 2.609004739336493,
"grad_norm": 16.67545595391455,
"learning_rate": 5.106509157578088e-07,
"loss": 0.6719971895217896,
"step": 3303
},
{
"epoch": 2.609794628751975,
"grad_norm": 8.313398028205375,
"learning_rate": 5.086295225913468e-07,
"loss": 0.417365163564682,
"step": 3304
},
{
"epoch": 2.6105845181674567,
"grad_norm": 6.802692561793048,
"learning_rate": 5.066119237494543e-07,
"loss": 0.20259469747543335,
"step": 3305
},
{
"epoch": 2.6113744075829386,
"grad_norm": 14.896512148391754,
"learning_rate": 5.045981209366058e-07,
"loss": 0.6620730757713318,
"step": 3306
},
{
"epoch": 2.6121642969984205,
"grad_norm": 10.671850166031216,
"learning_rate": 5.025881158540674e-07,
"loss": 0.6920949220657349,
"step": 3307
},
{
"epoch": 2.6129541864139023,
"grad_norm": 9.483878722714492,
"learning_rate": 5.005819101998993e-07,
"loss": 0.24593792855739594,
"step": 3308
},
{
"epoch": 2.6137440758293837,
"grad_norm": 12.335044317508292,
"learning_rate": 4.985795056689496e-07,
"loss": 0.38339120149612427,
"step": 3309
},
{
"epoch": 2.6145339652448656,
"grad_norm": 11.267803688952569,
"learning_rate": 4.965809039528557e-07,
"loss": 0.7271929383277893,
"step": 3310
},
{
"epoch": 2.6153238546603474,
"grad_norm": 9.841740710130463,
"learning_rate": 4.945861067400459e-07,
"loss": 0.33051010966300964,
"step": 3311
},
{
"epoch": 2.6161137440758293,
"grad_norm": 8.575258139149547,
"learning_rate": 4.925951157157282e-07,
"loss": 0.40669572353363037,
"step": 3312
},
{
"epoch": 2.616903633491311,
"grad_norm": 14.064126439379152,
"learning_rate": 4.906079325618995e-07,
"loss": 0.4142283797264099,
"step": 3313
},
{
"epoch": 2.617693522906793,
"grad_norm": 10.926203888261908,
"learning_rate": 4.886245589573379e-07,
"loss": 0.3864701986312866,
"step": 3314
},
{
"epoch": 2.618483412322275,
"grad_norm": 8.095460272657196,
"learning_rate": 4.86644996577606e-07,
"loss": 0.6239743828773499,
"step": 3315
},
{
"epoch": 2.6192733017377567,
"grad_norm": 9.5988025752161,
"learning_rate": 4.846692470950442e-07,
"loss": 0.44168534874916077,
"step": 3316
},
{
"epoch": 2.6200631911532386,
"grad_norm": 12.25520966680489,
"learning_rate": 4.826973121787704e-07,
"loss": 0.7248414754867554,
"step": 3317
},
{
"epoch": 2.6208530805687205,
"grad_norm": 11.504118164211533,
"learning_rate": 4.807291934946828e-07,
"loss": 0.388072669506073,
"step": 3318
},
{
"epoch": 2.6216429699842023,
"grad_norm": 11.714875357297148,
"learning_rate": 4.787648927054534e-07,
"loss": 0.31926900148391724,
"step": 3319
},
{
"epoch": 2.622432859399684,
"grad_norm": 9.583077187413663,
"learning_rate": 4.7680441147053225e-07,
"loss": 0.25406989455223083,
"step": 3320
},
{
"epoch": 2.623222748815166,
"grad_norm": 12.602149801934326,
"learning_rate": 4.748477514461386e-07,
"loss": 1.1567286252975464,
"step": 3321
},
{
"epoch": 2.6240126382306475,
"grad_norm": 12.380509221098798,
"learning_rate": 4.728949142852668e-07,
"loss": 0.2009027898311615,
"step": 3322
},
{
"epoch": 2.6248025276461293,
"grad_norm": 37.105558180160095,
"learning_rate": 4.709459016376777e-07,
"loss": 0.7523494362831116,
"step": 3323
},
{
"epoch": 2.625592417061611,
"grad_norm": 8.134017867193721,
"learning_rate": 4.6900071514990543e-07,
"loss": 0.2318010777235031,
"step": 3324
},
{
"epoch": 2.626382306477093,
"grad_norm": 8.83131524505039,
"learning_rate": 4.670593564652498e-07,
"loss": 0.43371960520744324,
"step": 3325
},
{
"epoch": 2.627172195892575,
"grad_norm": 10.963397183621947,
"learning_rate": 4.6512182722377677e-07,
"loss": 0.36909347772598267,
"step": 3326
},
{
"epoch": 2.6279620853080567,
"grad_norm": 12.084093646962547,
"learning_rate": 4.63188129062318e-07,
"loss": 0.388027161359787,
"step": 3327
},
{
"epoch": 2.6287519747235386,
"grad_norm": 9.478263087592277,
"learning_rate": 4.6125826361446633e-07,
"loss": 0.3623710870742798,
"step": 3328
},
{
"epoch": 2.6295418641390205,
"grad_norm": 18.041943384926824,
"learning_rate": 4.593322325105798e-07,
"loss": 2.725191831588745,
"step": 3329
},
{
"epoch": 2.6303317535545023,
"grad_norm": 10.053745549005518,
"learning_rate": 4.574100373777762e-07,
"loss": 0.9186097383499146,
"step": 3330
},
{
"epoch": 2.631121642969984,
"grad_norm": 16.56032999257091,
"learning_rate": 4.554916798399311e-07,
"loss": 0.7610374689102173,
"step": 3331
},
{
"epoch": 2.631911532385466,
"grad_norm": 9.799384700311915,
"learning_rate": 4.5357716151768037e-07,
"loss": 0.22280161082744598,
"step": 3332
},
{
"epoch": 2.632701421800948,
"grad_norm": 8.292734812546973,
"learning_rate": 4.5166648402841464e-07,
"loss": 0.3859997093677521,
"step": 3333
},
{
"epoch": 2.6334913112164298,
"grad_norm": 8.969496231310965,
"learning_rate": 4.49759648986281e-07,
"loss": 1.129380226135254,
"step": 3334
},
{
"epoch": 2.6342812006319116,
"grad_norm": 14.025626351449633,
"learning_rate": 4.4785665800217925e-07,
"loss": 0.7709635496139526,
"step": 3335
},
{
"epoch": 2.6350710900473935,
"grad_norm": 12.132670588201732,
"learning_rate": 4.459575126837634e-07,
"loss": 0.31990846991539,
"step": 3336
},
{
"epoch": 2.6358609794628753,
"grad_norm": 12.500867440865886,
"learning_rate": 4.440622146354373e-07,
"loss": 0.7797756195068359,
"step": 3337
},
{
"epoch": 2.636650868878357,
"grad_norm": 13.48812236217527,
"learning_rate": 4.421707654583546e-07,
"loss": 0.7479414939880371,
"step": 3338
},
{
"epoch": 2.637440758293839,
"grad_norm": 19.89516186489078,
"learning_rate": 4.402831667504187e-07,
"loss": 0.7601022720336914,
"step": 3339
},
{
"epoch": 2.638230647709321,
"grad_norm": 11.98723585736858,
"learning_rate": 4.38399420106278e-07,
"loss": 0.5701296329498291,
"step": 3340
},
{
"epoch": 2.639020537124803,
"grad_norm": 5.891135933647762,
"learning_rate": 4.365195271173289e-07,
"loss": 0.18816259503364563,
"step": 3341
},
{
"epoch": 2.639810426540284,
"grad_norm": 14.919973147368829,
"learning_rate": 4.3464348937170996e-07,
"loss": 0.6145678162574768,
"step": 3342
},
{
"epoch": 2.640600315955766,
"grad_norm": 13.275675106320598,
"learning_rate": 4.327713084543056e-07,
"loss": 0.7048325538635254,
"step": 3343
},
{
"epoch": 2.641390205371248,
"grad_norm": 12.503928032714008,
"learning_rate": 4.3090298594674006e-07,
"loss": 0.3908374607563019,
"step": 3344
},
{
"epoch": 2.6421800947867298,
"grad_norm": 11.72770662305032,
"learning_rate": 4.290385234273775e-07,
"loss": 0.505962610244751,
"step": 3345
},
{
"epoch": 2.6429699842022116,
"grad_norm": 12.690173043224384,
"learning_rate": 4.2717792247132293e-07,
"loss": 0.4017457962036133,
"step": 3346
},
{
"epoch": 2.6437598736176935,
"grad_norm": 8.36874609608357,
"learning_rate": 4.253211846504163e-07,
"loss": 0.22178924083709717,
"step": 3347
},
{
"epoch": 2.6445497630331753,
"grad_norm": 10.103545343043429,
"learning_rate": 4.234683115332383e-07,
"loss": 0.2969557046890259,
"step": 3348
},
{
"epoch": 2.645339652448657,
"grad_norm": 13.225582438481474,
"learning_rate": 4.216193046851019e-07,
"loss": 0.37480732798576355,
"step": 3349
},
{
"epoch": 2.646129541864139,
"grad_norm": 10.84422500520785,
"learning_rate": 4.1977416566805264e-07,
"loss": 0.9092705845832825,
"step": 3350
},
{
"epoch": 2.646919431279621,
"grad_norm": 9.789088606323828,
"learning_rate": 4.179328960408696e-07,
"loss": 0.31875336170196533,
"step": 3351
},
{
"epoch": 2.647709320695103,
"grad_norm": 10.486891620887812,
"learning_rate": 4.160954973590664e-07,
"loss": 0.3058662414550781,
"step": 3352
},
{
"epoch": 2.6484992101105846,
"grad_norm": 13.733159335396302,
"learning_rate": 4.1426197117488134e-07,
"loss": 2.2363317012786865,
"step": 3353
},
{
"epoch": 2.6492890995260665,
"grad_norm": 6.946032041163559,
"learning_rate": 4.1243231903728363e-07,
"loss": 0.2704191505908966,
"step": 3354
},
{
"epoch": 2.650078988941548,
"grad_norm": 11.504621566189082,
"learning_rate": 4.106065424919703e-07,
"loss": 0.3017812967300415,
"step": 3355
},
{
"epoch": 2.65086887835703,
"grad_norm": 10.321032506154271,
"learning_rate": 4.087846430813613e-07,
"loss": 1.0040827989578247,
"step": 3356
},
{
"epoch": 2.6516587677725116,
"grad_norm": 11.294039143603817,
"learning_rate": 4.069666223446056e-07,
"loss": 0.4513833522796631,
"step": 3357
},
{
"epoch": 2.6524486571879935,
"grad_norm": 14.456974047266456,
"learning_rate": 4.051524818175723e-07,
"loss": 0.23808935284614563,
"step": 3358
},
{
"epoch": 2.6532385466034754,
"grad_norm": 7.5143794681952745,
"learning_rate": 4.033422230328526e-07,
"loss": 0.2904347777366638,
"step": 3359
},
{
"epoch": 2.654028436018957,
"grad_norm": 7.991560366796248,
"learning_rate": 4.0153584751976007e-07,
"loss": 0.4038187563419342,
"step": 3360
},
{
"epoch": 2.654818325434439,
"grad_norm": 9.497887673841467,
"learning_rate": 3.99733356804326e-07,
"loss": 0.31666648387908936,
"step": 3361
},
{
"epoch": 2.655608214849921,
"grad_norm": 10.401304732363979,
"learning_rate": 3.9793475240930077e-07,
"loss": 0.4911503493785858,
"step": 3362
},
{
"epoch": 2.656398104265403,
"grad_norm": 12.221183465688934,
"learning_rate": 3.9614003585415117e-07,
"loss": 0.8834859728813171,
"step": 3363
},
{
"epoch": 2.6571879936808847,
"grad_norm": 11.390696610946822,
"learning_rate": 3.943492086550599e-07,
"loss": 0.6366713047027588,
"step": 3364
},
{
"epoch": 2.6579778830963665,
"grad_norm": 15.422428455701008,
"learning_rate": 3.9256227232492337e-07,
"loss": 0.3374771773815155,
"step": 3365
},
{
"epoch": 2.6587677725118484,
"grad_norm": 9.583693820140757,
"learning_rate": 3.907792283733514e-07,
"loss": 0.7819290161132812,
"step": 3366
},
{
"epoch": 2.6595576619273302,
"grad_norm": 12.981199540184159,
"learning_rate": 3.8900007830666555e-07,
"loss": 0.5065968036651611,
"step": 3367
},
{
"epoch": 2.660347551342812,
"grad_norm": 7.236244572582725,
"learning_rate": 3.872248236278975e-07,
"loss": 0.6708056926727295,
"step": 3368
},
{
"epoch": 2.661137440758294,
"grad_norm": 8.694974340614756,
"learning_rate": 3.854534658367881e-07,
"loss": 0.26976072788238525,
"step": 3369
},
{
"epoch": 2.661927330173776,
"grad_norm": 11.235824129714906,
"learning_rate": 3.836860064297854e-07,
"loss": 0.3245980441570282,
"step": 3370
},
{
"epoch": 2.6627172195892577,
"grad_norm": 16.011419931261926,
"learning_rate": 3.819224469000482e-07,
"loss": 0.7259745597839355,
"step": 3371
},
{
"epoch": 2.6635071090047395,
"grad_norm": 19.22540678332816,
"learning_rate": 3.8016278873743375e-07,
"loss": 0.45876407623291016,
"step": 3372
},
{
"epoch": 2.6642969984202214,
"grad_norm": 8.780171480326908,
"learning_rate": 3.7840703342850893e-07,
"loss": 0.278285413980484,
"step": 3373
},
{
"epoch": 2.6650868878357032,
"grad_norm": 17.47655948792051,
"learning_rate": 3.766551824565406e-07,
"loss": 0.5746378898620605,
"step": 3374
},
{
"epoch": 2.665876777251185,
"grad_norm": 23.25805650807008,
"learning_rate": 3.7490723730149836e-07,
"loss": 0.507803201675415,
"step": 3375
},
{
"epoch": 2.6666666666666665,
"grad_norm": 9.306113512629594,
"learning_rate": 3.731631994400536e-07,
"loss": 0.18884757161140442,
"step": 3376
},
{
"epoch": 2.6674565560821484,
"grad_norm": 8.348124341525704,
"learning_rate": 3.7142307034557345e-07,
"loss": 0.29510441422462463,
"step": 3377
},
{
"epoch": 2.6682464454976302,
"grad_norm": 10.578489755562448,
"learning_rate": 3.696868514881258e-07,
"loss": 0.33782392740249634,
"step": 3378
},
{
"epoch": 2.669036334913112,
"grad_norm": 14.26143918553158,
"learning_rate": 3.679545443344723e-07,
"loss": 0.9008026719093323,
"step": 3379
},
{
"epoch": 2.669826224328594,
"grad_norm": 11.857039310446325,
"learning_rate": 3.662261503480741e-07,
"loss": 0.5593395233154297,
"step": 3380
},
{
"epoch": 2.670616113744076,
"grad_norm": 10.652642297722368,
"learning_rate": 3.6450167098908253e-07,
"loss": 0.31769973039627075,
"step": 3381
},
{
"epoch": 2.6714060031595577,
"grad_norm": 9.458143348474115,
"learning_rate": 3.6278110771434504e-07,
"loss": 0.2788546681404114,
"step": 3382
},
{
"epoch": 2.6721958925750395,
"grad_norm": 13.225514562775611,
"learning_rate": 3.610644619773973e-07,
"loss": 0.6898187398910522,
"step": 3383
},
{
"epoch": 2.6729857819905214,
"grad_norm": 10.118377023292018,
"learning_rate": 3.59351735228467e-07,
"loss": 0.2825648784637451,
"step": 3384
},
{
"epoch": 2.6737756714060033,
"grad_norm": 5.075483514495616,
"learning_rate": 3.576429289144734e-07,
"loss": 0.23227611184120178,
"step": 3385
},
{
"epoch": 2.674565560821485,
"grad_norm": 11.943311728775157,
"learning_rate": 3.559380444790206e-07,
"loss": 0.8256229758262634,
"step": 3386
},
{
"epoch": 2.675355450236967,
"grad_norm": 8.112570938757484,
"learning_rate": 3.54237083362402e-07,
"loss": 0.5945410132408142,
"step": 3387
},
{
"epoch": 2.6761453396524484,
"grad_norm": 15.634285640069708,
"learning_rate": 3.525400470015916e-07,
"loss": 0.6739033460617065,
"step": 3388
},
{
"epoch": 2.6769352290679302,
"grad_norm": 20.357153137616915,
"learning_rate": 3.508469368302542e-07,
"loss": 0.45265576243400574,
"step": 3389
},
{
"epoch": 2.677725118483412,
"grad_norm": 9.899660066154995,
"learning_rate": 3.4915775427873445e-07,
"loss": 0.4432603120803833,
"step": 3390
},
{
"epoch": 2.678515007898894,
"grad_norm": 8.138889689930242,
"learning_rate": 3.4747250077405925e-07,
"loss": 0.19528892636299133,
"step": 3391
},
{
"epoch": 2.679304897314376,
"grad_norm": 18.23444496802974,
"learning_rate": 3.4579117773993586e-07,
"loss": 0.5927311778068542,
"step": 3392
},
{
"epoch": 2.6800947867298577,
"grad_norm": 10.188403198684156,
"learning_rate": 3.4411378659675197e-07,
"loss": 0.2367173135280609,
"step": 3393
},
{
"epoch": 2.6808846761453395,
"grad_norm": 11.657906732572709,
"learning_rate": 3.424403287615724e-07,
"loss": 0.23523610830307007,
"step": 3394
},
{
"epoch": 2.6816745655608214,
"grad_norm": 15.05635601715627,
"learning_rate": 3.4077080564814126e-07,
"loss": 0.5322354435920715,
"step": 3395
},
{
"epoch": 2.6824644549763033,
"grad_norm": 12.057487107339211,
"learning_rate": 3.391052186668753e-07,
"loss": 0.7865498661994934,
"step": 3396
},
{
"epoch": 2.683254344391785,
"grad_norm": 8.968136456469345,
"learning_rate": 3.374435692248695e-07,
"loss": 0.2981482148170471,
"step": 3397
},
{
"epoch": 2.684044233807267,
"grad_norm": 7.657461394448841,
"learning_rate": 3.3578585872589e-07,
"loss": 0.39448219537734985,
"step": 3398
},
{
"epoch": 2.684834123222749,
"grad_norm": 13.924417235813834,
"learning_rate": 3.3413208857037636e-07,
"loss": 0.33557915687561035,
"step": 3399
},
{
"epoch": 2.6856240126382307,
"grad_norm": 8.80765991011228,
"learning_rate": 3.324822601554389e-07,
"loss": 0.4689452350139618,
"step": 3400
},
{
"epoch": 2.6864139020537126,
"grad_norm": 10.376386874091963,
"learning_rate": 3.308363748748583e-07,
"loss": 0.5030757188796997,
"step": 3401
},
{
"epoch": 2.6872037914691944,
"grad_norm": 7.506351165902702,
"learning_rate": 3.2919443411908335e-07,
"loss": 0.4964003562927246,
"step": 3402
},
{
"epoch": 2.6879936808846763,
"grad_norm": 8.750015580357847,
"learning_rate": 3.275564392752306e-07,
"loss": 0.6735177040100098,
"step": 3403
},
{
"epoch": 2.688783570300158,
"grad_norm": 8.400292003551172,
"learning_rate": 3.25922391727086e-07,
"loss": 0.5502939224243164,
"step": 3404
},
{
"epoch": 2.68957345971564,
"grad_norm": 17.77933866658459,
"learning_rate": 3.2429229285509565e-07,
"loss": 0.561028003692627,
"step": 3405
},
{
"epoch": 2.690363349131122,
"grad_norm": 8.989376021851474,
"learning_rate": 3.226661440363732e-07,
"loss": 0.4054949879646301,
"step": 3406
},
{
"epoch": 2.6911532385466037,
"grad_norm": 8.662923922835425,
"learning_rate": 3.210439466446941e-07,
"loss": 0.18396064639091492,
"step": 3407
},
{
"epoch": 2.6919431279620856,
"grad_norm": 13.879543447285586,
"learning_rate": 3.194257020504976e-07,
"loss": 0.49747684597969055,
"step": 3408
},
{
"epoch": 2.692733017377567,
"grad_norm": 10.218308650070451,
"learning_rate": 3.178114116208819e-07,
"loss": 0.2940269708633423,
"step": 3409
},
{
"epoch": 2.693522906793049,
"grad_norm": 11.95090160706378,
"learning_rate": 3.1620107671960274e-07,
"loss": 0.5844002366065979,
"step": 3410
},
{
"epoch": 2.6943127962085307,
"grad_norm": 12.15425735480325,
"learning_rate": 3.14594698707078e-07,
"loss": 0.3902283012866974,
"step": 3411
},
{
"epoch": 2.6951026856240126,
"grad_norm": 7.527465679424551,
"learning_rate": 3.1299227894038e-07,
"loss": 0.25770941376686096,
"step": 3412
},
{
"epoch": 2.6958925750394944,
"grad_norm": 19.33893813349674,
"learning_rate": 3.113938187732396e-07,
"loss": 0.4086335003376007,
"step": 3413
},
{
"epoch": 2.6966824644549763,
"grad_norm": 11.241807816989384,
"learning_rate": 3.097993195560406e-07,
"loss": 0.28467923402786255,
"step": 3414
},
{
"epoch": 2.697472353870458,
"grad_norm": 6.848242031731855,
"learning_rate": 3.082087826358221e-07,
"loss": 0.460833340883255,
"step": 3415
},
{
"epoch": 2.69826224328594,
"grad_norm": 13.881441071873525,
"learning_rate": 3.0662220935627264e-07,
"loss": 0.5943915843963623,
"step": 3416
},
{
"epoch": 2.699052132701422,
"grad_norm": 13.318701652240515,
"learning_rate": 3.0503960105773664e-07,
"loss": 0.3852251470088959,
"step": 3417
},
{
"epoch": 2.6998420221169037,
"grad_norm": 11.40666777900844,
"learning_rate": 3.034609590772064e-07,
"loss": 0.43630069494247437,
"step": 3418
},
{
"epoch": 2.7006319115323856,
"grad_norm": 17.06726177611801,
"learning_rate": 3.0188628474832283e-07,
"loss": 0.3444702625274658,
"step": 3419
},
{
"epoch": 2.7014218009478674,
"grad_norm": 11.317050973138297,
"learning_rate": 3.0031557940137846e-07,
"loss": 0.5595932006835938,
"step": 3420
},
{
"epoch": 2.7022116903633493,
"grad_norm": 6.595748201513425,
"learning_rate": 2.987488443633063e-07,
"loss": 0.3122694492340088,
"step": 3421
},
{
"epoch": 2.7030015797788307,
"grad_norm": 9.417440233234284,
"learning_rate": 2.971860809576926e-07,
"loss": 0.602641761302948,
"step": 3422
},
{
"epoch": 2.7037914691943126,
"grad_norm": 17.899529937935377,
"learning_rate": 2.956272905047641e-07,
"loss": 1.1811764240264893,
"step": 3423
},
{
"epoch": 2.7045813586097944,
"grad_norm": 21.27161915680074,
"learning_rate": 2.9407247432139184e-07,
"loss": 0.5429356694221497,
"step": 3424
},
{
"epoch": 2.7053712480252763,
"grad_norm": 14.45817636342026,
"learning_rate": 2.9252163372109013e-07,
"loss": 0.2626524567604065,
"step": 3425
},
{
"epoch": 2.706161137440758,
"grad_norm": 11.966013741915225,
"learning_rate": 2.9097477001401364e-07,
"loss": 0.3152087926864624,
"step": 3426
},
{
"epoch": 2.70695102685624,
"grad_norm": 9.01180406739463,
"learning_rate": 2.8943188450695824e-07,
"loss": 0.5591740608215332,
"step": 3427
},
{
"epoch": 2.707740916271722,
"grad_norm": 15.155219955926972,
"learning_rate": 2.878929785033585e-07,
"loss": 0.8030872941017151,
"step": 3428
},
{
"epoch": 2.7085308056872037,
"grad_norm": 9.0050236739937,
"learning_rate": 2.8635805330328783e-07,
"loss": 0.6855502128601074,
"step": 3429
},
{
"epoch": 2.7093206951026856,
"grad_norm": 9.140820391889852,
"learning_rate": 2.8482711020345556e-07,
"loss": 0.8755874037742615,
"step": 3430
},
{
"epoch": 2.7101105845181674,
"grad_norm": 15.402621823538889,
"learning_rate": 2.833001504972077e-07,
"loss": 0.429756760597229,
"step": 3431
},
{
"epoch": 2.7109004739336493,
"grad_norm": 9.610037912675201,
"learning_rate": 2.8177717547452463e-07,
"loss": 0.405164510011673,
"step": 3432
},
{
"epoch": 2.711690363349131,
"grad_norm": 13.695365189160531,
"learning_rate": 2.8025818642202054e-07,
"loss": 0.6108412742614746,
"step": 3433
},
{
"epoch": 2.712480252764613,
"grad_norm": 13.619752565690785,
"learning_rate": 2.787431846229427e-07,
"loss": 0.454412043094635,
"step": 3434
},
{
"epoch": 2.713270142180095,
"grad_norm": 11.53614981565061,
"learning_rate": 2.7723217135716906e-07,
"loss": 0.4228717088699341,
"step": 3435
},
{
"epoch": 2.7140600315955767,
"grad_norm": 6.706916975241827,
"learning_rate": 2.757251479012102e-07,
"loss": 0.2297818660736084,
"step": 3436
},
{
"epoch": 2.7148499210110586,
"grad_norm": 15.153915751186066,
"learning_rate": 2.742221155282027e-07,
"loss": 0.441479355096817,
"step": 3437
},
{
"epoch": 2.7156398104265405,
"grad_norm": 12.736343688491294,
"learning_rate": 2.727230755079141e-07,
"loss": 0.3135189712047577,
"step": 3438
},
{
"epoch": 2.7164296998420223,
"grad_norm": 10.469524983647013,
"learning_rate": 2.712280291067382e-07,
"loss": 0.2524583637714386,
"step": 3439
},
{
"epoch": 2.717219589257504,
"grad_norm": 10.21269953062072,
"learning_rate": 2.6973697758769404e-07,
"loss": 0.6804049015045166,
"step": 3440
},
{
"epoch": 2.718009478672986,
"grad_norm": 13.439002602740715,
"learning_rate": 2.68249922210429e-07,
"loss": 1.7227437496185303,
"step": 3441
},
{
"epoch": 2.7187993680884674,
"grad_norm": 9.105196897219319,
"learning_rate": 2.667668642312121e-07,
"loss": 0.24244064092636108,
"step": 3442
},
{
"epoch": 2.7195892575039493,
"grad_norm": 17.032916970105017,
"learning_rate": 2.6528780490293394e-07,
"loss": 0.42305219173431396,
"step": 3443
},
{
"epoch": 2.720379146919431,
"grad_norm": 8.341873292787225,
"learning_rate": 2.638127454751083e-07,
"loss": 0.3290414810180664,
"step": 3444
},
{
"epoch": 2.721169036334913,
"grad_norm": 14.438304518877404,
"learning_rate": 2.6234168719387275e-07,
"loss": 0.7699002027511597,
"step": 3445
},
{
"epoch": 2.721958925750395,
"grad_norm": 11.533451670720245,
"learning_rate": 2.6087463130198053e-07,
"loss": 0.39063939452171326,
"step": 3446
},
{
"epoch": 2.7227488151658767,
"grad_norm": 7.533668401408119,
"learning_rate": 2.594115790388069e-07,
"loss": 0.30158624053001404,
"step": 3447
},
{
"epoch": 2.7235387045813586,
"grad_norm": 9.966662834105735,
"learning_rate": 2.5795253164034084e-07,
"loss": 0.7435629367828369,
"step": 3448
},
{
"epoch": 2.7243285939968405,
"grad_norm": 14.725256543363951,
"learning_rate": 2.564974903391915e-07,
"loss": 0.22876577079296112,
"step": 3449
},
{
"epoch": 2.7251184834123223,
"grad_norm": 11.42689474920191,
"learning_rate": 2.550464563645827e-07,
"loss": 0.48019081354141235,
"step": 3450
},
{
"epoch": 2.725908372827804,
"grad_norm": 6.525526730403295,
"learning_rate": 2.5359943094235284e-07,
"loss": 0.3264992833137512,
"step": 3451
},
{
"epoch": 2.726698262243286,
"grad_norm": 12.867989920941069,
"learning_rate": 2.521564152949535e-07,
"loss": 0.6327470541000366,
"step": 3452
},
{
"epoch": 2.727488151658768,
"grad_norm": 33.14582147619613,
"learning_rate": 2.5071741064144893e-07,
"loss": 0.7746727466583252,
"step": 3453
},
{
"epoch": 2.7282780410742498,
"grad_norm": 11.842666297046986,
"learning_rate": 2.4928241819751506e-07,
"loss": 0.41545653343200684,
"step": 3454
},
{
"epoch": 2.729067930489731,
"grad_norm": 10.904910444914098,
"learning_rate": 2.4785143917543886e-07,
"loss": 0.30271655321121216,
"step": 3455
},
{
"epoch": 2.729857819905213,
"grad_norm": 7.237418545407075,
"learning_rate": 2.464244747841155e-07,
"loss": 0.2641463577747345,
"step": 3456
},
{
"epoch": 2.730647709320695,
"grad_norm": 12.797520847903757,
"learning_rate": 2.4500152622904895e-07,
"loss": 0.18360668420791626,
"step": 3457
},
{
"epoch": 2.7314375987361768,
"grad_norm": 6.889781589750479,
"learning_rate": 2.435825947123516e-07,
"loss": 0.2461070865392685,
"step": 3458
},
{
"epoch": 2.7322274881516586,
"grad_norm": 9.695805170509885,
"learning_rate": 2.4216768143274115e-07,
"loss": 0.5353419780731201,
"step": 3459
},
{
"epoch": 2.7330173775671405,
"grad_norm": 10.343918221842735,
"learning_rate": 2.4075678758554047e-07,
"loss": 0.6046707630157471,
"step": 3460
},
{
"epoch": 2.7338072669826223,
"grad_norm": 9.007895090494918,
"learning_rate": 2.3934991436267816e-07,
"loss": 0.2909160852432251,
"step": 3461
},
{
"epoch": 2.734597156398104,
"grad_norm": 10.62496325664592,
"learning_rate": 2.3794706295268476e-07,
"loss": 0.30107566714286804,
"step": 3462
},
{
"epoch": 2.735387045813586,
"grad_norm": 13.67484568966304,
"learning_rate": 2.365482345406933e-07,
"loss": 0.3950386643409729,
"step": 3463
},
{
"epoch": 2.736176935229068,
"grad_norm": 21.77362986226219,
"learning_rate": 2.3515343030844073e-07,
"loss": 1.0076971054077148,
"step": 3464
},
{
"epoch": 2.7369668246445498,
"grad_norm": 7.730693964041083,
"learning_rate": 2.3376265143426003e-07,
"loss": 0.24211300909519196,
"step": 3465
},
{
"epoch": 2.7377567140600316,
"grad_norm": 9.296217895983373,
"learning_rate": 2.3237589909308632e-07,
"loss": 0.2887963056564331,
"step": 3466
},
{
"epoch": 2.7385466034755135,
"grad_norm": 10.840522993677942,
"learning_rate": 2.309931744564531e-07,
"loss": 0.41945865750312805,
"step": 3467
},
{
"epoch": 2.7393364928909953,
"grad_norm": 8.917010556292599,
"learning_rate": 2.2961447869248977e-07,
"loss": 0.20327429473400116,
"step": 3468
},
{
"epoch": 2.740126382306477,
"grad_norm": 16.489207308906405,
"learning_rate": 2.2823981296592468e-07,
"loss": 0.445822149515152,
"step": 3469
},
{
"epoch": 2.740916271721959,
"grad_norm": 13.177843752844728,
"learning_rate": 2.2686917843807832e-07,
"loss": 0.4169418513774872,
"step": 3470
},
{
"epoch": 2.741706161137441,
"grad_norm": 18.679760356479285,
"learning_rate": 2.2550257626686835e-07,
"loss": 0.29727548360824585,
"step": 3471
},
{
"epoch": 2.742496050552923,
"grad_norm": 17.416569754051274,
"learning_rate": 2.2414000760680344e-07,
"loss": 0.9264640808105469,
"step": 3472
},
{
"epoch": 2.7432859399684046,
"grad_norm": 11.51848657708186,
"learning_rate": 2.2278147360898726e-07,
"loss": 0.31270313262939453,
"step": 3473
},
{
"epoch": 2.7440758293838865,
"grad_norm": 9.677231027214653,
"learning_rate": 2.2142697542111403e-07,
"loss": 0.519598126411438,
"step": 3474
},
{
"epoch": 2.7448657187993684,
"grad_norm": 8.89666307610365,
"learning_rate": 2.2007651418746777e-07,
"loss": 0.26155680418014526,
"step": 3475
},
{
"epoch": 2.7456556082148498,
"grad_norm": 8.068195381347717,
"learning_rate": 2.1873009104892207e-07,
"loss": 0.26666006445884705,
"step": 3476
},
{
"epoch": 2.7464454976303316,
"grad_norm": 9.967802346446724,
"learning_rate": 2.1738770714293978e-07,
"loss": 0.4388732612133026,
"step": 3477
},
{
"epoch": 2.7472353870458135,
"grad_norm": 12.682142726744404,
"learning_rate": 2.160493636035721e-07,
"loss": 1.5601736307144165,
"step": 3478
},
{
"epoch": 2.7480252764612954,
"grad_norm": 8.216179392592059,
"learning_rate": 2.1471506156145572e-07,
"loss": 0.34426096081733704,
"step": 3479
},
{
"epoch": 2.748815165876777,
"grad_norm": 12.700475916611518,
"learning_rate": 2.133848021438134e-07,
"loss": 0.3579084873199463,
"step": 3480
},
{
"epoch": 2.749605055292259,
"grad_norm": 8.827090645854712,
"learning_rate": 2.1205858647445175e-07,
"loss": 0.3631330728530884,
"step": 3481
},
{
"epoch": 2.750394944707741,
"grad_norm": 9.001270743632745,
"learning_rate": 2.107364156737629e-07,
"loss": 0.30411210656166077,
"step": 3482
},
{
"epoch": 2.751184834123223,
"grad_norm": 9.409837583682123,
"learning_rate": 2.0941829085872168e-07,
"loss": 0.23499026894569397,
"step": 3483
},
{
"epoch": 2.7519747235387046,
"grad_norm": 12.705050308923921,
"learning_rate": 2.0810421314288342e-07,
"loss": 1.2970982789993286,
"step": 3484
},
{
"epoch": 2.7527646129541865,
"grad_norm": 8.03711225808964,
"learning_rate": 2.067941836363857e-07,
"loss": 0.5299091339111328,
"step": 3485
},
{
"epoch": 2.7535545023696684,
"grad_norm": 11.220356334155515,
"learning_rate": 2.0548820344594544e-07,
"loss": 0.4197993278503418,
"step": 3486
},
{
"epoch": 2.7543443917851502,
"grad_norm": 12.91965843833499,
"learning_rate": 2.04186273674859e-07,
"loss": 0.6132807731628418,
"step": 3487
},
{
"epoch": 2.7551342812006316,
"grad_norm": 18.240887411131222,
"learning_rate": 2.02888395423001e-07,
"loss": 1.271854281425476,
"step": 3488
},
{
"epoch": 2.7559241706161135,
"grad_norm": 10.732674912361663,
"learning_rate": 2.0159456978682378e-07,
"loss": 0.3073996901512146,
"step": 3489
},
{
"epoch": 2.7567140600315954,
"grad_norm": 9.252251075885143,
"learning_rate": 2.0030479785935532e-07,
"loss": 0.2578376531600952,
"step": 3490
},
{
"epoch": 2.757503949447077,
"grad_norm": 8.516995882822272,
"learning_rate": 1.9901908073019837e-07,
"loss": 0.519225001335144,
"step": 3491
},
{
"epoch": 2.758293838862559,
"grad_norm": 8.43300426338786,
"learning_rate": 1.9773741948553194e-07,
"loss": 0.199580118060112,
"step": 3492
},
{
"epoch": 2.759083728278041,
"grad_norm": 9.32908519660615,
"learning_rate": 1.964598152081071e-07,
"loss": 0.4582338333129883,
"step": 3493
},
{
"epoch": 2.759873617693523,
"grad_norm": 9.046945945787343,
"learning_rate": 1.9518626897724878e-07,
"loss": 0.25943028926849365,
"step": 3494
},
{
"epoch": 2.7606635071090047,
"grad_norm": 11.314219665735752,
"learning_rate": 1.93916781868852e-07,
"loss": 0.4258866608142853,
"step": 3495
},
{
"epoch": 2.7614533965244865,
"grad_norm": 10.20689716332431,
"learning_rate": 1.9265135495538488e-07,
"loss": 0.271173357963562,
"step": 3496
},
{
"epoch": 2.7622432859399684,
"grad_norm": 16.200438350903056,
"learning_rate": 1.9138998930588348e-07,
"loss": 0.4719555377960205,
"step": 3497
},
{
"epoch": 2.7630331753554502,
"grad_norm": 11.738594703612247,
"learning_rate": 1.901326859859537e-07,
"loss": 0.980524480342865,
"step": 3498
},
{
"epoch": 2.763823064770932,
"grad_norm": 17.919465561173997,
"learning_rate": 1.888794460577692e-07,
"loss": 0.9541090726852417,
"step": 3499
},
{
"epoch": 2.764612954186414,
"grad_norm": 13.890459920478602,
"learning_rate": 1.8763027058007145e-07,
"loss": 0.69322669506073,
"step": 3500
},
{
"epoch": 2.765402843601896,
"grad_norm": 16.321197407525634,
"learning_rate": 1.8638516060816903e-07,
"loss": 0.42115840315818787,
"step": 3501
},
{
"epoch": 2.7661927330173777,
"grad_norm": 10.96242137226945,
"learning_rate": 1.8514411719393445e-07,
"loss": 0.46676504611968994,
"step": 3502
},
{
"epoch": 2.7669826224328595,
"grad_norm": 8.812275087497374,
"learning_rate": 1.8390714138580457e-07,
"loss": 0.5044353604316711,
"step": 3503
},
{
"epoch": 2.7677725118483414,
"grad_norm": 17.943245247505377,
"learning_rate": 1.8267423422878073e-07,
"loss": 0.41426870226860046,
"step": 3504
},
{
"epoch": 2.7685624012638232,
"grad_norm": 10.71591258305487,
"learning_rate": 1.814453967644264e-07,
"loss": 0.6353201270103455,
"step": 3505
},
{
"epoch": 2.769352290679305,
"grad_norm": 10.317665072167928,
"learning_rate": 1.8022063003086952e-07,
"loss": 0.20802390575408936,
"step": 3506
},
{
"epoch": 2.770142180094787,
"grad_norm": 9.95179330655714,
"learning_rate": 1.7899993506279577e-07,
"loss": 0.38674095273017883,
"step": 3507
},
{
"epoch": 2.770932069510269,
"grad_norm": 9.564138951257297,
"learning_rate": 1.7778331289145246e-07,
"loss": 0.37485527992248535,
"step": 3508
},
{
"epoch": 2.7717219589257502,
"grad_norm": 11.837113131259644,
"learning_rate": 1.765707645446446e-07,
"loss": 1.245941162109375,
"step": 3509
},
{
"epoch": 2.772511848341232,
"grad_norm": 16.993616371219062,
"learning_rate": 1.7536229104673952e-07,
"loss": 1.4415150880813599,
"step": 3510
},
{
"epoch": 2.773301737756714,
"grad_norm": 18.46978395432566,
"learning_rate": 1.741578934186583e-07,
"loss": 0.48280513286590576,
"step": 3511
},
{
"epoch": 2.774091627172196,
"grad_norm": 19.046571471804004,
"learning_rate": 1.7295757267787982e-07,
"loss": 0.6890305280685425,
"step": 3512
},
{
"epoch": 2.7748815165876777,
"grad_norm": 8.374569669050457,
"learning_rate": 1.717613298384402e-07,
"loss": 0.17919717729091644,
"step": 3513
},
{
"epoch": 2.7756714060031595,
"grad_norm": 14.297372965397592,
"learning_rate": 1.7056916591092765e-07,
"loss": 0.9247697591781616,
"step": 3514
},
{
"epoch": 2.7764612954186414,
"grad_norm": 12.33201213532176,
"learning_rate": 1.6938108190248714e-07,
"loss": 0.7695714831352234,
"step": 3515
},
{
"epoch": 2.7772511848341233,
"grad_norm": 11.395421501521598,
"learning_rate": 1.681970788168158e-07,
"loss": 0.5965884923934937,
"step": 3516
},
{
"epoch": 2.778041074249605,
"grad_norm": 11.322776826507724,
"learning_rate": 1.670171576541635e-07,
"loss": 0.28291648626327515,
"step": 3517
},
{
"epoch": 2.778830963665087,
"grad_norm": 9.052162465156956,
"learning_rate": 1.658413194113312e-07,
"loss": 0.26967617869377136,
"step": 3518
},
{
"epoch": 2.779620853080569,
"grad_norm": 8.85381583315956,
"learning_rate": 1.6466956508167098e-07,
"loss": 0.27431443333625793,
"step": 3519
},
{
"epoch": 2.7804107424960507,
"grad_norm": 10.26241792481637,
"learning_rate": 1.635018956550849e-07,
"loss": 0.5575605630874634,
"step": 3520
},
{
"epoch": 2.7812006319115326,
"grad_norm": 19.04805022049209,
"learning_rate": 1.6233831211802443e-07,
"loss": 0.507323145866394,
"step": 3521
},
{
"epoch": 2.781990521327014,
"grad_norm": 12.25593303035708,
"learning_rate": 1.6117881545348768e-07,
"loss": 0.7778584957122803,
"step": 3522
},
{
"epoch": 2.782780410742496,
"grad_norm": 10.716935186355487,
"learning_rate": 1.6002340664102222e-07,
"loss": 0.4240133762359619,
"step": 3523
},
{
"epoch": 2.7835703001579777,
"grad_norm": 10.691328997589723,
"learning_rate": 1.588720866567206e-07,
"loss": 0.3414255976676941,
"step": 3524
},
{
"epoch": 2.7843601895734595,
"grad_norm": 11.768217491281503,
"learning_rate": 1.57724856473222e-07,
"loss": 0.41171273589134216,
"step": 3525
},
{
"epoch": 2.7851500789889414,
"grad_norm": 11.135806497121866,
"learning_rate": 1.5658171705971002e-07,
"loss": 0.44822290539741516,
"step": 3526
},
{
"epoch": 2.7859399684044233,
"grad_norm": 12.802286130199466,
"learning_rate": 1.5544266938191277e-07,
"loss": 0.5907123684883118,
"step": 3527
},
{
"epoch": 2.786729857819905,
"grad_norm": 9.291881799567307,
"learning_rate": 1.5430771440210102e-07,
"loss": 0.5149095058441162,
"step": 3528
},
{
"epoch": 2.787519747235387,
"grad_norm": 11.903535206167968,
"learning_rate": 1.5317685307909003e-07,
"loss": 0.3978126645088196,
"step": 3529
},
{
"epoch": 2.788309636650869,
"grad_norm": 11.87266940877088,
"learning_rate": 1.5205008636823392e-07,
"loss": 0.7672142386436462,
"step": 3530
},
{
"epoch": 2.7890995260663507,
"grad_norm": 12.664993847505396,
"learning_rate": 1.509274152214285e-07,
"loss": 0.6181859970092773,
"step": 3531
},
{
"epoch": 2.7898894154818326,
"grad_norm": 11.706302565544545,
"learning_rate": 1.4980884058711122e-07,
"loss": 0.2257220596075058,
"step": 3532
},
{
"epoch": 2.7906793048973144,
"grad_norm": 11.269026746440312,
"learning_rate": 1.486943634102561e-07,
"loss": 0.411458283662796,
"step": 3533
},
{
"epoch": 2.7914691943127963,
"grad_norm": 11.770992911424287,
"learning_rate": 1.4758398463237844e-07,
"loss": 0.391770601272583,
"step": 3534
},
{
"epoch": 2.792259083728278,
"grad_norm": 12.179827825593687,
"learning_rate": 1.464777051915306e-07,
"loss": 0.7870375514030457,
"step": 3535
},
{
"epoch": 2.79304897314376,
"grad_norm": 10.205085198463788,
"learning_rate": 1.4537552602229888e-07,
"loss": 0.24328409135341644,
"step": 3536
},
{
"epoch": 2.793838862559242,
"grad_norm": 11.707290493646688,
"learning_rate": 1.4427744805580902e-07,
"loss": 0.7062838077545166,
"step": 3537
},
{
"epoch": 2.7946287519747237,
"grad_norm": 9.449178364796186,
"learning_rate": 1.4318347221972118e-07,
"loss": 0.4576803743839264,
"step": 3538
},
{
"epoch": 2.7954186413902056,
"grad_norm": 8.800231574547174,
"learning_rate": 1.420935994382294e-07,
"loss": 0.6782954931259155,
"step": 3539
},
{
"epoch": 2.7962085308056874,
"grad_norm": 20.41988700193614,
"learning_rate": 1.4100783063206224e-07,
"loss": 0.5219430923461914,
"step": 3540
},
{
"epoch": 2.7969984202211693,
"grad_norm": 7.5166874724474715,
"learning_rate": 1.3992616671847981e-07,
"loss": 0.3515085279941559,
"step": 3541
},
{
"epoch": 2.7977883096366507,
"grad_norm": 6.932947866396507,
"learning_rate": 1.3884860861127558e-07,
"loss": 0.47976720333099365,
"step": 3542
},
{
"epoch": 2.7985781990521326,
"grad_norm": 9.491598002493626,
"learning_rate": 1.377751572207753e-07,
"loss": 0.6151677370071411,
"step": 3543
},
{
"epoch": 2.7993680884676144,
"grad_norm": 9.194411344753396,
"learning_rate": 1.3670581345383294e-07,
"loss": 0.26150283217430115,
"step": 3544
},
{
"epoch": 2.8001579778830963,
"grad_norm": 17.758549126307468,
"learning_rate": 1.3564057821383426e-07,
"loss": 0.48430135846138,
"step": 3545
},
{
"epoch": 2.800947867298578,
"grad_norm": 10.109089794707371,
"learning_rate": 1.345794524006938e-07,
"loss": 0.4719211459159851,
"step": 3546
},
{
"epoch": 2.80173775671406,
"grad_norm": 6.966698821581658,
"learning_rate": 1.3352243691085343e-07,
"loss": 0.34015411138534546,
"step": 3547
},
{
"epoch": 2.802527646129542,
"grad_norm": 17.133059090598415,
"learning_rate": 1.3246953263728323e-07,
"loss": 0.662192165851593,
"step": 3548
},
{
"epoch": 2.8033175355450237,
"grad_norm": 13.801086934095315,
"learning_rate": 1.3142074046948117e-07,
"loss": 0.48745739459991455,
"step": 3549
},
{
"epoch": 2.8041074249605056,
"grad_norm": 9.455351798236373,
"learning_rate": 1.3037606129346903e-07,
"loss": 0.4470326602458954,
"step": 3550
},
{
"epoch": 2.8048973143759874,
"grad_norm": 16.270417938160158,
"learning_rate": 1.293354959917964e-07,
"loss": 0.5577215552330017,
"step": 3551
},
{
"epoch": 2.8056872037914693,
"grad_norm": 7.398103500346509,
"learning_rate": 1.282990454435362e-07,
"loss": 0.33731377124786377,
"step": 3552
},
{
"epoch": 2.806477093206951,
"grad_norm": 11.993082049967715,
"learning_rate": 1.2726671052428418e-07,
"loss": 0.3734150528907776,
"step": 3553
},
{
"epoch": 2.807266982622433,
"grad_norm": 13.661721906893305,
"learning_rate": 1.26238492106161e-07,
"loss": 0.2888742685317993,
"step": 3554
},
{
"epoch": 2.8080568720379144,
"grad_norm": 10.344916708225794,
"learning_rate": 1.2521439105780909e-07,
"loss": 0.3093745708465576,
"step": 3555
},
{
"epoch": 2.8088467614533963,
"grad_norm": 13.865074941166652,
"learning_rate": 1.2419440824439188e-07,
"loss": 0.4877317547798157,
"step": 3556
},
{
"epoch": 2.809636650868878,
"grad_norm": 10.788397713560993,
"learning_rate": 1.231785445275957e-07,
"loss": 0.4557979702949524,
"step": 3557
},
{
"epoch": 2.81042654028436,
"grad_norm": 10.285465020965095,
"learning_rate": 1.2216680076562347e-07,
"loss": 0.3419281840324402,
"step": 3558
},
{
"epoch": 2.811216429699842,
"grad_norm": 8.877423938913571,
"learning_rate": 1.2115917781320096e-07,
"loss": 0.4035925269126892,
"step": 3559
},
{
"epoch": 2.8120063191153237,
"grad_norm": 13.435791700322584,
"learning_rate": 1.2015567652157057e-07,
"loss": 0.6396586298942566,
"step": 3560
},
{
"epoch": 2.8127962085308056,
"grad_norm": 19.702185429939885,
"learning_rate": 1.1915629773849358e-07,
"loss": 1.082403540611267,
"step": 3561
},
{
"epoch": 2.8135860979462874,
"grad_norm": 11.822298832725835,
"learning_rate": 1.1816104230825021e-07,
"loss": 0.3863110840320587,
"step": 3562
},
{
"epoch": 2.8143759873617693,
"grad_norm": 8.295210827383162,
"learning_rate": 1.1716991107163289e-07,
"loss": 0.58207106590271,
"step": 3563
},
{
"epoch": 2.815165876777251,
"grad_norm": 14.2016386137118,
"learning_rate": 1.1618290486595463e-07,
"loss": 0.2098863422870636,
"step": 3564
},
{
"epoch": 2.815955766192733,
"grad_norm": 10.082988120031807,
"learning_rate": 1.1520002452503953e-07,
"loss": 0.27094680070877075,
"step": 3565
},
{
"epoch": 2.816745655608215,
"grad_norm": 5.875570359193218,
"learning_rate": 1.1422127087923007e-07,
"loss": 0.21748504042625427,
"step": 3566
},
{
"epoch": 2.8175355450236967,
"grad_norm": 12.438687688635737,
"learning_rate": 1.1324664475538038e-07,
"loss": 0.5272513031959534,
"step": 3567
},
{
"epoch": 2.8183254344391786,
"grad_norm": 10.889129102864676,
"learning_rate": 1.1227614697685629e-07,
"loss": 0.9930387139320374,
"step": 3568
},
{
"epoch": 2.8191153238546605,
"grad_norm": 12.382867902327071,
"learning_rate": 1.1130977836353862e-07,
"loss": 0.3840116858482361,
"step": 3569
},
{
"epoch": 2.8199052132701423,
"grad_norm": 9.33063650473277,
"learning_rate": 1.1034753973181877e-07,
"loss": 0.24336904287338257,
"step": 3570
},
{
"epoch": 2.820695102685624,
"grad_norm": 15.374029447353267,
"learning_rate": 1.0938943189459872e-07,
"loss": 0.3328930735588074,
"step": 3571
},
{
"epoch": 2.821484992101106,
"grad_norm": 10.569256308523215,
"learning_rate": 1.0843545566129154e-07,
"loss": 0.40318596363067627,
"step": 3572
},
{
"epoch": 2.822274881516588,
"grad_norm": 11.764490990287323,
"learning_rate": 1.074856118378198e-07,
"loss": 0.2967602014541626,
"step": 3573
},
{
"epoch": 2.8230647709320698,
"grad_norm": 11.711306728260894,
"learning_rate": 1.0653990122661273e-07,
"loss": 0.20409056544303894,
"step": 3574
},
{
"epoch": 2.8238546603475516,
"grad_norm": 8.980141255462513,
"learning_rate": 1.0559832462661234e-07,
"loss": 0.4850717782974243,
"step": 3575
},
{
"epoch": 2.824644549763033,
"grad_norm": 17.31606580117272,
"learning_rate": 1.0466088283326404e-07,
"loss": 0.6416319608688354,
"step": 3576
},
{
"epoch": 2.825434439178515,
"grad_norm": 15.344712637864934,
"learning_rate": 1.0372757663852262e-07,
"loss": 0.46508005261421204,
"step": 3577
},
{
"epoch": 2.8262243285939967,
"grad_norm": 8.57007968363552,
"learning_rate": 1.0279840683084741e-07,
"loss": 0.8428210616111755,
"step": 3578
},
{
"epoch": 2.8270142180094786,
"grad_norm": 8.550821162807685,
"learning_rate": 1.0187337419520493e-07,
"loss": 0.682414710521698,
"step": 3579
},
{
"epoch": 2.8278041074249605,
"grad_norm": 7.199168718842685,
"learning_rate": 1.0095247951306508e-07,
"loss": 0.25026553869247437,
"step": 3580
},
{
"epoch": 2.8285939968404423,
"grad_norm": 12.81831188890812,
"learning_rate": 1.0003572356240333e-07,
"loss": 0.933559238910675,
"step": 3581
},
{
"epoch": 2.829383886255924,
"grad_norm": 7.6333393554744156,
"learning_rate": 9.912310711769734e-08,
"loss": 0.26869896054267883,
"step": 3582
},
{
"epoch": 2.830173775671406,
"grad_norm": 9.048728044949662,
"learning_rate": 9.821463094992934e-08,
"loss": 0.44904714822769165,
"step": 3583
},
{
"epoch": 2.830963665086888,
"grad_norm": 10.975379290838536,
"learning_rate": 9.731029582658258e-08,
"loss": 0.7174665927886963,
"step": 3584
},
{
"epoch": 2.8317535545023698,
"grad_norm": 9.80556755933507,
"learning_rate": 9.641010251164263e-08,
"loss": 0.47703051567077637,
"step": 3585
},
{
"epoch": 2.8325434439178516,
"grad_norm": 9.546590438112691,
"learning_rate": 9.551405176559558e-08,
"loss": 0.32327979803085327,
"step": 3586
},
{
"epoch": 2.8333333333333335,
"grad_norm": 11.700829296565805,
"learning_rate": 9.46221443454276e-08,
"loss": 0.7538062334060669,
"step": 3587
},
{
"epoch": 2.834123222748815,
"grad_norm": 13.33437853882375,
"learning_rate": 9.37343810046254e-08,
"loss": 0.5941387414932251,
"step": 3588
},
{
"epoch": 2.8349131121642968,
"grad_norm": 5.894312748053277,
"learning_rate": 9.28507624931746e-08,
"loss": 0.14909735321998596,
"step": 3589
},
{
"epoch": 2.8357030015797786,
"grad_norm": 11.328914671419902,
"learning_rate": 9.197128955755919e-08,
"loss": 0.6000841856002808,
"step": 3590
},
{
"epoch": 2.8364928909952605,
"grad_norm": 14.987543569871976,
"learning_rate": 9.109596294075983e-08,
"loss": 0.4695100784301758,
"step": 3591
},
{
"epoch": 2.8372827804107423,
"grad_norm": 7.157405951875022,
"learning_rate": 9.022478338225616e-08,
"loss": 0.2676977217197418,
"step": 3592
},
{
"epoch": 2.838072669826224,
"grad_norm": 10.025275842504612,
"learning_rate": 8.935775161802274e-08,
"loss": 0.6638664603233337,
"step": 3593
},
{
"epoch": 2.838862559241706,
"grad_norm": 7.7262320503218005,
"learning_rate": 8.849486838053201e-08,
"loss": 0.2409912347793579,
"step": 3594
},
{
"epoch": 2.839652448657188,
"grad_norm": 12.117641612900385,
"learning_rate": 8.763613439875085e-08,
"loss": 0.4687439203262329,
"step": 3595
},
{
"epoch": 2.8404423380726698,
"grad_norm": 10.35658710588346,
"learning_rate": 8.67815503981384e-08,
"loss": 0.8544546961784363,
"step": 3596
},
{
"epoch": 2.8412322274881516,
"grad_norm": 11.638767331627365,
"learning_rate": 8.59311171006516e-08,
"loss": 0.8178413510322571,
"step": 3597
},
{
"epoch": 2.8420221169036335,
"grad_norm": 8.669410862880607,
"learning_rate": 8.508483522473909e-08,
"loss": 0.4358055293560028,
"step": 3598
},
{
"epoch": 2.8428120063191153,
"grad_norm": 15.987554618443884,
"learning_rate": 8.424270548534286e-08,
"loss": 0.5043225884437561,
"step": 3599
},
{
"epoch": 2.843601895734597,
"grad_norm": 33.576463764455724,
"learning_rate": 8.340472859389714e-08,
"loss": 0.38993752002716064,
"step": 3600
},
{
"epoch": 2.844391785150079,
"grad_norm": 12.91658376574621,
"learning_rate": 8.257090525832623e-08,
"loss": 0.8641099333763123,
"step": 3601
},
{
"epoch": 2.845181674565561,
"grad_norm": 7.742223493684328,
"learning_rate": 8.17412361830472e-08,
"loss": 0.36251041293144226,
"step": 3602
},
{
"epoch": 2.845971563981043,
"grad_norm": 9.525833528037221,
"learning_rate": 8.091572206896769e-08,
"loss": 0.4047582745552063,
"step": 3603
},
{
"epoch": 2.8467614533965246,
"grad_norm": 12.327238364324716,
"learning_rate": 8.009436361348543e-08,
"loss": 0.22032329440116882,
"step": 3604
},
{
"epoch": 2.8475513428120065,
"grad_norm": 8.279880501050302,
"learning_rate": 7.927716151048537e-08,
"loss": 0.4643440246582031,
"step": 3605
},
{
"epoch": 2.8483412322274884,
"grad_norm": 11.646960594230466,
"learning_rate": 7.846411645034302e-08,
"loss": 0.20467007160186768,
"step": 3606
},
{
"epoch": 2.84913112164297,
"grad_norm": 11.370802049937053,
"learning_rate": 7.765522911992229e-08,
"loss": 0.27269160747528076,
"step": 3607
},
{
"epoch": 2.849921011058452,
"grad_norm": 23.699981990614827,
"learning_rate": 7.685050020257267e-08,
"loss": 0.8710867762565613,
"step": 3608
},
{
"epoch": 2.8507109004739335,
"grad_norm": 13.69662146387405,
"learning_rate": 7.604993037813257e-08,
"loss": 0.7823088765144348,
"step": 3609
},
{
"epoch": 2.8515007898894154,
"grad_norm": 11.238494785694472,
"learning_rate": 7.525352032292599e-08,
"loss": 0.47667503356933594,
"step": 3610
},
{
"epoch": 2.852290679304897,
"grad_norm": 11.90408448356978,
"learning_rate": 7.44612707097625e-08,
"loss": 0.4056919515132904,
"step": 3611
},
{
"epoch": 2.853080568720379,
"grad_norm": 14.64818168207235,
"learning_rate": 7.367318220793673e-08,
"loss": 0.31270867586135864,
"step": 3612
},
{
"epoch": 2.853870458135861,
"grad_norm": 8.786247946279532,
"learning_rate": 7.288925548322945e-08,
"loss": 0.18290819227695465,
"step": 3613
},
{
"epoch": 2.854660347551343,
"grad_norm": 9.827541412175302,
"learning_rate": 7.210949119790423e-08,
"loss": 1.236111044883728,
"step": 3614
},
{
"epoch": 2.8554502369668247,
"grad_norm": 7.902486340561014,
"learning_rate": 7.133389001070801e-08,
"loss": 0.19697430729866028,
"step": 3615
},
{
"epoch": 2.8562401263823065,
"grad_norm": 10.36680879415055,
"learning_rate": 7.056245257687166e-08,
"loss": 0.4995325207710266,
"step": 3616
},
{
"epoch": 2.8570300157977884,
"grad_norm": 8.029144851923272,
"learning_rate": 6.979517954810777e-08,
"loss": 0.35892802476882935,
"step": 3617
},
{
"epoch": 2.8578199052132702,
"grad_norm": 10.443143901118459,
"learning_rate": 6.903207157261116e-08,
"loss": 0.6018155813217163,
"step": 3618
},
{
"epoch": 2.858609794628752,
"grad_norm": 9.972612916820504,
"learning_rate": 6.827312929505837e-08,
"loss": 0.3768244683742523,
"step": 3619
},
{
"epoch": 2.859399684044234,
"grad_norm": 9.967954749916855,
"learning_rate": 6.75183533566065e-08,
"loss": 0.6008504033088684,
"step": 3620
},
{
"epoch": 2.860189573459716,
"grad_norm": 11.826572647957775,
"learning_rate": 6.676774439489109e-08,
"loss": 0.9027575254440308,
"step": 3621
},
{
"epoch": 2.860979462875197,
"grad_norm": 7.566116639922751,
"learning_rate": 6.602130304403153e-08,
"loss": 0.3454285264015198,
"step": 3622
},
{
"epoch": 2.861769352290679,
"grad_norm": 11.334110441287553,
"learning_rate": 6.527902993462232e-08,
"loss": 0.3822672963142395,
"step": 3623
},
{
"epoch": 2.862559241706161,
"grad_norm": 30.42379993258823,
"learning_rate": 6.454092569373849e-08,
"loss": 1.1499581336975098,
"step": 3624
},
{
"epoch": 2.863349131121643,
"grad_norm": 12.189705870905884,
"learning_rate": 6.380699094493292e-08,
"loss": 0.28113341331481934,
"step": 3625
},
{
"epoch": 2.8641390205371247,
"grad_norm": 10.306143494702912,
"learning_rate": 6.307722630823631e-08,
"loss": 0.6603707671165466,
"step": 3626
},
{
"epoch": 2.8649289099526065,
"grad_norm": 12.679232897956698,
"learning_rate": 6.235163240015608e-08,
"loss": 0.6723718047142029,
"step": 3627
},
{
"epoch": 2.8657187993680884,
"grad_norm": 11.11612011580192,
"learning_rate": 6.163020983367685e-08,
"loss": 0.28444093465805054,
"step": 3628
},
{
"epoch": 2.8665086887835702,
"grad_norm": 10.621322926022057,
"learning_rate": 6.091295921825779e-08,
"loss": 0.2627624571323395,
"step": 3629
},
{
"epoch": 2.867298578199052,
"grad_norm": 9.584051079895877,
"learning_rate": 6.019988115983533e-08,
"loss": 0.6522977948188782,
"step": 3630
},
{
"epoch": 2.868088467614534,
"grad_norm": 10.104067057656607,
"learning_rate": 5.949097626081979e-08,
"loss": 0.806284487247467,
"step": 3631
},
{
"epoch": 2.868878357030016,
"grad_norm": 9.3150959160621,
"learning_rate": 5.878624512009712e-08,
"loss": 0.49238792061805725,
"step": 3632
},
{
"epoch": 2.8696682464454977,
"grad_norm": 11.064857677412675,
"learning_rate": 5.808568833302552e-08,
"loss": 0.3209676146507263,
"step": 3633
},
{
"epoch": 2.8704581358609795,
"grad_norm": 9.937366426134174,
"learning_rate": 5.73893064914377e-08,
"loss": 0.5651168823242188,
"step": 3634
},
{
"epoch": 2.8712480252764614,
"grad_norm": 9.43706524762976,
"learning_rate": 5.669710018364028e-08,
"loss": 0.25006648898124695,
"step": 3635
},
{
"epoch": 2.8720379146919433,
"grad_norm": 8.086894664969703,
"learning_rate": 5.600906999441047e-08,
"loss": 0.289908766746521,
"step": 3636
},
{
"epoch": 2.872827804107425,
"grad_norm": 11.049864934040414,
"learning_rate": 5.5325216504999445e-08,
"loss": 0.2839067578315735,
"step": 3637
},
{
"epoch": 2.873617693522907,
"grad_norm": 11.326901492566288,
"learning_rate": 5.464554029312785e-08,
"loss": 0.4851897358894348,
"step": 3638
},
{
"epoch": 2.874407582938389,
"grad_norm": 9.870517653518927,
"learning_rate": 5.39700419329886e-08,
"loss": 0.3466881215572357,
"step": 3639
},
{
"epoch": 2.8751974723538707,
"grad_norm": 9.471904879535721,
"learning_rate": 5.329872199524577e-08,
"loss": 0.41402703523635864,
"step": 3640
},
{
"epoch": 2.8759873617693525,
"grad_norm": 11.444565675892793,
"learning_rate": 5.263158104703125e-08,
"loss": 0.7139902710914612,
"step": 3641
},
{
"epoch": 2.876777251184834,
"grad_norm": 9.863961110827145,
"learning_rate": 5.196861965194922e-08,
"loss": 0.4099463224411011,
"step": 3642
},
{
"epoch": 2.877567140600316,
"grad_norm": 14.395860031992335,
"learning_rate": 5.1309838370071086e-08,
"loss": 0.635336697101593,
"step": 3643
},
{
"epoch": 2.8783570300157977,
"grad_norm": 11.549260196243356,
"learning_rate": 5.0655237757937236e-08,
"loss": 0.5518008470535278,
"step": 3644
},
{
"epoch": 2.8791469194312795,
"grad_norm": 16.914171209055556,
"learning_rate": 5.000481836855697e-08,
"loss": 0.5935429930686951,
"step": 3645
},
{
"epoch": 2.8799368088467614,
"grad_norm": 10.519627347888093,
"learning_rate": 4.935858075140631e-08,
"loss": 0.2353929877281189,
"step": 3646
},
{
"epoch": 2.8807266982622433,
"grad_norm": 10.147294910154294,
"learning_rate": 4.8716525452428556e-08,
"loss": 0.5117212533950806,
"step": 3647
},
{
"epoch": 2.881516587677725,
"grad_norm": 7.79219575426103,
"learning_rate": 4.807865301403536e-08,
"loss": 0.29170703887939453,
"step": 3648
},
{
"epoch": 2.882306477093207,
"grad_norm": 9.052236405274959,
"learning_rate": 4.744496397510234e-08,
"loss": 0.34299367666244507,
"step": 3649
},
{
"epoch": 2.883096366508689,
"grad_norm": 12.457687363908496,
"learning_rate": 4.681545887097239e-08,
"loss": 0.33234238624572754,
"step": 3650
},
{
"epoch": 2.8838862559241707,
"grad_norm": 13.05592857803276,
"learning_rate": 4.6190138233453976e-08,
"loss": 0.27858805656433105,
"step": 3651
},
{
"epoch": 2.8846761453396526,
"grad_norm": 10.916182057073014,
"learning_rate": 4.556900259081898e-08,
"loss": 0.6553702354431152,
"step": 3652
},
{
"epoch": 2.8854660347551344,
"grad_norm": 9.39192402230028,
"learning_rate": 4.495205246780543e-08,
"loss": 0.5300555229187012,
"step": 3653
},
{
"epoch": 2.8862559241706163,
"grad_norm": 5.7960707776723295,
"learning_rate": 4.43392883856153e-08,
"loss": 0.24335479736328125,
"step": 3654
},
{
"epoch": 2.8870458135860977,
"grad_norm": 7.039387389488856,
"learning_rate": 4.373071086191338e-08,
"loss": 0.3162982761859894,
"step": 3655
},
{
"epoch": 2.8878357030015795,
"grad_norm": 10.4351163530241,
"learning_rate": 4.312632041082787e-08,
"loss": 0.36064600944519043,
"step": 3656
},
{
"epoch": 2.8886255924170614,
"grad_norm": 11.795988539884263,
"learning_rate": 4.2526117542949774e-08,
"loss": 0.343291312456131,
"step": 3657
},
{
"epoch": 2.8894154818325433,
"grad_norm": 10.393934212831047,
"learning_rate": 4.193010276533183e-08,
"loss": 0.6907198429107666,
"step": 3658
},
{
"epoch": 2.890205371248025,
"grad_norm": 9.56184934798155,
"learning_rate": 4.13382765814907e-08,
"loss": 0.7030273675918579,
"step": 3659
},
{
"epoch": 2.890995260663507,
"grad_norm": 7.063388291601224,
"learning_rate": 4.075063949140201e-08,
"loss": 0.17387212812900543,
"step": 3660
},
{
"epoch": 2.891785150078989,
"grad_norm": 8.661731775267791,
"learning_rate": 4.016719199150365e-08,
"loss": 0.5848299264907837,
"step": 3661
},
{
"epoch": 2.8925750394944707,
"grad_norm": 16.72243976052531,
"learning_rate": 3.958793457469412e-08,
"loss": 0.4991316795349121,
"step": 3662
},
{
"epoch": 2.8933649289099526,
"grad_norm": 14.820335568585508,
"learning_rate": 3.901286773033253e-08,
"loss": 0.30617228150367737,
"step": 3663
},
{
"epoch": 2.8941548183254344,
"grad_norm": 13.043213388040584,
"learning_rate": 3.8441991944236365e-08,
"loss": 0.35002079606056213,
"step": 3664
},
{
"epoch": 2.8949447077409163,
"grad_norm": 7.380501973738135,
"learning_rate": 3.787530769868431e-08,
"loss": 0.14187008142471313,
"step": 3665
},
{
"epoch": 2.895734597156398,
"grad_norm": 9.224074386705713,
"learning_rate": 3.731281547241228e-08,
"loss": 0.1989508867263794,
"step": 3666
},
{
"epoch": 2.89652448657188,
"grad_norm": 10.063244268865743,
"learning_rate": 3.675451574061517e-08,
"loss": 0.33559074997901917,
"step": 3667
},
{
"epoch": 2.897314375987362,
"grad_norm": 17.367117559815597,
"learning_rate": 3.620040897494737e-08,
"loss": 0.7757275104522705,
"step": 3668
},
{
"epoch": 2.8981042654028437,
"grad_norm": 14.80922413991561,
"learning_rate": 3.565049564351997e-08,
"loss": 0.5378328561782837,
"step": 3669
},
{
"epoch": 2.8988941548183256,
"grad_norm": 11.674232729531763,
"learning_rate": 3.510477621090192e-08,
"loss": 0.5047122240066528,
"step": 3670
},
{
"epoch": 2.8996840442338074,
"grad_norm": 9.61930347404772,
"learning_rate": 3.456325113811776e-08,
"loss": 0.22347621619701385,
"step": 3671
},
{
"epoch": 2.9004739336492893,
"grad_norm": 12.428464114281123,
"learning_rate": 3.4025920882649886e-08,
"loss": 0.2939353585243225,
"step": 3672
},
{
"epoch": 2.901263823064771,
"grad_norm": 11.787785169855981,
"learning_rate": 3.3492785898437407e-08,
"loss": 0.6785522699356079,
"step": 3673
},
{
"epoch": 2.902053712480253,
"grad_norm": 10.424690477560073,
"learning_rate": 3.296384663587338e-08,
"loss": 0.9920533895492554,
"step": 3674
},
{
"epoch": 2.902843601895735,
"grad_norm": 9.203768626957741,
"learning_rate": 3.243910354180868e-08,
"loss": 0.6544739007949829,
"step": 3675
},
{
"epoch": 2.9036334913112163,
"grad_norm": 8.198475817478249,
"learning_rate": 3.1918557059547605e-08,
"loss": 0.2281288057565689,
"step": 3676
},
{
"epoch": 2.904423380726698,
"grad_norm": 10.800632093569277,
"learning_rate": 3.1402207628848935e-08,
"loss": 0.2759685218334198,
"step": 3677
},
{
"epoch": 2.90521327014218,
"grad_norm": 11.368103163964738,
"learning_rate": 3.089005568592707e-08,
"loss": 0.4425602853298187,
"step": 3678
},
{
"epoch": 2.906003159557662,
"grad_norm": 15.676906486716955,
"learning_rate": 3.038210166344924e-08,
"loss": 0.9129424691200256,
"step": 3679
},
{
"epoch": 2.9067930489731437,
"grad_norm": 7.319860581451804,
"learning_rate": 2.9878345990536626e-08,
"loss": 0.5854052305221558,
"step": 3680
},
{
"epoch": 2.9075829383886256,
"grad_norm": 9.087306671915462,
"learning_rate": 2.9378789092763816e-08,
"loss": 0.2461828887462616,
"step": 3681
},
{
"epoch": 2.9083728278041074,
"grad_norm": 9.89538463931709,
"learning_rate": 2.8883431392158768e-08,
"loss": 0.6880824565887451,
"step": 3682
},
{
"epoch": 2.9091627172195893,
"grad_norm": 12.748919800585687,
"learning_rate": 2.8392273307200068e-08,
"loss": 0.9422566294670105,
"step": 3683
},
{
"epoch": 2.909952606635071,
"grad_norm": 8.421188816814379,
"learning_rate": 2.7905315252820808e-08,
"loss": 0.4385561943054199,
"step": 3684
},
{
"epoch": 2.910742496050553,
"grad_norm": 14.078145544877923,
"learning_rate": 2.7422557640404135e-08,
"loss": 0.5254925489425659,
"step": 3685
},
{
"epoch": 2.911532385466035,
"grad_norm": 9.082888238767863,
"learning_rate": 2.6944000877784925e-08,
"loss": 0.5949431657791138,
"step": 3686
},
{
"epoch": 2.9123222748815167,
"grad_norm": 13.607392988749478,
"learning_rate": 2.646964536925034e-08,
"loss": 0.691329300403595,
"step": 3687
},
{
"epoch": 2.913112164296998,
"grad_norm": 10.530634100048369,
"learning_rate": 2.59994915155376e-08,
"loss": 0.4718400239944458,
"step": 3688
},
{
"epoch": 2.91390205371248,
"grad_norm": 10.59452097306126,
"learning_rate": 2.5533539713832878e-08,
"loss": 0.33786359429359436,
"step": 3689
},
{
"epoch": 2.914691943127962,
"grad_norm": 11.039511779455802,
"learning_rate": 2.5071790357775183e-08,
"loss": 0.8592206239700317,
"step": 3690
},
{
"epoch": 2.9154818325434437,
"grad_norm": 11.096868713143294,
"learning_rate": 2.461424383745137e-08,
"loss": 0.5247258543968201,
"step": 3691
},
{
"epoch": 2.9162717219589256,
"grad_norm": 9.7943810808923,
"learning_rate": 2.4160900539397792e-08,
"loss": 0.4834524393081665,
"step": 3692
},
{
"epoch": 2.9170616113744074,
"grad_norm": 14.220945945722814,
"learning_rate": 2.3711760846601427e-08,
"loss": 0.49882930517196655,
"step": 3693
},
{
"epoch": 2.9178515007898893,
"grad_norm": 9.444978699118497,
"learning_rate": 2.326682513849654e-08,
"loss": 0.4514763653278351,
"step": 3694
},
{
"epoch": 2.918641390205371,
"grad_norm": 16.735071211993855,
"learning_rate": 2.282609379096523e-08,
"loss": 0.413520872592926,
"step": 3695
},
{
"epoch": 2.919431279620853,
"grad_norm": 10.18649650301645,
"learning_rate": 2.238956717634022e-08,
"loss": 0.3149523138999939,
"step": 3696
},
{
"epoch": 2.920221169036335,
"grad_norm": 7.242383152098311,
"learning_rate": 2.1957245663399852e-08,
"loss": 0.42863062024116516,
"step": 3697
},
{
"epoch": 2.9210110584518167,
"grad_norm": 12.249847711419223,
"learning_rate": 2.1529129617371414e-08,
"loss": 0.3425447344779968,
"step": 3698
},
{
"epoch": 2.9218009478672986,
"grad_norm": 21.564346952177747,
"learning_rate": 2.1105219399927823e-08,
"loss": 0.6913363933563232,
"step": 3699
},
{
"epoch": 2.9225908372827805,
"grad_norm": 14.247309861203046,
"learning_rate": 2.068551536919039e-08,
"loss": 0.5754636526107788,
"step": 3700
},
{
"epoch": 2.9233807266982623,
"grad_norm": 14.729656431110298,
"learning_rate": 2.02700178797266e-08,
"loss": 0.1754104197025299,
"step": 3701
},
{
"epoch": 2.924170616113744,
"grad_norm": 10.378294101333756,
"learning_rate": 1.9858727282549563e-08,
"loss": 0.5599175095558167,
"step": 3702
},
{
"epoch": 2.924960505529226,
"grad_norm": 7.713686434655625,
"learning_rate": 1.9451643925119667e-08,
"loss": 0.429149329662323,
"step": 3703
},
{
"epoch": 2.925750394944708,
"grad_norm": 18.397053591718155,
"learning_rate": 1.9048768151341825e-08,
"loss": 1.2451764345169067,
"step": 3704
},
{
"epoch": 2.9265402843601898,
"grad_norm": 11.278121609485279,
"learning_rate": 1.8650100301566553e-08,
"loss": 0.2728927433490753,
"step": 3705
},
{
"epoch": 2.9273301737756716,
"grad_norm": 12.412783868000135,
"learning_rate": 1.8255640712590004e-08,
"loss": 0.39738449454307556,
"step": 3706
},
{
"epoch": 2.9281200631911535,
"grad_norm": 14.573969152453861,
"learning_rate": 1.7865389717653392e-08,
"loss": 0.4282408356666565,
"step": 3707
},
{
"epoch": 2.9289099526066353,
"grad_norm": 12.27556396872362,
"learning_rate": 1.7479347646441323e-08,
"loss": 0.573512852191925,
"step": 3708
},
{
"epoch": 2.9296998420221168,
"grad_norm": 13.78449081143613,
"learning_rate": 1.709751482508404e-08,
"loss": 0.4519804120063782,
"step": 3709
},
{
"epoch": 2.9304897314375986,
"grad_norm": 8.515540650947582,
"learning_rate": 1.6719891576154612e-08,
"loss": 0.6025781035423279,
"step": 3710
},
{
"epoch": 2.9312796208530805,
"grad_norm": 9.597411743199306,
"learning_rate": 1.634647821867119e-08,
"loss": 0.15691038966178894,
"step": 3711
},
{
"epoch": 2.9320695102685623,
"grad_norm": 10.486595198764013,
"learning_rate": 1.5977275068093657e-08,
"loss": 0.22191372513771057,
"step": 3712
},
{
"epoch": 2.932859399684044,
"grad_norm": 11.847663895398458,
"learning_rate": 1.56122824363264e-08,
"loss": 0.5614909529685974,
"step": 3713
},
{
"epoch": 2.933649289099526,
"grad_norm": 13.508354632592122,
"learning_rate": 1.52515006317161e-08,
"loss": 0.42124590277671814,
"step": 3714
},
{
"epoch": 2.934439178515008,
"grad_norm": 12.085187248497775,
"learning_rate": 1.4894929959053395e-08,
"loss": 0.39891332387924194,
"step": 3715
},
{
"epoch": 2.9352290679304898,
"grad_norm": 11.772324248757105,
"learning_rate": 1.4542570719569549e-08,
"loss": 0.743476152420044,
"step": 3716
},
{
"epoch": 2.9360189573459716,
"grad_norm": 13.824383157557017,
"learning_rate": 1.4194423210939223e-08,
"loss": 0.7813572883605957,
"step": 3717
},
{
"epoch": 2.9368088467614535,
"grad_norm": 12.023055278816088,
"learning_rate": 1.3850487727278261e-08,
"loss": 0.18068841099739075,
"step": 3718
},
{
"epoch": 2.9375987361769353,
"grad_norm": 9.004344869826209,
"learning_rate": 1.351076455914424e-08,
"loss": 0.35265034437179565,
"step": 3719
},
{
"epoch": 2.938388625592417,
"grad_norm": 6.411410825512506,
"learning_rate": 1.3175253993537584e-08,
"loss": 0.1986955851316452,
"step": 3720
},
{
"epoch": 2.9391785150078986,
"grad_norm": 15.542536754284288,
"learning_rate": 1.284395631389712e-08,
"loss": 1.2308954000473022,
"step": 3721
},
{
"epoch": 2.9399684044233805,
"grad_norm": 11.589586159826656,
"learning_rate": 1.251687180010508e-08,
"loss": 0.3059263229370117,
"step": 3722
},
{
"epoch": 2.9407582938388623,
"grad_norm": 11.848666416715453,
"learning_rate": 1.2194000728483758e-08,
"loss": 0.381493479013443,
"step": 3723
},
{
"epoch": 2.941548183254344,
"grad_norm": 9.762685828075522,
"learning_rate": 1.1875343371795522e-08,
"loss": 0.3416656255722046,
"step": 3724
},
{
"epoch": 2.942338072669826,
"grad_norm": 13.146351843743089,
"learning_rate": 1.1560899999242813e-08,
"loss": 0.24065348505973816,
"step": 3725
},
{
"epoch": 2.943127962085308,
"grad_norm": 8.753449681389933,
"learning_rate": 1.1250670876468694e-08,
"loss": 0.5435752868652344,
"step": 3726
},
{
"epoch": 2.9439178515007898,
"grad_norm": 9.571620847460116,
"learning_rate": 1.0944656265555186e-08,
"loss": 0.5217230319976807,
"step": 3727
},
{
"epoch": 2.9447077409162716,
"grad_norm": 13.736435986610593,
"learning_rate": 1.0642856425025494e-08,
"loss": 0.30995261669158936,
"step": 3728
},
{
"epoch": 2.9454976303317535,
"grad_norm": 11.819774523695497,
"learning_rate": 1.0345271609840668e-08,
"loss": 0.27620571851730347,
"step": 3729
},
{
"epoch": 2.9462875197472354,
"grad_norm": 15.156276372780589,
"learning_rate": 1.0051902071401276e-08,
"loss": 0.6031606793403625,
"step": 3730
},
{
"epoch": 2.947077409162717,
"grad_norm": 12.80904870667671,
"learning_rate": 9.762748057546845e-09,
"loss": 0.4767545461654663,
"step": 3731
},
{
"epoch": 2.947867298578199,
"grad_norm": 9.951774811263586,
"learning_rate": 9.477809812555306e-09,
"loss": 0.6249617338180542,
"step": 3732
},
{
"epoch": 2.948657187993681,
"grad_norm": 8.066686549216158,
"learning_rate": 9.197087577144104e-09,
"loss": 0.2534811198711395,
"step": 3733
},
{
"epoch": 2.949447077409163,
"grad_norm": 14.503347622672706,
"learning_rate": 8.92058158846798e-09,
"loss": 0.259134441614151,
"step": 3734
},
{
"epoch": 2.9502369668246446,
"grad_norm": 12.45013867650853,
"learning_rate": 8.648292080119524e-09,
"loss": 0.7195329070091248,
"step": 3735
},
{
"epoch": 2.9510268562401265,
"grad_norm": 14.526124154717072,
"learning_rate": 8.38021928213084e-09,
"loss": 0.5841654539108276,
"step": 3736
},
{
"epoch": 2.9518167456556084,
"grad_norm": 9.850968529140541,
"learning_rate": 8.11636342097022e-09,
"loss": 0.36491304636001587,
"step": 3737
},
{
"epoch": 2.9526066350710902,
"grad_norm": 10.07563348526924,
"learning_rate": 7.856724719544351e-09,
"loss": 0.4188252091407776,
"step": 3738
},
{
"epoch": 2.953396524486572,
"grad_norm": 22.212639293967655,
"learning_rate": 7.601303397196113e-09,
"loss": 0.3442292809486389,
"step": 3739
},
{
"epoch": 2.954186413902054,
"grad_norm": 14.029241381991797,
"learning_rate": 7.350099669706235e-09,
"loss": 0.21167263388633728,
"step": 3740
},
{
"epoch": 2.954976303317536,
"grad_norm": 12.136596175552866,
"learning_rate": 7.103113749293844e-09,
"loss": 0.6398590207099915,
"step": 3741
},
{
"epoch": 2.955766192733017,
"grad_norm": 9.741023971909382,
"learning_rate": 6.860345844612038e-09,
"loss": 0.5108597278594971,
"step": 3742
},
{
"epoch": 2.956556082148499,
"grad_norm": 7.655686126361488,
"learning_rate": 6.621796160752314e-09,
"loss": 0.36386775970458984,
"step": 3743
},
{
"epoch": 2.957345971563981,
"grad_norm": 10.018875222198334,
"learning_rate": 6.387464899242357e-09,
"loss": 0.30166739225387573,
"step": 3744
},
{
"epoch": 2.958135860979463,
"grad_norm": 9.095182318951428,
"learning_rate": 6.1573522580460346e-09,
"loss": 0.45996037125587463,
"step": 3745
},
{
"epoch": 2.9589257503949447,
"grad_norm": 10.26232984567016,
"learning_rate": 5.931458431562842e-09,
"loss": 0.5725584626197815,
"step": 3746
},
{
"epoch": 2.9597156398104265,
"grad_norm": 8.756926533099401,
"learning_rate": 5.709783610629571e-09,
"loss": 0.29128578305244446,
"step": 3747
},
{
"epoch": 2.9605055292259084,
"grad_norm": 11.68804184137246,
"learning_rate": 5.492327982516976e-09,
"loss": 0.5195301175117493,
"step": 3748
},
{
"epoch": 2.9612954186413902,
"grad_norm": 10.545825985768683,
"learning_rate": 5.2790917309325505e-09,
"loss": 0.5382466912269592,
"step": 3749
},
{
"epoch": 2.962085308056872,
"grad_norm": 11.350603837614045,
"learning_rate": 5.070075036018863e-09,
"loss": 0.43567579984664917,
"step": 3750
},
{
"epoch": 2.962875197472354,
"grad_norm": 13.409834921169738,
"learning_rate": 4.865278074354107e-09,
"loss": 0.4768742322921753,
"step": 3751
},
{
"epoch": 2.963665086887836,
"grad_norm": 14.469370990431916,
"learning_rate": 4.664701018951001e-09,
"loss": 0.48725759983062744,
"step": 3752
},
{
"epoch": 2.9644549763033177,
"grad_norm": 10.832912501886673,
"learning_rate": 4.468344039258443e-09,
"loss": 0.2995648980140686,
"step": 3753
},
{
"epoch": 2.9652448657187995,
"grad_norm": 17.54919278880869,
"learning_rate": 4.2762073011592966e-09,
"loss": 0.6087955832481384,
"step": 3754
},
{
"epoch": 2.966034755134281,
"grad_norm": 10.31999554844462,
"learning_rate": 4.088290966970943e-09,
"loss": 0.8731129169464111,
"step": 3755
},
{
"epoch": 2.966824644549763,
"grad_norm": 10.898544647919328,
"learning_rate": 3.904595195445837e-09,
"loss": 0.7485541105270386,
"step": 3756
},
{
"epoch": 2.9676145339652447,
"grad_norm": 13.203065381428397,
"learning_rate": 3.7251201417703996e-09,
"loss": 0.3719606399536133,
"step": 3757
},
{
"epoch": 2.9684044233807265,
"grad_norm": 8.052328293519382,
"learning_rate": 3.5498659575666783e-09,
"loss": 0.2648746371269226,
"step": 3758
},
{
"epoch": 2.9691943127962084,
"grad_norm": 12.815000426949975,
"learning_rate": 3.3788327908890196e-09,
"loss": 0.3461490869522095,
"step": 3759
},
{
"epoch": 2.9699842022116902,
"grad_norm": 7.421454114527485,
"learning_rate": 3.212020786227399e-09,
"loss": 0.3270190358161926,
"step": 3760
},
{
"epoch": 2.970774091627172,
"grad_norm": 10.632206347492682,
"learning_rate": 3.0494300845052006e-09,
"loss": 0.48663121461868286,
"step": 3761
},
{
"epoch": 2.971563981042654,
"grad_norm": 9.052624248155158,
"learning_rate": 2.8910608230786617e-09,
"loss": 0.3653707206249237,
"step": 3762
},
{
"epoch": 2.972353870458136,
"grad_norm": 21.507879887876314,
"learning_rate": 2.7369131357402045e-09,
"loss": 0.6693405508995056,
"step": 3763
},
{
"epoch": 2.9731437598736177,
"grad_norm": 11.871106908449853,
"learning_rate": 2.586987152712883e-09,
"loss": 0.3541383445262909,
"step": 3764
},
{
"epoch": 2.9739336492890995,
"grad_norm": 9.347340281030029,
"learning_rate": 2.441283000655381e-09,
"loss": 0.2928946614265442,
"step": 3765
},
{
"epoch": 2.9747235387045814,
"grad_norm": 14.20794932298081,
"learning_rate": 2.2998008026592356e-09,
"loss": 0.8095124363899231,
"step": 3766
},
{
"epoch": 2.9755134281200633,
"grad_norm": 19.781359673109964,
"learning_rate": 2.1625406782482817e-09,
"loss": 0.9541232585906982,
"step": 3767
},
{
"epoch": 2.976303317535545,
"grad_norm": 12.307332036811797,
"learning_rate": 2.0295027433808736e-09,
"loss": 1.270265817642212,
"step": 3768
},
{
"epoch": 2.977093206951027,
"grad_norm": 9.268483761306474,
"learning_rate": 1.9006871104482183e-09,
"loss": 0.3142393231391907,
"step": 3769
},
{
"epoch": 2.977883096366509,
"grad_norm": 12.935271250550574,
"learning_rate": 1.776093888273267e-09,
"loss": 2.3558170795440674,
"step": 3770
},
{
"epoch": 2.9786729857819907,
"grad_norm": 14.14628000993969,
"learning_rate": 1.655723182113489e-09,
"loss": 0.3965626657009125,
"step": 3771
},
{
"epoch": 2.9794628751974725,
"grad_norm": 10.694643173228462,
"learning_rate": 1.5395750936580967e-09,
"loss": 0.3927484452724457,
"step": 3772
},
{
"epoch": 2.9802527646129544,
"grad_norm": 15.868269189008021,
"learning_rate": 1.4276497210297114e-09,
"loss": 0.2537468671798706,
"step": 3773
},
{
"epoch": 2.9810426540284363,
"grad_norm": 13.865339454139647,
"learning_rate": 1.3199471587832524e-09,
"loss": 0.46213477849960327,
"step": 3774
},
{
"epoch": 2.981832543443918,
"grad_norm": 12.317620064332576,
"learning_rate": 1.2164674979059378e-09,
"loss": 0.6187537312507629,
"step": 3775
},
{
"epoch": 2.9826224328593995,
"grad_norm": 12.70811742821486,
"learning_rate": 1.1172108258178382e-09,
"loss": 0.9330609440803528,
"step": 3776
},
{
"epoch": 2.9834123222748814,
"grad_norm": 12.521602937647542,
"learning_rate": 1.0221772263707686e-09,
"loss": 0.48245567083358765,
"step": 3777
},
{
"epoch": 2.9842022116903633,
"grad_norm": 13.143173870731685,
"learning_rate": 9.313667798505065e-10,
"loss": 0.4393605887889862,
"step": 3778
},
{
"epoch": 2.984992101105845,
"grad_norm": 7.047235748717833,
"learning_rate": 8.447795629734634e-10,
"loss": 0.32978931069374084,
"step": 3779
},
{
"epoch": 2.985781990521327,
"grad_norm": 8.024033854946516,
"learning_rate": 7.624156488883483e-10,
"loss": 0.41405847668647766,
"step": 3780
},
{
"epoch": 2.986571879936809,
"grad_norm": 22.169614392633356,
"learning_rate": 6.842751071772791e-10,
"loss": 0.6391655206680298,
"step": 3781
},
{
"epoch": 2.9873617693522907,
"grad_norm": 12.141908595978165,
"learning_rate": 6.103580038530066e-10,
"loss": 0.5002020001411438,
"step": 3782
},
{
"epoch": 2.9881516587677726,
"grad_norm": 11.596188780257132,
"learning_rate": 5.406644013616902e-10,
"loss": 0.745347261428833,
"step": 3783
},
{
"epoch": 2.9889415481832544,
"grad_norm": 16.660637478949464,
"learning_rate": 4.751943585801222e-10,
"loss": 0.6414915919303894,
"step": 3784
},
{
"epoch": 2.9897314375987363,
"grad_norm": 10.793044072751652,
"learning_rate": 4.1394793081739324e-10,
"loss": 0.5143662691116333,
"step": 3785
},
{
"epoch": 2.990521327014218,
"grad_norm": 10.877705766316154,
"learning_rate": 3.5692516981544744e-10,
"loss": 0.35477590560913086,
"step": 3786
},
{
"epoch": 2.9913112164297,
"grad_norm": 10.463002285868905,
"learning_rate": 3.041261237463067e-10,
"loss": 0.5146927833557129,
"step": 3787
},
{
"epoch": 2.9921011058451814,
"grad_norm": 12.683003185650735,
"learning_rate": 2.5555083721595654e-10,
"loss": 0.7653088569641113,
"step": 3788
},
{
"epoch": 2.9928909952606633,
"grad_norm": 11.791360620237418,
"learning_rate": 2.1119935125990532e-10,
"loss": 0.3955351710319519,
"step": 3789
},
{
"epoch": 2.993680884676145,
"grad_norm": 18.909176585649405,
"learning_rate": 1.710717033470699e-10,
"loss": 0.6973004341125488,
"step": 3790
},
{
"epoch": 2.994470774091627,
"grad_norm": 10.889260209523764,
"learning_rate": 1.3516792737700014e-10,
"loss": 0.3600936233997345,
"step": 3791
},
{
"epoch": 2.995260663507109,
"grad_norm": 8.488379905621288,
"learning_rate": 1.0348805368209924e-10,
"loss": 0.3776974678039551,
"step": 3792
},
{
"epoch": 2.9960505529225907,
"grad_norm": 7.3688565178666146,
"learning_rate": 7.603210902484837e-11,
"loss": 0.2519044876098633,
"step": 3793
},
{
"epoch": 2.9968404423380726,
"grad_norm": 9.212568490304637,
"learning_rate": 5.280011660002693e-11,
"loss": 0.3771563470363617,
"step": 3794
},
{
"epoch": 2.9976303317535544,
"grad_norm": 10.294914027544094,
"learning_rate": 3.3792096034712675e-11,
"loss": 0.35696250200271606,
"step": 3795
},
{
"epoch": 2.9984202211690363,
"grad_norm": 6.820884906019979,
"learning_rate": 1.9008063386616315e-11,
"loss": 0.24771413207054138,
"step": 3796
},
{
"epoch": 2.999210110584518,
"grad_norm": 11.182619500206162,
"learning_rate": 8.448031145746883e-12,
"loss": 0.6118890047073364,
"step": 3797
},
{
"epoch": 3.0,
"grad_norm": 11.835933309637019,
"learning_rate": 2.1120082327463766e-12,
"loss": 0.2823507785797119,
"step": 3798
},
{
"epoch": 3.0,
"step": 3798,
"total_flos": 10133269585920.0,
"train_loss": 1.6510612177336417,
"train_runtime": 4328.0202,
"train_samples_per_second": 3.51,
"train_steps_per_second": 0.878
}
],
"logging_steps": 1,
"max_steps": 3798,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 10133269585920.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}