{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3798, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007898894154818325, "grad_norm": 40.71173454733523, "learning_rate": 0.0, "loss": 3.673168182373047, "step": 1 }, { "epoch": 0.001579778830963665, "grad_norm": 33.74250956215495, "learning_rate": 2.631578947368421e-08, "loss": 4.129596710205078, "step": 2 }, { "epoch": 0.002369668246445498, "grad_norm": 36.11546455389956, "learning_rate": 5.263157894736842e-08, "loss": 4.184542655944824, "step": 3 }, { "epoch": 0.00315955766192733, "grad_norm": 35.383571366831234, "learning_rate": 7.894736842105264e-08, "loss": 4.1142401695251465, "step": 4 }, { "epoch": 0.0039494470774091624, "grad_norm": 34.37559125944977, "learning_rate": 1.0526315789473685e-07, "loss": 3.7817375659942627, "step": 5 }, { "epoch": 0.004739336492890996, "grad_norm": 36.14685215620937, "learning_rate": 1.3157894736842107e-07, "loss": 4.114397048950195, "step": 6 }, { "epoch": 0.005529225908372828, "grad_norm": 41.482542925342734, "learning_rate": 1.5789473684210527e-07, "loss": 4.3803019523620605, "step": 7 }, { "epoch": 0.00631911532385466, "grad_norm": 33.58108993777, "learning_rate": 1.8421052631578948e-07, "loss": 4.10243558883667, "step": 8 }, { "epoch": 0.0071090047393364926, "grad_norm": 39.33271362667731, "learning_rate": 2.105263157894737e-07, "loss": 3.9596402645111084, "step": 9 }, { "epoch": 0.007898894154818325, "grad_norm": 50.03237601031588, "learning_rate": 2.3684210526315792e-07, "loss": 4.745785713195801, "step": 10 }, { "epoch": 0.008688783570300158, "grad_norm": 35.67111148226347, "learning_rate": 2.6315789473684213e-07, "loss": 3.8755834102630615, "step": 11 }, { "epoch": 0.009478672985781991, "grad_norm": 32.70142008970876, "learning_rate": 2.894736842105263e-07, "loss": 4.783450603485107, "step": 12 }, { "epoch": 0.010268562401263823, "grad_norm": 52.049082104276714, "learning_rate": 3.1578947368421055e-07, "loss": 4.5102057456970215, "step": 13 }, { "epoch": 0.011058451816745656, "grad_norm": 29.064967370756015, "learning_rate": 3.421052631578948e-07, "loss": 3.072397470474243, "step": 14 }, { "epoch": 0.011848341232227487, "grad_norm": 22.10944679220599, "learning_rate": 3.6842105263157896e-07, "loss": 2.7109313011169434, "step": 15 }, { "epoch": 0.01263823064770932, "grad_norm": 40.53408902245728, "learning_rate": 3.9473684210526315e-07, "loss": 4.520854949951172, "step": 16 }, { "epoch": 0.013428120063191154, "grad_norm": 36.60530673657224, "learning_rate": 4.210526315789474e-07, "loss": 4.224055290222168, "step": 17 }, { "epoch": 0.014218009478672985, "grad_norm": 33.336265071789754, "learning_rate": 4.473684210526316e-07, "loss": 4.239147186279297, "step": 18 }, { "epoch": 0.015007898894154818, "grad_norm": 43.94432106792913, "learning_rate": 4.7368421052631585e-07, "loss": 4.043759346008301, "step": 19 }, { "epoch": 0.01579778830963665, "grad_norm": 33.99185039525682, "learning_rate": 5.000000000000001e-07, "loss": 4.236106872558594, "step": 20 }, { "epoch": 0.016587677725118485, "grad_norm": 38.29448839626639, "learning_rate": 5.263157894736843e-07, "loss": 4.180179595947266, "step": 21 }, { "epoch": 0.017377567140600316, "grad_norm": 30.15666884397043, "learning_rate": 5.526315789473684e-07, "loss": 3.893503189086914, "step": 22 }, { "epoch": 0.018167456556082148, "grad_norm": 36.85491373097027, "learning_rate": 5.789473684210526e-07, "loss": 4.568385124206543, "step": 23 }, { "epoch": 0.018957345971563982, "grad_norm": 32.16948864627544, "learning_rate": 6.052631578947369e-07, "loss": 3.1852474212646484, "step": 24 }, { "epoch": 0.019747235387045814, "grad_norm": 40.73483389215707, "learning_rate": 6.315789473684211e-07, "loss": 5.036445617675781, "step": 25 }, { "epoch": 0.020537124802527645, "grad_norm": 37.103608164895796, "learning_rate": 6.578947368421053e-07, "loss": 4.371845245361328, "step": 26 }, { "epoch": 0.02132701421800948, "grad_norm": 24.05172035473979, "learning_rate": 6.842105263157896e-07, "loss": 3.7923712730407715, "step": 27 }, { "epoch": 0.022116903633491312, "grad_norm": 35.28854412414826, "learning_rate": 7.105263157894737e-07, "loss": 4.174007415771484, "step": 28 }, { "epoch": 0.022906793048973143, "grad_norm": 18.614769000709014, "learning_rate": 7.368421052631579e-07, "loss": 2.7547430992126465, "step": 29 }, { "epoch": 0.023696682464454975, "grad_norm": 28.426674964913303, "learning_rate": 7.631578947368422e-07, "loss": 4.140813827514648, "step": 30 }, { "epoch": 0.02448657187993681, "grad_norm": 22.844379088367365, "learning_rate": 7.894736842105263e-07, "loss": 3.9789390563964844, "step": 31 }, { "epoch": 0.02527646129541864, "grad_norm": 16.324910957553573, "learning_rate": 8.157894736842106e-07, "loss": 3.408233165740967, "step": 32 }, { "epoch": 0.026066350710900472, "grad_norm": 17.84036406675527, "learning_rate": 8.421052631578948e-07, "loss": 3.152615547180176, "step": 33 }, { "epoch": 0.026856240126382307, "grad_norm": 37.04828879151497, "learning_rate": 8.68421052631579e-07, "loss": 4.3278489112854, "step": 34 }, { "epoch": 0.02764612954186414, "grad_norm": 23.57795025738247, "learning_rate": 8.947368421052632e-07, "loss": 3.982863426208496, "step": 35 }, { "epoch": 0.02843601895734597, "grad_norm": 21.626842144038697, "learning_rate": 9.210526315789474e-07, "loss": 3.7231547832489014, "step": 36 }, { "epoch": 0.029225908372827805, "grad_norm": 16.16330549232114, "learning_rate": 9.473684210526317e-07, "loss": 3.6238391399383545, "step": 37 }, { "epoch": 0.030015797788309637, "grad_norm": 19.328825464830587, "learning_rate": 9.736842105263158e-07, "loss": 3.8130602836608887, "step": 38 }, { "epoch": 0.030805687203791468, "grad_norm": 13.0720498885057, "learning_rate": 1.0000000000000002e-06, "loss": 3.765305519104004, "step": 39 }, { "epoch": 0.0315955766192733, "grad_norm": 20.81732653658493, "learning_rate": 1.0263157894736843e-06, "loss": 3.808793783187866, "step": 40 }, { "epoch": 0.03238546603475513, "grad_norm": 16.11869827988803, "learning_rate": 1.0526315789473685e-06, "loss": 3.3212504386901855, "step": 41 }, { "epoch": 0.03317535545023697, "grad_norm": 19.85916750032169, "learning_rate": 1.0789473684210527e-06, "loss": 3.4793872833251953, "step": 42 }, { "epoch": 0.0339652448657188, "grad_norm": 14.5384162049997, "learning_rate": 1.1052631578947369e-06, "loss": 3.4185662269592285, "step": 43 }, { "epoch": 0.03475513428120063, "grad_norm": 13.037105709248864, "learning_rate": 1.1315789473684213e-06, "loss": 3.05875563621521, "step": 44 }, { "epoch": 0.035545023696682464, "grad_norm": 28.094177440990883, "learning_rate": 1.1578947368421053e-06, "loss": 4.200315475463867, "step": 45 }, { "epoch": 0.036334913112164295, "grad_norm": 15.298203417574237, "learning_rate": 1.1842105263157894e-06, "loss": 3.230691909790039, "step": 46 }, { "epoch": 0.03712480252764613, "grad_norm": 20.703338617496122, "learning_rate": 1.2105263157894738e-06, "loss": 3.226253032684326, "step": 47 }, { "epoch": 0.037914691943127965, "grad_norm": 19.766902333430476, "learning_rate": 1.236842105263158e-06, "loss": 3.6443753242492676, "step": 48 }, { "epoch": 0.038704581358609796, "grad_norm": 12.540974533970843, "learning_rate": 1.2631578947368422e-06, "loss": 3.3909823894500732, "step": 49 }, { "epoch": 0.03949447077409163, "grad_norm": 8.67596125382628, "learning_rate": 1.2894736842105266e-06, "loss": 3.0414187908172607, "step": 50 }, { "epoch": 0.04028436018957346, "grad_norm": 14.600880901536001, "learning_rate": 1.3157894736842106e-06, "loss": 3.7503838539123535, "step": 51 }, { "epoch": 0.04107424960505529, "grad_norm": 17.076909526004943, "learning_rate": 1.342105263157895e-06, "loss": 3.5003442764282227, "step": 52 }, { "epoch": 0.04186413902053712, "grad_norm": 9.236759947184684, "learning_rate": 1.3684210526315791e-06, "loss": 2.874293088912964, "step": 53 }, { "epoch": 0.04265402843601896, "grad_norm": 11.84849423549626, "learning_rate": 1.394736842105263e-06, "loss": 3.1183371543884277, "step": 54 }, { "epoch": 0.04344391785150079, "grad_norm": 12.90432487886042, "learning_rate": 1.4210526315789475e-06, "loss": 3.250241756439209, "step": 55 }, { "epoch": 0.044233807266982623, "grad_norm": 16.869043357282678, "learning_rate": 1.4473684210526317e-06, "loss": 3.1247291564941406, "step": 56 }, { "epoch": 0.045023696682464455, "grad_norm": 9.282453097796106, "learning_rate": 1.4736842105263159e-06, "loss": 2.924187421798706, "step": 57 }, { "epoch": 0.045813586097946286, "grad_norm": 8.648636973287344, "learning_rate": 1.5e-06, "loss": 3.1763153076171875, "step": 58 }, { "epoch": 0.04660347551342812, "grad_norm": 8.629843477816483, "learning_rate": 1.5263157894736844e-06, "loss": 3.0944461822509766, "step": 59 }, { "epoch": 0.04739336492890995, "grad_norm": 52.54991212893474, "learning_rate": 1.5526315789473686e-06, "loss": 2.84696888923645, "step": 60 }, { "epoch": 0.04818325434439179, "grad_norm": 10.192033588061753, "learning_rate": 1.5789473684210526e-06, "loss": 3.2654457092285156, "step": 61 }, { "epoch": 0.04897314375987362, "grad_norm": 19.884719085034156, "learning_rate": 1.605263157894737e-06, "loss": 3.7629897594451904, "step": 62 }, { "epoch": 0.04976303317535545, "grad_norm": 12.313693728331911, "learning_rate": 1.6315789473684212e-06, "loss": 3.5302507877349854, "step": 63 }, { "epoch": 0.05055292259083728, "grad_norm": 17.01311845510386, "learning_rate": 1.6578947368421053e-06, "loss": 3.5614633560180664, "step": 64 }, { "epoch": 0.05134281200631911, "grad_norm": 16.95840885870688, "learning_rate": 1.6842105263157895e-06, "loss": 3.264988660812378, "step": 65 }, { "epoch": 0.052132701421800945, "grad_norm": 14.308550297948006, "learning_rate": 1.710526315789474e-06, "loss": 3.265235424041748, "step": 66 }, { "epoch": 0.05292259083728278, "grad_norm": 14.519207172660034, "learning_rate": 1.736842105263158e-06, "loss": 3.188286542892456, "step": 67 }, { "epoch": 0.053712480252764615, "grad_norm": 17.603652475188834, "learning_rate": 1.7631578947368423e-06, "loss": 2.9039247035980225, "step": 68 }, { "epoch": 0.054502369668246446, "grad_norm": 11.285868937730449, "learning_rate": 1.7894736842105265e-06, "loss": 3.108914375305176, "step": 69 }, { "epoch": 0.05529225908372828, "grad_norm": 9.21536421673824, "learning_rate": 1.8157894736842109e-06, "loss": 2.8382675647735596, "step": 70 }, { "epoch": 0.05608214849921011, "grad_norm": 13.008599403705528, "learning_rate": 1.8421052631578948e-06, "loss": 3.444577217102051, "step": 71 }, { "epoch": 0.05687203791469194, "grad_norm": 18.071771458815004, "learning_rate": 1.868421052631579e-06, "loss": 3.67020320892334, "step": 72 }, { "epoch": 0.05766192733017378, "grad_norm": 10.064836586829417, "learning_rate": 1.8947368421052634e-06, "loss": 3.3886473178863525, "step": 73 }, { "epoch": 0.05845181674565561, "grad_norm": 10.757354094621407, "learning_rate": 1.9210526315789474e-06, "loss": 2.9087605476379395, "step": 74 }, { "epoch": 0.05924170616113744, "grad_norm": 11.450459481352778, "learning_rate": 1.9473684210526315e-06, "loss": 3.3567676544189453, "step": 75 }, { "epoch": 0.06003159557661927, "grad_norm": 13.789083297248077, "learning_rate": 1.973684210526316e-06, "loss": 3.567488431930542, "step": 76 }, { "epoch": 0.060821484992101105, "grad_norm": 10.972976083153702, "learning_rate": 2.0000000000000003e-06, "loss": 3.606407642364502, "step": 77 }, { "epoch": 0.061611374407582936, "grad_norm": 12.31068362309533, "learning_rate": 2.026315789473684e-06, "loss": 3.5690627098083496, "step": 78 }, { "epoch": 0.06240126382306477, "grad_norm": 10.473991872514576, "learning_rate": 2.0526315789473687e-06, "loss": 3.2413203716278076, "step": 79 }, { "epoch": 0.0631911532385466, "grad_norm": 11.476517559025107, "learning_rate": 2.078947368421053e-06, "loss": 3.541959524154663, "step": 80 }, { "epoch": 0.06398104265402843, "grad_norm": 10.327083091837444, "learning_rate": 2.105263157894737e-06, "loss": 2.6203503608703613, "step": 81 }, { "epoch": 0.06477093206951026, "grad_norm": 8.585567685933643, "learning_rate": 2.1315789473684212e-06, "loss": 3.0848870277404785, "step": 82 }, { "epoch": 0.06556082148499211, "grad_norm": 13.504858685023217, "learning_rate": 2.1578947368421054e-06, "loss": 2.950331687927246, "step": 83 }, { "epoch": 0.06635071090047394, "grad_norm": 12.928368968515905, "learning_rate": 2.1842105263157896e-06, "loss": 3.3477673530578613, "step": 84 }, { "epoch": 0.06714060031595577, "grad_norm": 34.5955948056376, "learning_rate": 2.2105263157894738e-06, "loss": 3.6285767555236816, "step": 85 }, { "epoch": 0.0679304897314376, "grad_norm": 21.770696431108018, "learning_rate": 2.236842105263158e-06, "loss": 2.9111273288726807, "step": 86 }, { "epoch": 0.06872037914691943, "grad_norm": 10.760518044408816, "learning_rate": 2.2631578947368426e-06, "loss": 3.0999932289123535, "step": 87 }, { "epoch": 0.06951026856240126, "grad_norm": 14.808381283086948, "learning_rate": 2.2894736842105263e-06, "loss": 2.990964412689209, "step": 88 }, { "epoch": 0.0703001579778831, "grad_norm": 10.068849907277835, "learning_rate": 2.3157894736842105e-06, "loss": 3.190941095352173, "step": 89 }, { "epoch": 0.07109004739336493, "grad_norm": 13.214233489035319, "learning_rate": 2.342105263157895e-06, "loss": 3.2512447834014893, "step": 90 }, { "epoch": 0.07187993680884676, "grad_norm": 9.43746117081832, "learning_rate": 2.368421052631579e-06, "loss": 2.6215569972991943, "step": 91 }, { "epoch": 0.07266982622432859, "grad_norm": 6.790489861990664, "learning_rate": 2.3947368421052635e-06, "loss": 2.6833393573760986, "step": 92 }, { "epoch": 0.07345971563981042, "grad_norm": 10.046189419343667, "learning_rate": 2.4210526315789477e-06, "loss": 3.4258813858032227, "step": 93 }, { "epoch": 0.07424960505529225, "grad_norm": 10.439120097914849, "learning_rate": 2.447368421052632e-06, "loss": 3.059093952178955, "step": 94 }, { "epoch": 0.0750394944707741, "grad_norm": 15.352463342964036, "learning_rate": 2.473684210526316e-06, "loss": 3.1503472328186035, "step": 95 }, { "epoch": 0.07582938388625593, "grad_norm": 10.372976525025615, "learning_rate": 2.5e-06, "loss": 3.1993770599365234, "step": 96 }, { "epoch": 0.07661927330173776, "grad_norm": 9.496881055859197, "learning_rate": 2.5263157894736844e-06, "loss": 3.2442855834960938, "step": 97 }, { "epoch": 0.07740916271721959, "grad_norm": 18.476512661393848, "learning_rate": 2.552631578947369e-06, "loss": 3.0400185585021973, "step": 98 }, { "epoch": 0.07819905213270142, "grad_norm": 9.034794663205666, "learning_rate": 2.578947368421053e-06, "loss": 3.134880542755127, "step": 99 }, { "epoch": 0.07898894154818326, "grad_norm": 9.677932164121641, "learning_rate": 2.605263157894737e-06, "loss": 3.2033190727233887, "step": 100 }, { "epoch": 0.07977883096366509, "grad_norm": 11.430338365698852, "learning_rate": 2.631578947368421e-06, "loss": 2.273861885070801, "step": 101 }, { "epoch": 0.08056872037914692, "grad_norm": 11.80829320514626, "learning_rate": 2.6578947368421053e-06, "loss": 2.6837069988250732, "step": 102 }, { "epoch": 0.08135860979462875, "grad_norm": 11.605993978422056, "learning_rate": 2.68421052631579e-06, "loss": 3.128217935562134, "step": 103 }, { "epoch": 0.08214849921011058, "grad_norm": 11.686211118151036, "learning_rate": 2.710526315789474e-06, "loss": 3.3786544799804688, "step": 104 }, { "epoch": 0.08293838862559241, "grad_norm": 11.970591219069323, "learning_rate": 2.7368421052631583e-06, "loss": 3.7821977138519287, "step": 105 }, { "epoch": 0.08372827804107424, "grad_norm": 8.789149918197234, "learning_rate": 2.7631578947368424e-06, "loss": 2.573795795440674, "step": 106 }, { "epoch": 0.08451816745655608, "grad_norm": 12.34914621357682, "learning_rate": 2.789473684210526e-06, "loss": 3.3090810775756836, "step": 107 }, { "epoch": 0.08530805687203792, "grad_norm": 13.080241716157126, "learning_rate": 2.815789473684211e-06, "loss": 2.717519998550415, "step": 108 }, { "epoch": 0.08609794628751975, "grad_norm": 10.317983708712724, "learning_rate": 2.842105263157895e-06, "loss": 2.5964150428771973, "step": 109 }, { "epoch": 0.08688783570300158, "grad_norm": 8.933831846542349, "learning_rate": 2.868421052631579e-06, "loss": 3.3069779872894287, "step": 110 }, { "epoch": 0.08767772511848342, "grad_norm": 9.117041629342554, "learning_rate": 2.8947368421052634e-06, "loss": 2.860931396484375, "step": 111 }, { "epoch": 0.08846761453396525, "grad_norm": 12.283222755341374, "learning_rate": 2.921052631578948e-06, "loss": 3.619509220123291, "step": 112 }, { "epoch": 0.08925750394944708, "grad_norm": 9.277584709531098, "learning_rate": 2.9473684210526317e-06, "loss": 2.366508960723877, "step": 113 }, { "epoch": 0.09004739336492891, "grad_norm": 7.894805274949022, "learning_rate": 2.973684210526316e-06, "loss": 3.2010068893432617, "step": 114 }, { "epoch": 0.09083728278041074, "grad_norm": 7.815315629294605, "learning_rate": 3e-06, "loss": 3.1761326789855957, "step": 115 }, { "epoch": 0.09162717219589257, "grad_norm": 12.929613818899176, "learning_rate": 3.0263157894736843e-06, "loss": 3.074592351913452, "step": 116 }, { "epoch": 0.0924170616113744, "grad_norm": 10.211678960918519, "learning_rate": 3.052631578947369e-06, "loss": 2.9478042125701904, "step": 117 }, { "epoch": 0.09320695102685624, "grad_norm": 14.765610562991277, "learning_rate": 3.078947368421053e-06, "loss": 2.716470718383789, "step": 118 }, { "epoch": 0.09399684044233807, "grad_norm": 13.058271020031325, "learning_rate": 3.1052631578947372e-06, "loss": 2.669914722442627, "step": 119 }, { "epoch": 0.0947867298578199, "grad_norm": 10.81941062693438, "learning_rate": 3.131578947368421e-06, "loss": 2.9119224548339844, "step": 120 }, { "epoch": 0.09557661927330174, "grad_norm": 10.596711287978934, "learning_rate": 3.157894736842105e-06, "loss": 2.921963930130005, "step": 121 }, { "epoch": 0.09636650868878358, "grad_norm": 15.574644209065054, "learning_rate": 3.1842105263157898e-06, "loss": 3.0262293815612793, "step": 122 }, { "epoch": 0.0971563981042654, "grad_norm": 10.64778680746815, "learning_rate": 3.210526315789474e-06, "loss": 3.186441421508789, "step": 123 }, { "epoch": 0.09794628751974724, "grad_norm": 14.63351232300644, "learning_rate": 3.236842105263158e-06, "loss": 3.451972246170044, "step": 124 }, { "epoch": 0.09873617693522907, "grad_norm": 8.585310462971533, "learning_rate": 3.2631578947368423e-06, "loss": 2.5213232040405273, "step": 125 }, { "epoch": 0.0995260663507109, "grad_norm": 7.39672659605034, "learning_rate": 3.289473684210527e-06, "loss": 2.5365099906921387, "step": 126 }, { "epoch": 0.10031595576619273, "grad_norm": 11.157374472691357, "learning_rate": 3.3157894736842107e-06, "loss": 2.8651609420776367, "step": 127 }, { "epoch": 0.10110584518167456, "grad_norm": 10.596959610695333, "learning_rate": 3.342105263157895e-06, "loss": 3.034381866455078, "step": 128 }, { "epoch": 0.1018957345971564, "grad_norm": 14.277741979207777, "learning_rate": 3.368421052631579e-06, "loss": 2.627697467803955, "step": 129 }, { "epoch": 0.10268562401263823, "grad_norm": 7.246920014548371, "learning_rate": 3.3947368421052636e-06, "loss": 2.9030356407165527, "step": 130 }, { "epoch": 0.10347551342812006, "grad_norm": 7.2011210077095775, "learning_rate": 3.421052631578948e-06, "loss": 2.7330222129821777, "step": 131 }, { "epoch": 0.10426540284360189, "grad_norm": 9.54971680440488, "learning_rate": 3.447368421052632e-06, "loss": 2.8853511810302734, "step": 132 }, { "epoch": 0.10505529225908374, "grad_norm": 18.526435046610747, "learning_rate": 3.473684210526316e-06, "loss": 3.570380449295044, "step": 133 }, { "epoch": 0.10584518167456557, "grad_norm": 34.71437679140294, "learning_rate": 3.5e-06, "loss": 3.5759708881378174, "step": 134 }, { "epoch": 0.1066350710900474, "grad_norm": 13.769311251161728, "learning_rate": 3.5263157894736846e-06, "loss": 2.9467697143554688, "step": 135 }, { "epoch": 0.10742496050552923, "grad_norm": 8.108715220223203, "learning_rate": 3.5526315789473687e-06, "loss": 2.982203483581543, "step": 136 }, { "epoch": 0.10821484992101106, "grad_norm": 9.835029796798992, "learning_rate": 3.578947368421053e-06, "loss": 2.9849791526794434, "step": 137 }, { "epoch": 0.10900473933649289, "grad_norm": 10.17747887266375, "learning_rate": 3.605263157894737e-06, "loss": 2.5947208404541016, "step": 138 }, { "epoch": 0.10979462875197472, "grad_norm": 30.956825233562924, "learning_rate": 3.6315789473684217e-06, "loss": 2.478144407272339, "step": 139 }, { "epoch": 0.11058451816745656, "grad_norm": 14.433139764637794, "learning_rate": 3.657894736842106e-06, "loss": 3.3270864486694336, "step": 140 }, { "epoch": 0.11137440758293839, "grad_norm": 9.114525691597905, "learning_rate": 3.6842105263157896e-06, "loss": 3.3300728797912598, "step": 141 }, { "epoch": 0.11216429699842022, "grad_norm": 10.962185488198799, "learning_rate": 3.710526315789474e-06, "loss": 2.858724594116211, "step": 142 }, { "epoch": 0.11295418641390205, "grad_norm": 9.002282231003035, "learning_rate": 3.736842105263158e-06, "loss": 3.2307381629943848, "step": 143 }, { "epoch": 0.11374407582938388, "grad_norm": 9.727066408665603, "learning_rate": 3.7631578947368426e-06, "loss": 3.1068058013916016, "step": 144 }, { "epoch": 0.11453396524486571, "grad_norm": 11.203620625302497, "learning_rate": 3.789473684210527e-06, "loss": 3.130736827850342, "step": 145 }, { "epoch": 0.11532385466034756, "grad_norm": 9.522046772312253, "learning_rate": 3.815789473684211e-06, "loss": 3.020020008087158, "step": 146 }, { "epoch": 0.11611374407582939, "grad_norm": 8.938687629412906, "learning_rate": 3.842105263157895e-06, "loss": 3.1917128562927246, "step": 147 }, { "epoch": 0.11690363349131122, "grad_norm": 12.372597800255045, "learning_rate": 3.868421052631579e-06, "loss": 3.393472671508789, "step": 148 }, { "epoch": 0.11769352290679305, "grad_norm": 11.917832534808403, "learning_rate": 3.894736842105263e-06, "loss": 2.8924148082733154, "step": 149 }, { "epoch": 0.11848341232227488, "grad_norm": 8.376978563766828, "learning_rate": 3.921052631578947e-06, "loss": 3.18516206741333, "step": 150 }, { "epoch": 0.11927330173775672, "grad_norm": 10.751606207100387, "learning_rate": 3.947368421052632e-06, "loss": 3.0006637573242188, "step": 151 }, { "epoch": 0.12006319115323855, "grad_norm": 10.601401419025592, "learning_rate": 3.9736842105263165e-06, "loss": 2.7774577140808105, "step": 152 }, { "epoch": 0.12085308056872038, "grad_norm": 16.707393086077566, "learning_rate": 4.000000000000001e-06, "loss": 3.4506897926330566, "step": 153 }, { "epoch": 0.12164296998420221, "grad_norm": 11.964190828423138, "learning_rate": 4.026315789473684e-06, "loss": 2.7091317176818848, "step": 154 }, { "epoch": 0.12243285939968404, "grad_norm": 24.527932349156284, "learning_rate": 4.052631578947368e-06, "loss": 3.345675468444824, "step": 155 }, { "epoch": 0.12322274881516587, "grad_norm": 7.3190241803547895, "learning_rate": 4.078947368421053e-06, "loss": 2.6796462535858154, "step": 156 }, { "epoch": 0.1240126382306477, "grad_norm": 11.87038661241619, "learning_rate": 4.105263157894737e-06, "loss": 2.6081080436706543, "step": 157 }, { "epoch": 0.12480252764612954, "grad_norm": 11.72825788387354, "learning_rate": 4.1315789473684216e-06, "loss": 3.0451087951660156, "step": 158 }, { "epoch": 0.12559241706161137, "grad_norm": 11.390036203544065, "learning_rate": 4.157894736842106e-06, "loss": 2.8156795501708984, "step": 159 }, { "epoch": 0.1263823064770932, "grad_norm": 21.914947252886318, "learning_rate": 4.18421052631579e-06, "loss": 2.579136848449707, "step": 160 }, { "epoch": 0.12717219589257503, "grad_norm": 10.243517013753197, "learning_rate": 4.210526315789474e-06, "loss": 2.9866762161254883, "step": 161 }, { "epoch": 0.12796208530805686, "grad_norm": 14.2986733371629, "learning_rate": 4.236842105263158e-06, "loss": 2.317359685897827, "step": 162 }, { "epoch": 0.1287519747235387, "grad_norm": 7.467677343055879, "learning_rate": 4.2631578947368425e-06, "loss": 1.8712537288665771, "step": 163 }, { "epoch": 0.12954186413902052, "grad_norm": 13.673831526330856, "learning_rate": 4.289473684210527e-06, "loss": 3.0335092544555664, "step": 164 }, { "epoch": 0.13033175355450238, "grad_norm": 16.02571007766451, "learning_rate": 4.315789473684211e-06, "loss": 3.632401943206787, "step": 165 }, { "epoch": 0.13112164296998421, "grad_norm": 17.67212914050096, "learning_rate": 4.342105263157895e-06, "loss": 2.679199457168579, "step": 166 }, { "epoch": 0.13191153238546605, "grad_norm": 19.192049393351457, "learning_rate": 4.368421052631579e-06, "loss": 2.2504091262817383, "step": 167 }, { "epoch": 0.13270142180094788, "grad_norm": 15.388906369037276, "learning_rate": 4.394736842105263e-06, "loss": 2.8348331451416016, "step": 168 }, { "epoch": 0.1334913112164297, "grad_norm": 16.961630671510154, "learning_rate": 4.4210526315789476e-06, "loss": 3.5437369346618652, "step": 169 }, { "epoch": 0.13428120063191154, "grad_norm": 19.29309682197195, "learning_rate": 4.447368421052632e-06, "loss": 3.0312232971191406, "step": 170 }, { "epoch": 0.13507109004739337, "grad_norm": 12.4885877307498, "learning_rate": 4.473684210526316e-06, "loss": 2.930471658706665, "step": 171 }, { "epoch": 0.1358609794628752, "grad_norm": 9.308821835922053, "learning_rate": 4.5e-06, "loss": 2.809572696685791, "step": 172 }, { "epoch": 0.13665086887835703, "grad_norm": 22.10410295713834, "learning_rate": 4.526315789473685e-06, "loss": 2.782520294189453, "step": 173 }, { "epoch": 0.13744075829383887, "grad_norm": 9.424258451368976, "learning_rate": 4.552631578947369e-06, "loss": 3.311187267303467, "step": 174 }, { "epoch": 0.1382306477093207, "grad_norm": 11.81370027596634, "learning_rate": 4.578947368421053e-06, "loss": 2.8037075996398926, "step": 175 }, { "epoch": 0.13902053712480253, "grad_norm": 19.14018612073658, "learning_rate": 4.605263157894737e-06, "loss": 2.7607202529907227, "step": 176 }, { "epoch": 0.13981042654028436, "grad_norm": 9.985114785424416, "learning_rate": 4.631578947368421e-06, "loss": 3.2012553215026855, "step": 177 }, { "epoch": 0.1406003159557662, "grad_norm": 9.786086973550603, "learning_rate": 4.657894736842106e-06, "loss": 2.4347238540649414, "step": 178 }, { "epoch": 0.14139020537124802, "grad_norm": 8.769407464343628, "learning_rate": 4.68421052631579e-06, "loss": 2.9895355701446533, "step": 179 }, { "epoch": 0.14218009478672985, "grad_norm": 10.531181285243829, "learning_rate": 4.710526315789474e-06, "loss": 2.5453662872314453, "step": 180 }, { "epoch": 0.1429699842022117, "grad_norm": 11.460636592646704, "learning_rate": 4.736842105263158e-06, "loss": 2.439375400543213, "step": 181 }, { "epoch": 0.14375987361769352, "grad_norm": 11.905830261985201, "learning_rate": 4.763157894736842e-06, "loss": 4.2906694412231445, "step": 182 }, { "epoch": 0.14454976303317535, "grad_norm": 16.583174963846417, "learning_rate": 4.789473684210527e-06, "loss": 3.135786533355713, "step": 183 }, { "epoch": 0.14533965244865718, "grad_norm": 16.327103894696393, "learning_rate": 4.815789473684211e-06, "loss": 1.763169288635254, "step": 184 }, { "epoch": 0.146129541864139, "grad_norm": 9.224173810150578, "learning_rate": 4.842105263157895e-06, "loss": 2.444568157196045, "step": 185 }, { "epoch": 0.14691943127962084, "grad_norm": 13.627965372331012, "learning_rate": 4.8684210526315795e-06, "loss": 2.6934571266174316, "step": 186 }, { "epoch": 0.14770932069510267, "grad_norm": 11.077507415528911, "learning_rate": 4.894736842105264e-06, "loss": 2.9741287231445312, "step": 187 }, { "epoch": 0.1484992101105845, "grad_norm": 11.532032255217114, "learning_rate": 4.921052631578948e-06, "loss": 2.824622869491577, "step": 188 }, { "epoch": 0.14928909952606634, "grad_norm": 18.679360225668912, "learning_rate": 4.947368421052632e-06, "loss": 3.0964691638946533, "step": 189 }, { "epoch": 0.1500789889415482, "grad_norm": 9.620068127723037, "learning_rate": 4.973684210526316e-06, "loss": 2.981693744659424, "step": 190 }, { "epoch": 0.15086887835703003, "grad_norm": 7.772300956867857, "learning_rate": 5e-06, "loss": 2.799048662185669, "step": 191 }, { "epoch": 0.15165876777251186, "grad_norm": 12.414480771670288, "learning_rate": 5.026315789473685e-06, "loss": 3.1177570819854736, "step": 192 }, { "epoch": 0.1524486571879937, "grad_norm": 12.641305628552601, "learning_rate": 5.052631578947369e-06, "loss": 2.37302827835083, "step": 193 }, { "epoch": 0.15323854660347552, "grad_norm": 11.55906392820543, "learning_rate": 5.078947368421053e-06, "loss": 2.5041732788085938, "step": 194 }, { "epoch": 0.15402843601895735, "grad_norm": 7.7791945091120285, "learning_rate": 5.105263157894738e-06, "loss": 2.874569892883301, "step": 195 }, { "epoch": 0.15481832543443919, "grad_norm": 10.474120784603407, "learning_rate": 5.131578947368422e-06, "loss": 2.8549094200134277, "step": 196 }, { "epoch": 0.15560821484992102, "grad_norm": 7.993346143459158, "learning_rate": 5.157894736842106e-06, "loss": 2.993251085281372, "step": 197 }, { "epoch": 0.15639810426540285, "grad_norm": 20.941550789609742, "learning_rate": 5.18421052631579e-06, "loss": 4.168525695800781, "step": 198 }, { "epoch": 0.15718799368088468, "grad_norm": 12.528721144731515, "learning_rate": 5.210526315789474e-06, "loss": 2.4273793697357178, "step": 199 }, { "epoch": 0.1579778830963665, "grad_norm": 11.286529989353973, "learning_rate": 5.236842105263158e-06, "loss": 3.011016368865967, "step": 200 }, { "epoch": 0.15876777251184834, "grad_norm": 17.406854394281535, "learning_rate": 5.263157894736842e-06, "loss": 2.8892858028411865, "step": 201 }, { "epoch": 0.15955766192733017, "grad_norm": 12.78306492867607, "learning_rate": 5.289473684210526e-06, "loss": 2.4699714183807373, "step": 202 }, { "epoch": 0.160347551342812, "grad_norm": 12.969828387945576, "learning_rate": 5.315789473684211e-06, "loss": 2.7423977851867676, "step": 203 }, { "epoch": 0.16113744075829384, "grad_norm": 7.657893790006663, "learning_rate": 5.342105263157895e-06, "loss": 2.7218151092529297, "step": 204 }, { "epoch": 0.16192733017377567, "grad_norm": 9.492276008453782, "learning_rate": 5.36842105263158e-06, "loss": 2.555281639099121, "step": 205 }, { "epoch": 0.1627172195892575, "grad_norm": 13.463617124979155, "learning_rate": 5.394736842105264e-06, "loss": 3.284069538116455, "step": 206 }, { "epoch": 0.16350710900473933, "grad_norm": 12.35995760266546, "learning_rate": 5.421052631578948e-06, "loss": 2.8107781410217285, "step": 207 }, { "epoch": 0.16429699842022116, "grad_norm": 22.996410093914797, "learning_rate": 5.447368421052632e-06, "loss": 2.7325127124786377, "step": 208 }, { "epoch": 0.165086887835703, "grad_norm": 16.585705791801974, "learning_rate": 5.4736842105263165e-06, "loss": 2.7490532398223877, "step": 209 }, { "epoch": 0.16587677725118483, "grad_norm": 8.638152990987923, "learning_rate": 5.500000000000001e-06, "loss": 3.5097951889038086, "step": 210 }, { "epoch": 0.16666666666666666, "grad_norm": 13.734843553912885, "learning_rate": 5.526315789473685e-06, "loss": 3.340397834777832, "step": 211 }, { "epoch": 0.1674565560821485, "grad_norm": 32.571959419532426, "learning_rate": 5.552631578947368e-06, "loss": 3.0845370292663574, "step": 212 }, { "epoch": 0.16824644549763032, "grad_norm": 12.411694267674704, "learning_rate": 5.578947368421052e-06, "loss": 3.3935816287994385, "step": 213 }, { "epoch": 0.16903633491311215, "grad_norm": 8.554865869690868, "learning_rate": 5.605263157894737e-06, "loss": 2.5191092491149902, "step": 214 }, { "epoch": 0.16982622432859398, "grad_norm": 13.713331969172893, "learning_rate": 5.631578947368422e-06, "loss": 2.265045166015625, "step": 215 }, { "epoch": 0.17061611374407584, "grad_norm": 8.759424141686443, "learning_rate": 5.657894736842106e-06, "loss": 2.9024651050567627, "step": 216 }, { "epoch": 0.17140600315955767, "grad_norm": 20.192932719198154, "learning_rate": 5.68421052631579e-06, "loss": 3.174898624420166, "step": 217 }, { "epoch": 0.1721958925750395, "grad_norm": 9.300452899615632, "learning_rate": 5.710526315789474e-06, "loss": 2.203352212905884, "step": 218 }, { "epoch": 0.17298578199052134, "grad_norm": 16.964918499584563, "learning_rate": 5.736842105263158e-06, "loss": 2.399977684020996, "step": 219 }, { "epoch": 0.17377567140600317, "grad_norm": 20.569787010147447, "learning_rate": 5.7631578947368425e-06, "loss": 3.2228713035583496, "step": 220 }, { "epoch": 0.174565560821485, "grad_norm": 10.59360929404439, "learning_rate": 5.789473684210527e-06, "loss": 2.613041639328003, "step": 221 }, { "epoch": 0.17535545023696683, "grad_norm": 12.774907668976972, "learning_rate": 5.815789473684212e-06, "loss": 3.2169127464294434, "step": 222 }, { "epoch": 0.17614533965244866, "grad_norm": 38.185104925418536, "learning_rate": 5.842105263157896e-06, "loss": 3.227830648422241, "step": 223 }, { "epoch": 0.1769352290679305, "grad_norm": 15.337225837697051, "learning_rate": 5.86842105263158e-06, "loss": 2.9216299057006836, "step": 224 }, { "epoch": 0.17772511848341233, "grad_norm": 19.652283271220284, "learning_rate": 5.8947368421052634e-06, "loss": 3.0096397399902344, "step": 225 }, { "epoch": 0.17851500789889416, "grad_norm": 21.342277640312286, "learning_rate": 5.921052631578948e-06, "loss": 2.2839317321777344, "step": 226 }, { "epoch": 0.179304897314376, "grad_norm": 18.794368318684786, "learning_rate": 5.947368421052632e-06, "loss": 3.2453808784484863, "step": 227 }, { "epoch": 0.18009478672985782, "grad_norm": 20.372960047849478, "learning_rate": 5.973684210526316e-06, "loss": 3.73872447013855, "step": 228 }, { "epoch": 0.18088467614533965, "grad_norm": 7.477591538110839, "learning_rate": 6e-06, "loss": 2.9210574626922607, "step": 229 }, { "epoch": 0.18167456556082148, "grad_norm": 20.286752935347092, "learning_rate": 6.026315789473684e-06, "loss": 3.0264639854431152, "step": 230 }, { "epoch": 0.18246445497630331, "grad_norm": 7.752616268806402, "learning_rate": 6.0526315789473685e-06, "loss": 2.5452468395233154, "step": 231 }, { "epoch": 0.18325434439178515, "grad_norm": 14.021383263434027, "learning_rate": 6.0789473684210535e-06, "loss": 3.122269868850708, "step": 232 }, { "epoch": 0.18404423380726698, "grad_norm": 31.61505834794581, "learning_rate": 6.105263157894738e-06, "loss": 2.652003765106201, "step": 233 }, { "epoch": 0.1848341232227488, "grad_norm": 14.714502845759036, "learning_rate": 6.131578947368422e-06, "loss": 3.3281772136688232, "step": 234 }, { "epoch": 0.18562401263823064, "grad_norm": 8.281361641079902, "learning_rate": 6.157894736842106e-06, "loss": 2.793631076812744, "step": 235 }, { "epoch": 0.18641390205371247, "grad_norm": 18.19614005385017, "learning_rate": 6.18421052631579e-06, "loss": 2.75974178314209, "step": 236 }, { "epoch": 0.1872037914691943, "grad_norm": 14.062302050239529, "learning_rate": 6.2105263157894745e-06, "loss": 3.2623343467712402, "step": 237 }, { "epoch": 0.18799368088467613, "grad_norm": 11.273662081089574, "learning_rate": 6.236842105263159e-06, "loss": 3.017319440841675, "step": 238 }, { "epoch": 0.18878357030015797, "grad_norm": 12.442581117618769, "learning_rate": 6.263157894736842e-06, "loss": 2.9280037879943848, "step": 239 }, { "epoch": 0.1895734597156398, "grad_norm": 14.868405534686683, "learning_rate": 6.289473684210526e-06, "loss": 4.794824123382568, "step": 240 }, { "epoch": 0.19036334913112166, "grad_norm": 18.342383362134704, "learning_rate": 6.31578947368421e-06, "loss": 2.6316001415252686, "step": 241 }, { "epoch": 0.1911532385466035, "grad_norm": 12.903091182966538, "learning_rate": 6.342105263157895e-06, "loss": 3.190481185913086, "step": 242 }, { "epoch": 0.19194312796208532, "grad_norm": 13.060648808489958, "learning_rate": 6.3684210526315795e-06, "loss": 2.072887420654297, "step": 243 }, { "epoch": 0.19273301737756715, "grad_norm": 10.434484710036394, "learning_rate": 6.394736842105264e-06, "loss": 3.4056625366210938, "step": 244 }, { "epoch": 0.19352290679304898, "grad_norm": 17.07700555394061, "learning_rate": 6.421052631578948e-06, "loss": 3.1885697841644287, "step": 245 }, { "epoch": 0.1943127962085308, "grad_norm": 11.242377631499217, "learning_rate": 6.447368421052632e-06, "loss": 2.7972757816314697, "step": 246 }, { "epoch": 0.19510268562401265, "grad_norm": 10.122508148985222, "learning_rate": 6.473684210526316e-06, "loss": 2.625680923461914, "step": 247 }, { "epoch": 0.19589257503949448, "grad_norm": 15.194302817759452, "learning_rate": 6.5000000000000004e-06, "loss": 3.370196580886841, "step": 248 }, { "epoch": 0.1966824644549763, "grad_norm": 9.367476542559452, "learning_rate": 6.526315789473685e-06, "loss": 3.2335524559020996, "step": 249 }, { "epoch": 0.19747235387045814, "grad_norm": 13.638698209387572, "learning_rate": 6.55263157894737e-06, "loss": 3.060309410095215, "step": 250 }, { "epoch": 0.19826224328593997, "grad_norm": 21.504696461723928, "learning_rate": 6.578947368421054e-06, "loss": 2.5595827102661133, "step": 251 }, { "epoch": 0.1990521327014218, "grad_norm": 12.760844619202933, "learning_rate": 6.605263157894738e-06, "loss": 2.9294533729553223, "step": 252 }, { "epoch": 0.19984202211690363, "grad_norm": 16.14327478944924, "learning_rate": 6.631578947368421e-06, "loss": 3.1590025424957275, "step": 253 }, { "epoch": 0.20063191153238547, "grad_norm": 10.670002260991126, "learning_rate": 6.6578947368421055e-06, "loss": 2.868878126144409, "step": 254 }, { "epoch": 0.2014218009478673, "grad_norm": 7.410086829781492, "learning_rate": 6.68421052631579e-06, "loss": 2.524019479751587, "step": 255 }, { "epoch": 0.20221169036334913, "grad_norm": 9.676832864068576, "learning_rate": 6.710526315789474e-06, "loss": 2.7657065391540527, "step": 256 }, { "epoch": 0.20300157977883096, "grad_norm": 15.950580447163818, "learning_rate": 6.736842105263158e-06, "loss": 3.229970932006836, "step": 257 }, { "epoch": 0.2037914691943128, "grad_norm": 15.60324691981264, "learning_rate": 6.763157894736842e-06, "loss": 2.7795071601867676, "step": 258 }, { "epoch": 0.20458135860979462, "grad_norm": 7.647680832081348, "learning_rate": 6.789473684210527e-06, "loss": 2.202699661254883, "step": 259 }, { "epoch": 0.20537124802527645, "grad_norm": 11.113708735727473, "learning_rate": 6.8157894736842115e-06, "loss": 3.018834114074707, "step": 260 }, { "epoch": 0.20616113744075829, "grad_norm": 19.140215467090133, "learning_rate": 6.842105263157896e-06, "loss": 2.560054063796997, "step": 261 }, { "epoch": 0.20695102685624012, "grad_norm": 9.162828639443463, "learning_rate": 6.86842105263158e-06, "loss": 2.5356526374816895, "step": 262 }, { "epoch": 0.20774091627172195, "grad_norm": 13.818767936109861, "learning_rate": 6.894736842105264e-06, "loss": 3.1693482398986816, "step": 263 }, { "epoch": 0.20853080568720378, "grad_norm": 18.595765057532514, "learning_rate": 6.921052631578948e-06, "loss": 2.3949215412139893, "step": 264 }, { "epoch": 0.2093206951026856, "grad_norm": 17.318744899310534, "learning_rate": 6.947368421052632e-06, "loss": 3.075978994369507, "step": 265 }, { "epoch": 0.21011058451816747, "grad_norm": 24.655257168385077, "learning_rate": 6.973684210526316e-06, "loss": 3.427466869354248, "step": 266 }, { "epoch": 0.2109004739336493, "grad_norm": 15.241527713725562, "learning_rate": 7e-06, "loss": 2.109011650085449, "step": 267 }, { "epoch": 0.21169036334913113, "grad_norm": 8.501293757028371, "learning_rate": 7.026315789473684e-06, "loss": 3.112001419067383, "step": 268 }, { "epoch": 0.21248025276461296, "grad_norm": 15.448418886330026, "learning_rate": 7.052631578947369e-06, "loss": 1.7198365926742554, "step": 269 }, { "epoch": 0.2132701421800948, "grad_norm": 9.545915874496153, "learning_rate": 7.078947368421053e-06, "loss": 2.8806190490722656, "step": 270 }, { "epoch": 0.21406003159557663, "grad_norm": 18.727295292899043, "learning_rate": 7.1052631578947375e-06, "loss": 3.1914749145507812, "step": 271 }, { "epoch": 0.21484992101105846, "grad_norm": 19.609264443447998, "learning_rate": 7.131578947368422e-06, "loss": 3.506901741027832, "step": 272 }, { "epoch": 0.2156398104265403, "grad_norm": 13.136262515857457, "learning_rate": 7.157894736842106e-06, "loss": 3.197396755218506, "step": 273 }, { "epoch": 0.21642969984202212, "grad_norm": 23.821750570981514, "learning_rate": 7.18421052631579e-06, "loss": 3.0057897567749023, "step": 274 }, { "epoch": 0.21721958925750395, "grad_norm": 12.168381845745005, "learning_rate": 7.210526315789474e-06, "loss": 3.459364652633667, "step": 275 }, { "epoch": 0.21800947867298578, "grad_norm": 17.00612657277193, "learning_rate": 7.236842105263158e-06, "loss": 3.4900269508361816, "step": 276 }, { "epoch": 0.21879936808846762, "grad_norm": 10.283758953220772, "learning_rate": 7.263157894736843e-06, "loss": 3.3802132606506348, "step": 277 }, { "epoch": 0.21958925750394945, "grad_norm": 16.02360521858266, "learning_rate": 7.289473684210528e-06, "loss": 3.455819606781006, "step": 278 }, { "epoch": 0.22037914691943128, "grad_norm": 11.274076685112723, "learning_rate": 7.315789473684212e-06, "loss": 2.847879409790039, "step": 279 }, { "epoch": 0.2211690363349131, "grad_norm": 15.606199898777566, "learning_rate": 7.342105263157895e-06, "loss": 3.4169538021087646, "step": 280 }, { "epoch": 0.22195892575039494, "grad_norm": 12.898430910622961, "learning_rate": 7.368421052631579e-06, "loss": 3.030590772628784, "step": 281 }, { "epoch": 0.22274881516587677, "grad_norm": 15.500513401800355, "learning_rate": 7.3947368421052635e-06, "loss": 2.649216890335083, "step": 282 }, { "epoch": 0.2235387045813586, "grad_norm": 18.378896929038152, "learning_rate": 7.421052631578948e-06, "loss": 3.274688720703125, "step": 283 }, { "epoch": 0.22432859399684044, "grad_norm": 9.090747343971215, "learning_rate": 7.447368421052632e-06, "loss": 2.7677786350250244, "step": 284 }, { "epoch": 0.22511848341232227, "grad_norm": 11.906482510855422, "learning_rate": 7.473684210526316e-06, "loss": 3.239955186843872, "step": 285 }, { "epoch": 0.2259083728278041, "grad_norm": 11.474852988282166, "learning_rate": 7.500000000000001e-06, "loss": 3.009387969970703, "step": 286 }, { "epoch": 0.22669826224328593, "grad_norm": 12.575433605163134, "learning_rate": 7.526315789473685e-06, "loss": 2.9016571044921875, "step": 287 }, { "epoch": 0.22748815165876776, "grad_norm": 14.9975427549329, "learning_rate": 7.552631578947369e-06, "loss": 3.257054328918457, "step": 288 }, { "epoch": 0.2282780410742496, "grad_norm": 26.71508060406798, "learning_rate": 7.578947368421054e-06, "loss": 2.75146222114563, "step": 289 }, { "epoch": 0.22906793048973143, "grad_norm": 16.910026550475397, "learning_rate": 7.605263157894738e-06, "loss": 2.757063388824463, "step": 290 }, { "epoch": 0.22985781990521326, "grad_norm": 9.680373357034156, "learning_rate": 7.631578947368423e-06, "loss": 2.7959532737731934, "step": 291 }, { "epoch": 0.23064770932069512, "grad_norm": 12.18894296936391, "learning_rate": 7.657894736842106e-06, "loss": 2.466348171234131, "step": 292 }, { "epoch": 0.23143759873617695, "grad_norm": 10.475566667489755, "learning_rate": 7.68421052631579e-06, "loss": 3.3009557723999023, "step": 293 }, { "epoch": 0.23222748815165878, "grad_norm": 51.393448155359216, "learning_rate": 7.710526315789474e-06, "loss": 2.725738048553467, "step": 294 }, { "epoch": 0.2330173775671406, "grad_norm": 14.358238125160575, "learning_rate": 7.736842105263158e-06, "loss": 2.783334493637085, "step": 295 }, { "epoch": 0.23380726698262244, "grad_norm": 19.57667640214124, "learning_rate": 7.763157894736843e-06, "loss": 3.1590988636016846, "step": 296 }, { "epoch": 0.23459715639810427, "grad_norm": 12.972851406285965, "learning_rate": 7.789473684210526e-06, "loss": 3.1896674633026123, "step": 297 }, { "epoch": 0.2353870458135861, "grad_norm": 10.525192505195403, "learning_rate": 7.815789473684211e-06, "loss": 2.8256943225860596, "step": 298 }, { "epoch": 0.23617693522906794, "grad_norm": 11.099287801098866, "learning_rate": 7.842105263157895e-06, "loss": 3.2492432594299316, "step": 299 }, { "epoch": 0.23696682464454977, "grad_norm": 9.230404731264265, "learning_rate": 7.86842105263158e-06, "loss": 2.5488808155059814, "step": 300 }, { "epoch": 0.2377567140600316, "grad_norm": 15.341199592416597, "learning_rate": 7.894736842105265e-06, "loss": 2.4112818241119385, "step": 301 }, { "epoch": 0.23854660347551343, "grad_norm": 7.319063117780196, "learning_rate": 7.921052631578948e-06, "loss": 3.260639190673828, "step": 302 }, { "epoch": 0.23933649289099526, "grad_norm": 11.851958212642176, "learning_rate": 7.947368421052633e-06, "loss": 3.049391269683838, "step": 303 }, { "epoch": 0.2401263823064771, "grad_norm": 28.13791336194168, "learning_rate": 7.973684210526316e-06, "loss": 4.401567459106445, "step": 304 }, { "epoch": 0.24091627172195892, "grad_norm": 9.429106394158737, "learning_rate": 8.000000000000001e-06, "loss": 2.9501960277557373, "step": 305 }, { "epoch": 0.24170616113744076, "grad_norm": 7.158645831331756, "learning_rate": 8.026315789473685e-06, "loss": 2.5581390857696533, "step": 306 }, { "epoch": 0.2424960505529226, "grad_norm": 12.768920646366887, "learning_rate": 8.052631578947368e-06, "loss": 3.1385931968688965, "step": 307 }, { "epoch": 0.24328593996840442, "grad_norm": 12.913024370975956, "learning_rate": 8.078947368421053e-06, "loss": 2.7068610191345215, "step": 308 }, { "epoch": 0.24407582938388625, "grad_norm": 10.536088937809055, "learning_rate": 8.105263157894736e-06, "loss": 2.828160524368286, "step": 309 }, { "epoch": 0.24486571879936808, "grad_norm": 8.341704390314765, "learning_rate": 8.131578947368421e-06, "loss": 2.663266658782959, "step": 310 }, { "epoch": 0.2456556082148499, "grad_norm": 11.240401421047006, "learning_rate": 8.157894736842106e-06, "loss": 3.2981982231140137, "step": 311 }, { "epoch": 0.24644549763033174, "grad_norm": 11.187665628610933, "learning_rate": 8.18421052631579e-06, "loss": 2.5415968894958496, "step": 312 }, { "epoch": 0.24723538704581358, "grad_norm": 11.94500524279879, "learning_rate": 8.210526315789475e-06, "loss": 2.7816240787506104, "step": 313 }, { "epoch": 0.2480252764612954, "grad_norm": 6.355221325715025, "learning_rate": 8.236842105263158e-06, "loss": 2.686246395111084, "step": 314 }, { "epoch": 0.24881516587677724, "grad_norm": 17.979652703894846, "learning_rate": 8.263157894736843e-06, "loss": 3.970867395401001, "step": 315 }, { "epoch": 0.24960505529225907, "grad_norm": 11.300024279923099, "learning_rate": 8.289473684210526e-06, "loss": 2.6537764072418213, "step": 316 }, { "epoch": 0.25039494470774093, "grad_norm": 14.279900232687705, "learning_rate": 8.315789473684212e-06, "loss": 2.353628396987915, "step": 317 }, { "epoch": 0.25118483412322273, "grad_norm": 15.211272553278281, "learning_rate": 8.342105263157897e-06, "loss": 2.978910207748413, "step": 318 }, { "epoch": 0.2519747235387046, "grad_norm": 10.672333315833844, "learning_rate": 8.36842105263158e-06, "loss": 3.0564768314361572, "step": 319 }, { "epoch": 0.2527646129541864, "grad_norm": 13.177853507822194, "learning_rate": 8.394736842105263e-06, "loss": 3.3562185764312744, "step": 320 }, { "epoch": 0.25355450236966826, "grad_norm": 13.208149326921594, "learning_rate": 8.421052631578948e-06, "loss": 2.7793097496032715, "step": 321 }, { "epoch": 0.25434439178515006, "grad_norm": 7.862272858844339, "learning_rate": 8.447368421052632e-06, "loss": 2.6758689880371094, "step": 322 }, { "epoch": 0.2551342812006319, "grad_norm": 8.599054067109956, "learning_rate": 8.473684210526317e-06, "loss": 2.71248722076416, "step": 323 }, { "epoch": 0.2559241706161137, "grad_norm": 7.541371370024793, "learning_rate": 8.5e-06, "loss": 2.3310184478759766, "step": 324 }, { "epoch": 0.2567140600315956, "grad_norm": 7.201546562450413, "learning_rate": 8.526315789473685e-06, "loss": 2.957345962524414, "step": 325 }, { "epoch": 0.2575039494470774, "grad_norm": 12.601533616205508, "learning_rate": 8.552631578947368e-06, "loss": 3.0129475593566895, "step": 326 }, { "epoch": 0.25829383886255924, "grad_norm": 11.854948826772384, "learning_rate": 8.578947368421053e-06, "loss": 2.7769176959991455, "step": 327 }, { "epoch": 0.25908372827804105, "grad_norm": 10.669512492571942, "learning_rate": 8.605263157894738e-06, "loss": 2.550236940383911, "step": 328 }, { "epoch": 0.2598736176935229, "grad_norm": 17.79545384444541, "learning_rate": 8.631578947368422e-06, "loss": 2.8490400314331055, "step": 329 }, { "epoch": 0.26066350710900477, "grad_norm": 14.8135974727651, "learning_rate": 8.657894736842107e-06, "loss": 3.2657470703125, "step": 330 }, { "epoch": 0.26145339652448657, "grad_norm": 6.186576343564378, "learning_rate": 8.68421052631579e-06, "loss": 2.884676456451416, "step": 331 }, { "epoch": 0.26224328593996843, "grad_norm": 10.116320453707134, "learning_rate": 8.710526315789475e-06, "loss": 2.7804079055786133, "step": 332 }, { "epoch": 0.26303317535545023, "grad_norm": 9.185373101458339, "learning_rate": 8.736842105263158e-06, "loss": 3.2659783363342285, "step": 333 }, { "epoch": 0.2638230647709321, "grad_norm": 12.059802052206885, "learning_rate": 8.763157894736842e-06, "loss": 2.868699073791504, "step": 334 }, { "epoch": 0.2646129541864139, "grad_norm": 9.397740276905777, "learning_rate": 8.789473684210527e-06, "loss": 2.720752716064453, "step": 335 }, { "epoch": 0.26540284360189575, "grad_norm": 15.209731085255642, "learning_rate": 8.81578947368421e-06, "loss": 2.566018581390381, "step": 336 }, { "epoch": 0.26619273301737756, "grad_norm": 9.966999191876049, "learning_rate": 8.842105263157895e-06, "loss": 3.5632197856903076, "step": 337 }, { "epoch": 0.2669826224328594, "grad_norm": 13.910376272854188, "learning_rate": 8.86842105263158e-06, "loss": 2.7050907611846924, "step": 338 }, { "epoch": 0.2677725118483412, "grad_norm": 16.554248620157857, "learning_rate": 8.894736842105264e-06, "loss": 2.687314987182617, "step": 339 }, { "epoch": 0.2685624012638231, "grad_norm": 8.06865971019336, "learning_rate": 8.921052631578949e-06, "loss": 2.154885768890381, "step": 340 }, { "epoch": 0.2693522906793049, "grad_norm": 11.43463991295312, "learning_rate": 8.947368421052632e-06, "loss": 3.1463260650634766, "step": 341 }, { "epoch": 0.27014218009478674, "grad_norm": 8.94663093119559, "learning_rate": 8.973684210526317e-06, "loss": 2.7585976123809814, "step": 342 }, { "epoch": 0.27093206951026855, "grad_norm": 12.607303575239936, "learning_rate": 9e-06, "loss": 2.725893974304199, "step": 343 }, { "epoch": 0.2717219589257504, "grad_norm": 40.64523343780517, "learning_rate": 9.026315789473685e-06, "loss": 4.318365097045898, "step": 344 }, { "epoch": 0.2725118483412322, "grad_norm": 13.253916224420975, "learning_rate": 9.05263157894737e-06, "loss": 3.0328493118286133, "step": 345 }, { "epoch": 0.27330173775671407, "grad_norm": 10.67307265402947, "learning_rate": 9.078947368421054e-06, "loss": 3.1137566566467285, "step": 346 }, { "epoch": 0.2740916271721959, "grad_norm": 21.521088237334375, "learning_rate": 9.105263157894739e-06, "loss": 2.418055295944214, "step": 347 }, { "epoch": 0.27488151658767773, "grad_norm": 11.029078243075645, "learning_rate": 9.131578947368422e-06, "loss": 2.902218818664551, "step": 348 }, { "epoch": 0.27567140600315954, "grad_norm": 9.383500534244467, "learning_rate": 9.157894736842105e-06, "loss": 3.282095432281494, "step": 349 }, { "epoch": 0.2764612954186414, "grad_norm": 12.131725163736556, "learning_rate": 9.18421052631579e-06, "loss": 2.7594404220581055, "step": 350 }, { "epoch": 0.2772511848341232, "grad_norm": 8.647890182750494, "learning_rate": 9.210526315789474e-06, "loss": 2.8441665172576904, "step": 351 }, { "epoch": 0.27804107424960506, "grad_norm": 24.531984631457014, "learning_rate": 9.236842105263159e-06, "loss": 2.7135703563690186, "step": 352 }, { "epoch": 0.27883096366508686, "grad_norm": 18.852163365198667, "learning_rate": 9.263157894736842e-06, "loss": 2.604950428009033, "step": 353 }, { "epoch": 0.2796208530805687, "grad_norm": 8.864599334587579, "learning_rate": 9.289473684210527e-06, "loss": 2.4473190307617188, "step": 354 }, { "epoch": 0.2804107424960506, "grad_norm": 12.76373613296727, "learning_rate": 9.315789473684212e-06, "loss": 2.7692112922668457, "step": 355 }, { "epoch": 0.2812006319115324, "grad_norm": 13.999019935363314, "learning_rate": 9.342105263157895e-06, "loss": 2.699820041656494, "step": 356 }, { "epoch": 0.28199052132701424, "grad_norm": 12.316515560670386, "learning_rate": 9.36842105263158e-06, "loss": 2.9352567195892334, "step": 357 }, { "epoch": 0.28278041074249605, "grad_norm": 12.51002958910899, "learning_rate": 9.394736842105264e-06, "loss": 3.0598864555358887, "step": 358 }, { "epoch": 0.2835703001579779, "grad_norm": 9.85811587507726, "learning_rate": 9.421052631578949e-06, "loss": 2.760265588760376, "step": 359 }, { "epoch": 0.2843601895734597, "grad_norm": 6.097027548328658, "learning_rate": 9.447368421052632e-06, "loss": 2.914360761642456, "step": 360 }, { "epoch": 0.28515007898894157, "grad_norm": 7.443161559538115, "learning_rate": 9.473684210526315e-06, "loss": 2.4574689865112305, "step": 361 }, { "epoch": 0.2859399684044234, "grad_norm": 11.482751191563155, "learning_rate": 9.5e-06, "loss": 2.4862399101257324, "step": 362 }, { "epoch": 0.28672985781990523, "grad_norm": 15.424578264173912, "learning_rate": 9.526315789473684e-06, "loss": 3.046543598175049, "step": 363 }, { "epoch": 0.28751974723538704, "grad_norm": 9.948975453360674, "learning_rate": 9.552631578947369e-06, "loss": 2.4152588844299316, "step": 364 }, { "epoch": 0.2883096366508689, "grad_norm": 13.617591686696079, "learning_rate": 9.578947368421054e-06, "loss": 2.736586332321167, "step": 365 }, { "epoch": 0.2890995260663507, "grad_norm": 15.14504958752738, "learning_rate": 9.605263157894737e-06, "loss": 2.4989566802978516, "step": 366 }, { "epoch": 0.28988941548183256, "grad_norm": 10.799521972148789, "learning_rate": 9.631578947368422e-06, "loss": 3.044905424118042, "step": 367 }, { "epoch": 0.29067930489731436, "grad_norm": 12.761463042249291, "learning_rate": 9.657894736842106e-06, "loss": 3.2041683197021484, "step": 368 }, { "epoch": 0.2914691943127962, "grad_norm": 12.283682401493921, "learning_rate": 9.68421052631579e-06, "loss": 2.799464225769043, "step": 369 }, { "epoch": 0.292259083728278, "grad_norm": 8.71212269177479, "learning_rate": 9.710526315789474e-06, "loss": 2.7430920600891113, "step": 370 }, { "epoch": 0.2930489731437599, "grad_norm": 9.849085667279017, "learning_rate": 9.736842105263159e-06, "loss": 2.5595669746398926, "step": 371 }, { "epoch": 0.2938388625592417, "grad_norm": 10.009176803122426, "learning_rate": 9.763157894736844e-06, "loss": 2.3133273124694824, "step": 372 }, { "epoch": 0.29462875197472355, "grad_norm": 12.273926407605984, "learning_rate": 9.789473684210527e-06, "loss": 3.021390438079834, "step": 373 }, { "epoch": 0.29541864139020535, "grad_norm": 11.93736332674034, "learning_rate": 9.815789473684212e-06, "loss": 2.2160844802856445, "step": 374 }, { "epoch": 0.2962085308056872, "grad_norm": 12.560377306481286, "learning_rate": 9.842105263157896e-06, "loss": 3.1752209663391113, "step": 375 }, { "epoch": 0.296998420221169, "grad_norm": 11.075691497999232, "learning_rate": 9.868421052631579e-06, "loss": 3.2403969764709473, "step": 376 }, { "epoch": 0.29778830963665087, "grad_norm": 9.127329916365918, "learning_rate": 9.894736842105264e-06, "loss": 2.920243501663208, "step": 377 }, { "epoch": 0.2985781990521327, "grad_norm": 20.657276819664517, "learning_rate": 9.921052631578947e-06, "loss": 2.298133134841919, "step": 378 }, { "epoch": 0.29936808846761453, "grad_norm": 11.374882419716124, "learning_rate": 9.947368421052632e-06, "loss": 3.370616912841797, "step": 379 }, { "epoch": 0.3001579778830964, "grad_norm": 62.541878662922215, "learning_rate": 9.973684210526316e-06, "loss": 2.8304061889648438, "step": 380 }, { "epoch": 0.3009478672985782, "grad_norm": 12.325577555726225, "learning_rate": 1e-05, "loss": 2.761497974395752, "step": 381 }, { "epoch": 0.30173775671406006, "grad_norm": 12.217449721945043, "learning_rate": 9.999997887991768e-06, "loss": 2.8636984825134277, "step": 382 }, { "epoch": 0.30252764612954186, "grad_norm": 14.74250753809572, "learning_rate": 9.999991551968855e-06, "loss": 3.0699851512908936, "step": 383 }, { "epoch": 0.3033175355450237, "grad_norm": 10.26093399476213, "learning_rate": 9.999980991936614e-06, "loss": 3.05659818649292, "step": 384 }, { "epoch": 0.3041074249605055, "grad_norm": 9.938077108356492, "learning_rate": 9.999966207903965e-06, "loss": 2.55549955368042, "step": 385 }, { "epoch": 0.3048973143759874, "grad_norm": 15.044911851031989, "learning_rate": 9.999947199883402e-06, "loss": 3.1818037033081055, "step": 386 }, { "epoch": 0.3056872037914692, "grad_norm": 10.290661955924978, "learning_rate": 9.999923967890976e-06, "loss": 2.8169567584991455, "step": 387 }, { "epoch": 0.30647709320695105, "grad_norm": 10.322447186873362, "learning_rate": 9.999896511946318e-06, "loss": 3.116442918777466, "step": 388 }, { "epoch": 0.30726698262243285, "grad_norm": 11.292349498728926, "learning_rate": 9.999864832072623e-06, "loss": 3.246173858642578, "step": 389 }, { "epoch": 0.3080568720379147, "grad_norm": 15.478127685526808, "learning_rate": 9.999828928296653e-06, "loss": 2.8075480461120605, "step": 390 }, { "epoch": 0.3088467614533965, "grad_norm": 11.742069475711396, "learning_rate": 9.999788800648741e-06, "loss": 2.6594908237457275, "step": 391 }, { "epoch": 0.30963665086887837, "grad_norm": 19.69321922275349, "learning_rate": 9.999744449162785e-06, "loss": 3.7354040145874023, "step": 392 }, { "epoch": 0.3104265402843602, "grad_norm": 29.415130395985084, "learning_rate": 9.999695873876255e-06, "loss": 3.1815226078033447, "step": 393 }, { "epoch": 0.31121642969984203, "grad_norm": 9.941742752266487, "learning_rate": 9.999643074830185e-06, "loss": 2.3940351009368896, "step": 394 }, { "epoch": 0.31200631911532384, "grad_norm": 9.210678237061227, "learning_rate": 9.999586052069184e-06, "loss": 2.998382568359375, "step": 395 }, { "epoch": 0.3127962085308057, "grad_norm": 10.901057718421931, "learning_rate": 9.99952480564142e-06, "loss": 3.067406177520752, "step": 396 }, { "epoch": 0.3135860979462875, "grad_norm": 9.78128771641425, "learning_rate": 9.999459335598639e-06, "loss": 2.52431058883667, "step": 397 }, { "epoch": 0.31437598736176936, "grad_norm": 14.851243022076401, "learning_rate": 9.999389641996148e-06, "loss": 3.1244137287139893, "step": 398 }, { "epoch": 0.31516587677725116, "grad_norm": 14.329659712124773, "learning_rate": 9.999315724892824e-06, "loss": 2.4937219619750977, "step": 399 }, { "epoch": 0.315955766192733, "grad_norm": 20.409817077031228, "learning_rate": 9.999237584351112e-06, "loss": 3.1036581993103027, "step": 400 }, { "epoch": 0.3167456556082148, "grad_norm": 14.805880939701334, "learning_rate": 9.999155220437027e-06, "loss": 3.108419418334961, "step": 401 }, { "epoch": 0.3175355450236967, "grad_norm": 14.542870401516955, "learning_rate": 9.99906863322015e-06, "loss": 2.667811632156372, "step": 402 }, { "epoch": 0.3183254344391785, "grad_norm": 12.987665849474283, "learning_rate": 9.99897782277363e-06, "loss": 2.661196708679199, "step": 403 }, { "epoch": 0.31911532385466035, "grad_norm": 6.198659599172178, "learning_rate": 9.998882789174182e-06, "loss": 2.734131336212158, "step": 404 }, { "epoch": 0.31990521327014215, "grad_norm": 9.512514147413063, "learning_rate": 9.998783532502094e-06, "loss": 3.1050682067871094, "step": 405 }, { "epoch": 0.320695102685624, "grad_norm": 12.163533280198134, "learning_rate": 9.998680052841217e-06, "loss": 3.1977107524871826, "step": 406 }, { "epoch": 0.32148499210110587, "grad_norm": 14.380824912116953, "learning_rate": 9.99857235027897e-06, "loss": 2.267914056777954, "step": 407 }, { "epoch": 0.3222748815165877, "grad_norm": 20.641763401030374, "learning_rate": 9.998460424906343e-06, "loss": 3.1761436462402344, "step": 408 }, { "epoch": 0.32306477093206953, "grad_norm": 13.912380925992013, "learning_rate": 9.998344276817888e-06, "loss": 2.7362403869628906, "step": 409 }, { "epoch": 0.32385466034755134, "grad_norm": 19.47995219931616, "learning_rate": 9.998223906111728e-06, "loss": 2.5798957347869873, "step": 410 }, { "epoch": 0.3246445497630332, "grad_norm": 9.554629411150461, "learning_rate": 9.998099312889553e-06, "loss": 3.0284502506256104, "step": 411 }, { "epoch": 0.325434439178515, "grad_norm": 7.846029378625489, "learning_rate": 9.997970497256619e-06, "loss": 3.201026201248169, "step": 412 }, { "epoch": 0.32622432859399686, "grad_norm": 14.900346297835531, "learning_rate": 9.997837459321753e-06, "loss": 2.7072229385375977, "step": 413 }, { "epoch": 0.32701421800947866, "grad_norm": 13.87299677542047, "learning_rate": 9.997700199197342e-06, "loss": 3.2912824153900146, "step": 414 }, { "epoch": 0.3278041074249605, "grad_norm": 9.946081688834646, "learning_rate": 9.997558716999345e-06, "loss": 2.7330331802368164, "step": 415 }, { "epoch": 0.3285939968404423, "grad_norm": 11.42849151295169, "learning_rate": 9.997413012847288e-06, "loss": 2.7119922637939453, "step": 416 }, { "epoch": 0.3293838862559242, "grad_norm": 7.619660611663469, "learning_rate": 9.997263086864261e-06, "loss": 2.9227335453033447, "step": 417 }, { "epoch": 0.330173775671406, "grad_norm": 8.897091541230195, "learning_rate": 9.997108939176923e-06, "loss": 2.9391653537750244, "step": 418 }, { "epoch": 0.33096366508688785, "grad_norm": 12.48321978059039, "learning_rate": 9.996950569915496e-06, "loss": 2.7242140769958496, "step": 419 }, { "epoch": 0.33175355450236965, "grad_norm": 7.856468794186075, "learning_rate": 9.996787979213774e-06, "loss": 2.9598379135131836, "step": 420 }, { "epoch": 0.3325434439178515, "grad_norm": 6.881298773384626, "learning_rate": 9.996621167209112e-06, "loss": 2.7800540924072266, "step": 421 }, { "epoch": 0.3333333333333333, "grad_norm": 8.554885683292087, "learning_rate": 9.996450134042435e-06, "loss": 3.363186836242676, "step": 422 }, { "epoch": 0.3341232227488152, "grad_norm": 17.067634709237346, "learning_rate": 9.99627487985823e-06, "loss": 2.5413265228271484, "step": 423 }, { "epoch": 0.334913112164297, "grad_norm": 7.715891787574891, "learning_rate": 9.996095404804556e-06, "loss": 2.6663031578063965, "step": 424 }, { "epoch": 0.33570300157977884, "grad_norm": 12.33434757520458, "learning_rate": 9.99591170903303e-06, "loss": 3.798855781555176, "step": 425 }, { "epoch": 0.33649289099526064, "grad_norm": 12.051508519336322, "learning_rate": 9.995723792698841e-06, "loss": 2.8377747535705566, "step": 426 }, { "epoch": 0.3372827804107425, "grad_norm": 15.639973760945276, "learning_rate": 9.995531655960742e-06, "loss": 2.916019916534424, "step": 427 }, { "epoch": 0.3380726698262243, "grad_norm": 19.40003707121016, "learning_rate": 9.995335298981051e-06, "loss": 3.0988028049468994, "step": 428 }, { "epoch": 0.33886255924170616, "grad_norm": 8.979641220891013, "learning_rate": 9.995134721925647e-06, "loss": 3.1492342948913574, "step": 429 }, { "epoch": 0.33965244865718797, "grad_norm": 21.875396313167574, "learning_rate": 9.994929924963982e-06, "loss": 2.8157858848571777, "step": 430 }, { "epoch": 0.3404423380726698, "grad_norm": 10.410663939939454, "learning_rate": 9.99472090826907e-06, "loss": 2.898981809616089, "step": 431 }, { "epoch": 0.3412322274881517, "grad_norm": 7.870464638075447, "learning_rate": 9.994507672017483e-06, "loss": 2.815958023071289, "step": 432 }, { "epoch": 0.3420221169036335, "grad_norm": 10.074834498515466, "learning_rate": 9.99429021638937e-06, "loss": 2.8587734699249268, "step": 433 }, { "epoch": 0.34281200631911535, "grad_norm": 9.479047537805325, "learning_rate": 9.994068541568439e-06, "loss": 2.758235216140747, "step": 434 }, { "epoch": 0.34360189573459715, "grad_norm": 11.805213339073592, "learning_rate": 9.993842647741955e-06, "loss": 2.754237174987793, "step": 435 }, { "epoch": 0.344391785150079, "grad_norm": 28.89113952525437, "learning_rate": 9.993612535100759e-06, "loss": 2.8837943077087402, "step": 436 }, { "epoch": 0.3451816745655608, "grad_norm": 11.902001596459298, "learning_rate": 9.993378203839248e-06, "loss": 2.9341726303100586, "step": 437 }, { "epoch": 0.3459715639810427, "grad_norm": 41.81687674626728, "learning_rate": 9.99313965415539e-06, "loss": 2.7734274864196777, "step": 438 }, { "epoch": 0.3467614533965245, "grad_norm": 24.406866447867205, "learning_rate": 9.992896886250708e-06, "loss": 3.654956817626953, "step": 439 }, { "epoch": 0.34755134281200634, "grad_norm": 9.751754855929297, "learning_rate": 9.992649900330295e-06, "loss": 2.4842309951782227, "step": 440 }, { "epoch": 0.34834123222748814, "grad_norm": 9.90576591780518, "learning_rate": 9.992398696602805e-06, "loss": 2.8286516666412354, "step": 441 }, { "epoch": 0.34913112164297, "grad_norm": 15.419437091625719, "learning_rate": 9.992143275280458e-06, "loss": 2.875483989715576, "step": 442 }, { "epoch": 0.3499210110584518, "grad_norm": 19.78870286772306, "learning_rate": 9.991883636579031e-06, "loss": 2.9084625244140625, "step": 443 }, { "epoch": 0.35071090047393366, "grad_norm": 12.586490948950289, "learning_rate": 9.99161978071787e-06, "loss": 3.2318763732910156, "step": 444 }, { "epoch": 0.35150078988941547, "grad_norm": 14.996840973011029, "learning_rate": 9.991351707919881e-06, "loss": 2.615654468536377, "step": 445 }, { "epoch": 0.3522906793048973, "grad_norm": 18.69675800078161, "learning_rate": 9.991079418411534e-06, "loss": 2.594451427459717, "step": 446 }, { "epoch": 0.35308056872037913, "grad_norm": 12.66669407216749, "learning_rate": 9.990802912422857e-06, "loss": 2.9762067794799805, "step": 447 }, { "epoch": 0.353870458135861, "grad_norm": 6.37331509167863, "learning_rate": 9.990522190187446e-06, "loss": 2.9721593856811523, "step": 448 }, { "epoch": 0.3546603475513428, "grad_norm": 12.731115361238068, "learning_rate": 9.990237251942455e-06, "loss": 3.0602238178253174, "step": 449 }, { "epoch": 0.35545023696682465, "grad_norm": 10.595337761744732, "learning_rate": 9.9899480979286e-06, "loss": 2.5146780014038086, "step": 450 }, { "epoch": 0.35624012638230645, "grad_norm": 9.131974465956034, "learning_rate": 9.98965472839016e-06, "loss": 2.864349842071533, "step": 451 }, { "epoch": 0.3570300157977883, "grad_norm": 9.313334991197689, "learning_rate": 9.989357143574977e-06, "loss": 3.1569554805755615, "step": 452 }, { "epoch": 0.3578199052132701, "grad_norm": 9.952415070876715, "learning_rate": 9.989055343734446e-06, "loss": 2.114063262939453, "step": 453 }, { "epoch": 0.358609794628752, "grad_norm": 8.442614576050127, "learning_rate": 9.988749329123532e-06, "loss": 2.6696736812591553, "step": 454 }, { "epoch": 0.3593996840442338, "grad_norm": 12.468144651927648, "learning_rate": 9.988439100000758e-06, "loss": 2.951082468032837, "step": 455 }, { "epoch": 0.36018957345971564, "grad_norm": 13.301749174192217, "learning_rate": 9.988124656628205e-06, "loss": 2.8643898963928223, "step": 456 }, { "epoch": 0.3609794628751975, "grad_norm": 51.815990840364165, "learning_rate": 9.987805999271517e-06, "loss": 2.082789421081543, "step": 457 }, { "epoch": 0.3617693522906793, "grad_norm": 14.907621177805696, "learning_rate": 9.987483128199896e-06, "loss": 2.7254719734191895, "step": 458 }, { "epoch": 0.36255924170616116, "grad_norm": 11.168139294706775, "learning_rate": 9.987156043686103e-06, "loss": 2.920536994934082, "step": 459 }, { "epoch": 0.36334913112164297, "grad_norm": 14.696725014468187, "learning_rate": 9.986824746006463e-06, "loss": 2.5195441246032715, "step": 460 }, { "epoch": 0.3641390205371248, "grad_norm": 13.759499455594858, "learning_rate": 9.986489235440858e-06, "loss": 3.0576400756835938, "step": 461 }, { "epoch": 0.36492890995260663, "grad_norm": 24.22268015695568, "learning_rate": 9.986149512272723e-06, "loss": 2.798210382461548, "step": 462 }, { "epoch": 0.3657187993680885, "grad_norm": 7.164355232959107, "learning_rate": 9.985805576789061e-06, "loss": 3.2072739601135254, "step": 463 }, { "epoch": 0.3665086887835703, "grad_norm": 21.200711435446056, "learning_rate": 9.985457429280431e-06, "loss": 2.2786130905151367, "step": 464 }, { "epoch": 0.36729857819905215, "grad_norm": 10.245055778605597, "learning_rate": 9.985105070040948e-06, "loss": 2.6123218536376953, "step": 465 }, { "epoch": 0.36808846761453395, "grad_norm": 11.013361642571633, "learning_rate": 9.984748499368285e-06, "loss": 2.8954989910125732, "step": 466 }, { "epoch": 0.3688783570300158, "grad_norm": 11.61804983156601, "learning_rate": 9.984387717563675e-06, "loss": 2.308267116546631, "step": 467 }, { "epoch": 0.3696682464454976, "grad_norm": 19.221327600533158, "learning_rate": 9.984022724931908e-06, "loss": 3.475597381591797, "step": 468 }, { "epoch": 0.3704581358609795, "grad_norm": 11.854231560542186, "learning_rate": 9.98365352178133e-06, "loss": 3.1217355728149414, "step": 469 }, { "epoch": 0.3712480252764613, "grad_norm": 6.403968704450684, "learning_rate": 9.983280108423846e-06, "loss": 3.116569995880127, "step": 470 }, { "epoch": 0.37203791469194314, "grad_norm": 9.683074091826104, "learning_rate": 9.982902485174917e-06, "loss": 2.748913526535034, "step": 471 }, { "epoch": 0.37282780410742494, "grad_norm": 45.29403837946451, "learning_rate": 9.98252065235356e-06, "loss": 2.734304428100586, "step": 472 }, { "epoch": 0.3736176935229068, "grad_norm": 8.629169803711216, "learning_rate": 9.982134610282348e-06, "loss": 2.9549429416656494, "step": 473 }, { "epoch": 0.3744075829383886, "grad_norm": 11.378526346658687, "learning_rate": 9.98174435928741e-06, "loss": 3.1600706577301025, "step": 474 }, { "epoch": 0.37519747235387046, "grad_norm": 13.877969372280013, "learning_rate": 9.981349899698433e-06, "loss": 3.0187835693359375, "step": 475 }, { "epoch": 0.37598736176935227, "grad_norm": 16.082987857555008, "learning_rate": 9.98095123184866e-06, "loss": 2.525953769683838, "step": 476 }, { "epoch": 0.3767772511848341, "grad_norm": 15.97178916944899, "learning_rate": 9.980548356074882e-06, "loss": 2.4600391387939453, "step": 477 }, { "epoch": 0.37756714060031593, "grad_norm": 11.222135044819899, "learning_rate": 9.980141272717451e-06, "loss": 3.0577778816223145, "step": 478 }, { "epoch": 0.3783570300157978, "grad_norm": 10.38398589216634, "learning_rate": 9.979729982120274e-06, "loss": 2.5604796409606934, "step": 479 }, { "epoch": 0.3791469194312796, "grad_norm": 28.714703496966944, "learning_rate": 9.979314484630812e-06, "loss": 2.344428300857544, "step": 480 }, { "epoch": 0.37993680884676145, "grad_norm": 8.421863202990965, "learning_rate": 9.978894780600072e-06, "loss": 2.434558391571045, "step": 481 }, { "epoch": 0.3807266982622433, "grad_norm": 8.802465948474383, "learning_rate": 9.978470870382631e-06, "loss": 2.9265592098236084, "step": 482 }, { "epoch": 0.3815165876777251, "grad_norm": 16.360909810195515, "learning_rate": 9.9780427543366e-06, "loss": 2.2389657497406006, "step": 483 }, { "epoch": 0.382306477093207, "grad_norm": 12.714530836347008, "learning_rate": 9.977610432823661e-06, "loss": 2.518057346343994, "step": 484 }, { "epoch": 0.3830963665086888, "grad_norm": 12.593833443055948, "learning_rate": 9.977173906209035e-06, "loss": 2.7258565425872803, "step": 485 }, { "epoch": 0.38388625592417064, "grad_norm": 12.935614043323996, "learning_rate": 9.976733174861504e-06, "loss": 2.57004451751709, "step": 486 }, { "epoch": 0.38467614533965244, "grad_norm": 6.953226645670275, "learning_rate": 9.9762882391534e-06, "loss": 2.808042526245117, "step": 487 }, { "epoch": 0.3854660347551343, "grad_norm": 7.174411200974892, "learning_rate": 9.975839099460603e-06, "loss": 2.4963083267211914, "step": 488 }, { "epoch": 0.3862559241706161, "grad_norm": 8.613913754322418, "learning_rate": 9.97538575616255e-06, "loss": 2.676271438598633, "step": 489 }, { "epoch": 0.38704581358609796, "grad_norm": 11.876597542420834, "learning_rate": 9.974928209642225e-06, "loss": 2.7614307403564453, "step": 490 }, { "epoch": 0.38783570300157977, "grad_norm": 11.093132079984253, "learning_rate": 9.974466460286168e-06, "loss": 2.624708414077759, "step": 491 }, { "epoch": 0.3886255924170616, "grad_norm": 10.27538264079467, "learning_rate": 9.974000508484464e-06, "loss": 2.5740928649902344, "step": 492 }, { "epoch": 0.38941548183254343, "grad_norm": 12.05113290396417, "learning_rate": 9.97353035463075e-06, "loss": 3.132807731628418, "step": 493 }, { "epoch": 0.3902053712480253, "grad_norm": 10.616071459580557, "learning_rate": 9.973055999122217e-06, "loss": 3.1886236667633057, "step": 494 }, { "epoch": 0.3909952606635071, "grad_norm": 7.166433394997447, "learning_rate": 9.972577442359596e-06, "loss": 2.458066463470459, "step": 495 }, { "epoch": 0.39178515007898895, "grad_norm": 18.273152599081556, "learning_rate": 9.97209468474718e-06, "loss": 2.686516761779785, "step": 496 }, { "epoch": 0.39257503949447076, "grad_norm": 18.046414595846677, "learning_rate": 9.9716077266928e-06, "loss": 3.268564224243164, "step": 497 }, { "epoch": 0.3933649289099526, "grad_norm": 20.306060211042794, "learning_rate": 9.971116568607843e-06, "loss": 2.7214527130126953, "step": 498 }, { "epoch": 0.3941548183254344, "grad_norm": 12.50174696099143, "learning_rate": 9.970621210907236e-06, "loss": 2.9584507942199707, "step": 499 }, { "epoch": 0.3949447077409163, "grad_norm": 12.497953323135684, "learning_rate": 9.970121654009464e-06, "loss": 2.7275800704956055, "step": 500 }, { "epoch": 0.3957345971563981, "grad_norm": 8.96830588462741, "learning_rate": 9.969617898336552e-06, "loss": 2.4311466217041016, "step": 501 }, { "epoch": 0.39652448657187994, "grad_norm": 13.616711842115256, "learning_rate": 9.969109944314075e-06, "loss": 2.9500246047973633, "step": 502 }, { "epoch": 0.39731437598736175, "grad_norm": 17.251138991228274, "learning_rate": 9.968597792371151e-06, "loss": 3.416146755218506, "step": 503 }, { "epoch": 0.3981042654028436, "grad_norm": 19.00639226186363, "learning_rate": 9.968081442940454e-06, "loss": 3.451007604598999, "step": 504 }, { "epoch": 0.3988941548183254, "grad_norm": 7.711636407435598, "learning_rate": 9.967560896458192e-06, "loss": 2.7228689193725586, "step": 505 }, { "epoch": 0.39968404423380727, "grad_norm": 12.583460827994319, "learning_rate": 9.967036153364127e-06, "loss": 2.8506970405578613, "step": 506 }, { "epoch": 0.4004739336492891, "grad_norm": 12.221260607836053, "learning_rate": 9.966507214101565e-06, "loss": 2.9692885875701904, "step": 507 }, { "epoch": 0.40126382306477093, "grad_norm": 12.905892162067822, "learning_rate": 9.965974079117351e-06, "loss": 3.444052219390869, "step": 508 }, { "epoch": 0.4020537124802528, "grad_norm": 11.169864619254174, "learning_rate": 9.965436748861883e-06, "loss": 3.00361967086792, "step": 509 }, { "epoch": 0.4028436018957346, "grad_norm": 18.923855282243036, "learning_rate": 9.9648952237891e-06, "loss": 2.179131507873535, "step": 510 }, { "epoch": 0.40363349131121645, "grad_norm": 17.18325282035086, "learning_rate": 9.964349504356481e-06, "loss": 2.724170446395874, "step": 511 }, { "epoch": 0.40442338072669826, "grad_norm": 9.348305791417955, "learning_rate": 9.963799591025054e-06, "loss": 2.658226490020752, "step": 512 }, { "epoch": 0.4052132701421801, "grad_norm": 14.084370825683834, "learning_rate": 9.963245484259384e-06, "loss": 3.301179885864258, "step": 513 }, { "epoch": 0.4060031595576619, "grad_norm": 9.131570579938595, "learning_rate": 9.96268718452759e-06, "loss": 2.7031455039978027, "step": 514 }, { "epoch": 0.4067930489731438, "grad_norm": 7.293530938243252, "learning_rate": 9.962124692301315e-06, "loss": 2.310668468475342, "step": 515 }, { "epoch": 0.4075829383886256, "grad_norm": 28.464034341123952, "learning_rate": 9.961558008055764e-06, "loss": 2.6063344478607178, "step": 516 }, { "epoch": 0.40837282780410744, "grad_norm": 7.628841921327853, "learning_rate": 9.960987132269668e-06, "loss": 2.0414226055145264, "step": 517 }, { "epoch": 0.40916271721958924, "grad_norm": 14.44432111132087, "learning_rate": 9.960412065425308e-06, "loss": 2.770200252532959, "step": 518 }, { "epoch": 0.4099526066350711, "grad_norm": 11.901956808416232, "learning_rate": 9.959832808008498e-06, "loss": 2.8997509479522705, "step": 519 }, { "epoch": 0.4107424960505529, "grad_norm": 14.40790924911134, "learning_rate": 9.959249360508598e-06, "loss": 2.9758782386779785, "step": 520 }, { "epoch": 0.41153238546603477, "grad_norm": 9.725811994478915, "learning_rate": 9.95866172341851e-06, "loss": 2.986323356628418, "step": 521 }, { "epoch": 0.41232227488151657, "grad_norm": 16.22979712148631, "learning_rate": 9.95806989723467e-06, "loss": 2.5895464420318604, "step": 522 }, { "epoch": 0.41311216429699843, "grad_norm": 13.923489867833995, "learning_rate": 9.957473882457051e-06, "loss": 2.687991142272949, "step": 523 }, { "epoch": 0.41390205371248023, "grad_norm": 26.95251995610371, "learning_rate": 9.956873679589173e-06, "loss": 2.4715166091918945, "step": 524 }, { "epoch": 0.4146919431279621, "grad_norm": 14.00634199685074, "learning_rate": 9.956269289138088e-06, "loss": 2.5624163150787354, "step": 525 }, { "epoch": 0.4154818325434439, "grad_norm": 29.26485590244888, "learning_rate": 9.955660711614386e-06, "loss": 2.6949751377105713, "step": 526 }, { "epoch": 0.41627172195892576, "grad_norm": 13.78447550231186, "learning_rate": 9.955047947532194e-06, "loss": 3.0492568016052246, "step": 527 }, { "epoch": 0.41706161137440756, "grad_norm": 9.027477437625507, "learning_rate": 9.954430997409181e-06, "loss": 3.8118910789489746, "step": 528 }, { "epoch": 0.4178515007898894, "grad_norm": 8.039786133247507, "learning_rate": 9.953809861766547e-06, "loss": 3.058897018432617, "step": 529 }, { "epoch": 0.4186413902053712, "grad_norm": 16.995294036547257, "learning_rate": 9.953184541129029e-06, "loss": 3.157442808151245, "step": 530 }, { "epoch": 0.4194312796208531, "grad_norm": 14.009340053986007, "learning_rate": 9.952555036024898e-06, "loss": 2.9258034229278564, "step": 531 }, { "epoch": 0.42022116903633494, "grad_norm": 12.583256188813682, "learning_rate": 9.951921346985966e-06, "loss": 2.772176742553711, "step": 532 }, { "epoch": 0.42101105845181674, "grad_norm": 14.778680678932332, "learning_rate": 9.951283474547574e-06, "loss": 3.1442911624908447, "step": 533 }, { "epoch": 0.4218009478672986, "grad_norm": 10.191686358833875, "learning_rate": 9.950641419248595e-06, "loss": 2.6074397563934326, "step": 534 }, { "epoch": 0.4225908372827804, "grad_norm": 13.596508027495249, "learning_rate": 9.949995181631444e-06, "loss": 2.861325740814209, "step": 535 }, { "epoch": 0.42338072669826227, "grad_norm": 16.87570821527581, "learning_rate": 9.949344762242064e-06, "loss": 2.9847991466522217, "step": 536 }, { "epoch": 0.42417061611374407, "grad_norm": 15.098978095560872, "learning_rate": 9.94869016162993e-06, "loss": 3.360105037689209, "step": 537 }, { "epoch": 0.42496050552922593, "grad_norm": 11.916401679971019, "learning_rate": 9.948031380348051e-06, "loss": 2.6311533451080322, "step": 538 }, { "epoch": 0.42575039494470773, "grad_norm": 15.393668192114527, "learning_rate": 9.94736841895297e-06, "loss": 2.572305202484131, "step": 539 }, { "epoch": 0.4265402843601896, "grad_norm": 21.88619737730292, "learning_rate": 9.946701278004755e-06, "loss": 2.6645431518554688, "step": 540 }, { "epoch": 0.4273301737756714, "grad_norm": 14.623832761629139, "learning_rate": 9.946029958067012e-06, "loss": 2.8375582695007324, "step": 541 }, { "epoch": 0.42812006319115326, "grad_norm": 11.959588905376497, "learning_rate": 9.945354459706873e-06, "loss": 2.8177828788757324, "step": 542 }, { "epoch": 0.42890995260663506, "grad_norm": 11.801706214734535, "learning_rate": 9.944674783495e-06, "loss": 3.4021530151367188, "step": 543 }, { "epoch": 0.4296998420221169, "grad_norm": 8.145218059279367, "learning_rate": 9.94399093000559e-06, "loss": 2.4974822998046875, "step": 544 }, { "epoch": 0.4304897314375987, "grad_norm": 9.708825005813761, "learning_rate": 9.94330289981636e-06, "loss": 2.775845527648926, "step": 545 }, { "epoch": 0.4312796208530806, "grad_norm": 22.963925015075688, "learning_rate": 9.942610693508564e-06, "loss": 2.5559940338134766, "step": 546 }, { "epoch": 0.4320695102685624, "grad_norm": 9.353349856037912, "learning_rate": 9.941914311666976e-06, "loss": 2.7999205589294434, "step": 547 }, { "epoch": 0.43285939968404424, "grad_norm": 13.107963046441109, "learning_rate": 9.941213754879904e-06, "loss": 2.6367478370666504, "step": 548 }, { "epoch": 0.43364928909952605, "grad_norm": 9.146316137469308, "learning_rate": 9.940509023739181e-06, "loss": 2.6994175910949707, "step": 549 }, { "epoch": 0.4344391785150079, "grad_norm": 6.883523518701926, "learning_rate": 9.939800118840167e-06, "loss": 2.807130813598633, "step": 550 }, { "epoch": 0.4352290679304897, "grad_norm": 9.612172104441717, "learning_rate": 9.939087040781743e-06, "loss": 2.729193687438965, "step": 551 }, { "epoch": 0.43601895734597157, "grad_norm": 24.67008747020927, "learning_rate": 9.938369790166325e-06, "loss": 2.557534694671631, "step": 552 }, { "epoch": 0.4368088467614534, "grad_norm": 8.054347498191312, "learning_rate": 9.937648367599845e-06, "loss": 2.8205268383026123, "step": 553 }, { "epoch": 0.43759873617693523, "grad_norm": 14.422963472461976, "learning_rate": 9.936922773691764e-06, "loss": 2.7715141773223877, "step": 554 }, { "epoch": 0.43838862559241704, "grad_norm": 7.484053892132038, "learning_rate": 9.93619300905507e-06, "loss": 2.254258155822754, "step": 555 }, { "epoch": 0.4391785150078989, "grad_norm": 11.183683079441906, "learning_rate": 9.935459074306261e-06, "loss": 2.682985544204712, "step": 556 }, { "epoch": 0.4399684044233807, "grad_norm": 9.16127826816926, "learning_rate": 9.934720970065379e-06, "loss": 2.539468288421631, "step": 557 }, { "epoch": 0.44075829383886256, "grad_norm": 7.141623413765421, "learning_rate": 9.93397869695597e-06, "loss": 2.5426435470581055, "step": 558 }, { "epoch": 0.4415481832543444, "grad_norm": 10.23954487534604, "learning_rate": 9.93323225560511e-06, "loss": 3.2652475833892822, "step": 559 }, { "epoch": 0.4423380726698262, "grad_norm": 16.167362326761815, "learning_rate": 9.932481646643395e-06, "loss": 2.5560061931610107, "step": 560 }, { "epoch": 0.4431279620853081, "grad_norm": 24.50508286675025, "learning_rate": 9.931726870704943e-06, "loss": 2.4910902976989746, "step": 561 }, { "epoch": 0.4439178515007899, "grad_norm": 13.272899493408493, "learning_rate": 9.930967928427389e-06, "loss": 3.1928012371063232, "step": 562 }, { "epoch": 0.44470774091627174, "grad_norm": 11.029176665335946, "learning_rate": 9.930204820451892e-06, "loss": 2.045280933380127, "step": 563 }, { "epoch": 0.44549763033175355, "grad_norm": 19.40400206586639, "learning_rate": 9.92943754742313e-06, "loss": 2.734166145324707, "step": 564 }, { "epoch": 0.4462875197472354, "grad_norm": 7.706073509198339, "learning_rate": 9.928666109989294e-06, "loss": 2.8022024631500244, "step": 565 }, { "epoch": 0.4470774091627172, "grad_norm": 14.62203101954205, "learning_rate": 9.927890508802096e-06, "loss": 2.7379016876220703, "step": 566 }, { "epoch": 0.44786729857819907, "grad_norm": 17.181389624231375, "learning_rate": 9.92711074451677e-06, "loss": 2.999567985534668, "step": 567 }, { "epoch": 0.4486571879936809, "grad_norm": 14.992846352483394, "learning_rate": 9.926326817792065e-06, "loss": 2.635314464569092, "step": 568 }, { "epoch": 0.44944707740916273, "grad_norm": 7.6279407925730816, "learning_rate": 9.925538729290239e-06, "loss": 2.3740317821502686, "step": 569 }, { "epoch": 0.45023696682464454, "grad_norm": 15.752498964379894, "learning_rate": 9.924746479677075e-06, "loss": 2.9476394653320312, "step": 570 }, { "epoch": 0.4510268562401264, "grad_norm": 7.93326356210501, "learning_rate": 9.923950069621868e-06, "loss": 3.3303630352020264, "step": 571 }, { "epoch": 0.4518167456556082, "grad_norm": 29.90614936882604, "learning_rate": 9.923149499797429e-06, "loss": 3.194509267807007, "step": 572 }, { "epoch": 0.45260663507109006, "grad_norm": 13.371064507421321, "learning_rate": 9.92234477088008e-06, "loss": 2.94869327545166, "step": 573 }, { "epoch": 0.45339652448657186, "grad_norm": 15.897418580785546, "learning_rate": 9.921535883549658e-06, "loss": 2.7056546211242676, "step": 574 }, { "epoch": 0.4541864139020537, "grad_norm": 26.124918845816804, "learning_rate": 9.920722838489515e-06, "loss": 3.060375452041626, "step": 575 }, { "epoch": 0.4549763033175355, "grad_norm": 18.882545145062025, "learning_rate": 9.919905636386516e-06, "loss": 3.0005345344543457, "step": 576 }, { "epoch": 0.4557661927330174, "grad_norm": 13.37328310128464, "learning_rate": 9.919084277931033e-06, "loss": 2.5772323608398438, "step": 577 }, { "epoch": 0.4565560821484992, "grad_norm": 17.390743534176387, "learning_rate": 9.918258763816954e-06, "loss": 3.439105749130249, "step": 578 }, { "epoch": 0.45734597156398105, "grad_norm": 13.795578608418804, "learning_rate": 9.917429094741676e-06, "loss": 2.9797146320343018, "step": 579 }, { "epoch": 0.45813586097946285, "grad_norm": 14.581579263670818, "learning_rate": 9.916595271406104e-06, "loss": 3.3291659355163574, "step": 580 }, { "epoch": 0.4589257503949447, "grad_norm": 23.055562889090734, "learning_rate": 9.915757294514658e-06, "loss": 3.4787819385528564, "step": 581 }, { "epoch": 0.4597156398104265, "grad_norm": 11.454956025296493, "learning_rate": 9.91491516477526e-06, "loss": 2.543201446533203, "step": 582 }, { "epoch": 0.46050552922590837, "grad_norm": 27.44348974944536, "learning_rate": 9.91406888289935e-06, "loss": 3.8477420806884766, "step": 583 }, { "epoch": 0.46129541864139023, "grad_norm": 13.748326155704126, "learning_rate": 9.913218449601862e-06, "loss": 3.204080581665039, "step": 584 }, { "epoch": 0.46208530805687204, "grad_norm": 14.852830560317845, "learning_rate": 9.912363865601252e-06, "loss": 2.0701780319213867, "step": 585 }, { "epoch": 0.4628751974723539, "grad_norm": 9.286607736299267, "learning_rate": 9.911505131619467e-06, "loss": 2.616168975830078, "step": 586 }, { "epoch": 0.4636650868878357, "grad_norm": 27.1312162902194, "learning_rate": 9.910642248381978e-06, "loss": 3.5931811332702637, "step": 587 }, { "epoch": 0.46445497630331756, "grad_norm": 32.41215406453979, "learning_rate": 9.909775216617746e-06, "loss": 2.6403136253356934, "step": 588 }, { "epoch": 0.46524486571879936, "grad_norm": 16.462651257709005, "learning_rate": 9.908904037059242e-06, "loss": 1.806509017944336, "step": 589 }, { "epoch": 0.4660347551342812, "grad_norm": 8.054217161807381, "learning_rate": 9.908028710442443e-06, "loss": 2.955305337905884, "step": 590 }, { "epoch": 0.466824644549763, "grad_norm": 13.079572202568693, "learning_rate": 9.907149237506825e-06, "loss": 3.071561098098755, "step": 591 }, { "epoch": 0.4676145339652449, "grad_norm": 17.039788808514484, "learning_rate": 9.906265618995375e-06, "loss": 3.0196356773376465, "step": 592 }, { "epoch": 0.4684044233807267, "grad_norm": 13.364691523127064, "learning_rate": 9.905377855654574e-06, "loss": 2.70352840423584, "step": 593 }, { "epoch": 0.46919431279620855, "grad_norm": 10.365123705943594, "learning_rate": 9.904485948234406e-06, "loss": 2.782586097717285, "step": 594 }, { "epoch": 0.46998420221169035, "grad_norm": 17.914445810601254, "learning_rate": 9.903589897488358e-06, "loss": 3.854835271835327, "step": 595 }, { "epoch": 0.4707740916271722, "grad_norm": 14.706414319504669, "learning_rate": 9.902689704173418e-06, "loss": 3.0983946323394775, "step": 596 }, { "epoch": 0.471563981042654, "grad_norm": 11.041528398876812, "learning_rate": 9.901785369050073e-06, "loss": 3.148883819580078, "step": 597 }, { "epoch": 0.47235387045813587, "grad_norm": 12.764570521775047, "learning_rate": 9.900876892882303e-06, "loss": 3.0621113777160645, "step": 598 }, { "epoch": 0.4731437598736177, "grad_norm": 12.917586958199212, "learning_rate": 9.899964276437596e-06, "loss": 2.7622828483581543, "step": 599 }, { "epoch": 0.47393364928909953, "grad_norm": 11.372886554785106, "learning_rate": 9.899047520486935e-06, "loss": 2.379685878753662, "step": 600 }, { "epoch": 0.47472353870458134, "grad_norm": 8.25416016120882, "learning_rate": 9.898126625804796e-06, "loss": 2.2554409503936768, "step": 601 }, { "epoch": 0.4755134281200632, "grad_norm": 13.238723315824029, "learning_rate": 9.897201593169153e-06, "loss": 2.7117209434509277, "step": 602 }, { "epoch": 0.476303317535545, "grad_norm": 11.785948359638766, "learning_rate": 9.896272423361479e-06, "loss": 2.219001531600952, "step": 603 }, { "epoch": 0.47709320695102686, "grad_norm": 16.426783052438104, "learning_rate": 9.895339117166737e-06, "loss": 3.105238199234009, "step": 604 }, { "epoch": 0.47788309636650866, "grad_norm": 9.983370358512682, "learning_rate": 9.894401675373388e-06, "loss": 2.7501213550567627, "step": 605 }, { "epoch": 0.4786729857819905, "grad_norm": 11.613745339741977, "learning_rate": 9.89346009877339e-06, "loss": 3.157028913497925, "step": 606 }, { "epoch": 0.4794628751974723, "grad_norm": 16.033920286391126, "learning_rate": 9.892514388162183e-06, "loss": 2.930591106414795, "step": 607 }, { "epoch": 0.4802527646129542, "grad_norm": 26.306838660431477, "learning_rate": 9.89156454433871e-06, "loss": 2.365173816680908, "step": 608 }, { "epoch": 0.48104265402843605, "grad_norm": 8.551782014277038, "learning_rate": 9.890610568105401e-06, "loss": 2.737978935241699, "step": 609 }, { "epoch": 0.48183254344391785, "grad_norm": 6.191554963386605, "learning_rate": 9.889652460268183e-06, "loss": 1.0541880130767822, "step": 610 }, { "epoch": 0.4826224328593997, "grad_norm": 10.343101459770976, "learning_rate": 9.888690221636462e-06, "loss": 3.482835054397583, "step": 611 }, { "epoch": 0.4834123222748815, "grad_norm": 7.536217244705736, "learning_rate": 9.887723853023144e-06, "loss": 2.714404582977295, "step": 612 }, { "epoch": 0.48420221169036337, "grad_norm": 11.909381848673933, "learning_rate": 9.88675335524462e-06, "loss": 2.518251895904541, "step": 613 }, { "epoch": 0.4849921011058452, "grad_norm": 19.55484435219003, "learning_rate": 9.885778729120771e-06, "loss": 3.3546159267425537, "step": 614 }, { "epoch": 0.48578199052132703, "grad_norm": 13.61585851649587, "learning_rate": 9.884799975474961e-06, "loss": 3.282747745513916, "step": 615 }, { "epoch": 0.48657187993680884, "grad_norm": 7.245085293733777, "learning_rate": 9.883817095134048e-06, "loss": 2.7314577102661133, "step": 616 }, { "epoch": 0.4873617693522907, "grad_norm": 9.753488601463083, "learning_rate": 9.882830088928368e-06, "loss": 2.8968541622161865, "step": 617 }, { "epoch": 0.4881516587677725, "grad_norm": 10.570209792818952, "learning_rate": 9.881838957691752e-06, "loss": 2.766514539718628, "step": 618 }, { "epoch": 0.48894154818325436, "grad_norm": 10.820407163130954, "learning_rate": 9.880843702261506e-06, "loss": 2.4016904830932617, "step": 619 }, { "epoch": 0.48973143759873616, "grad_norm": 11.038883967400233, "learning_rate": 9.87984432347843e-06, "loss": 2.8720149993896484, "step": 620 }, { "epoch": 0.490521327014218, "grad_norm": 11.813323160870127, "learning_rate": 9.8788408221868e-06, "loss": 2.9305214881896973, "step": 621 }, { "epoch": 0.4913112164296998, "grad_norm": 11.366049566856011, "learning_rate": 9.877833199234378e-06, "loss": 2.9653875827789307, "step": 622 }, { "epoch": 0.4921011058451817, "grad_norm": 12.438353507803086, "learning_rate": 9.876821455472405e-06, "loss": 2.3867058753967285, "step": 623 }, { "epoch": 0.4928909952606635, "grad_norm": 8.498213125601112, "learning_rate": 9.875805591755608e-06, "loss": 3.2036352157592773, "step": 624 }, { "epoch": 0.49368088467614535, "grad_norm": 16.30479309681846, "learning_rate": 9.874785608942192e-06, "loss": 3.305636167526245, "step": 625 }, { "epoch": 0.49447077409162715, "grad_norm": 10.081140854214283, "learning_rate": 9.87376150789384e-06, "loss": 3.041412353515625, "step": 626 }, { "epoch": 0.495260663507109, "grad_norm": 64.71900103986658, "learning_rate": 9.872733289475717e-06, "loss": 4.324435710906982, "step": 627 }, { "epoch": 0.4960505529225908, "grad_norm": 11.539415904080359, "learning_rate": 9.871700954556464e-06, "loss": 3.1219332218170166, "step": 628 }, { "epoch": 0.4968404423380727, "grad_norm": 16.008989800922443, "learning_rate": 9.870664504008205e-06, "loss": 2.5995893478393555, "step": 629 }, { "epoch": 0.4976303317535545, "grad_norm": 11.383945681171209, "learning_rate": 9.869623938706531e-06, "loss": 2.9473705291748047, "step": 630 }, { "epoch": 0.49842022116903634, "grad_norm": 14.730134044674989, "learning_rate": 9.868579259530519e-06, "loss": 3.243873357772827, "step": 631 }, { "epoch": 0.49921011058451814, "grad_norm": 8.390549227817651, "learning_rate": 9.867530467362718e-06, "loss": 2.4504904747009277, "step": 632 }, { "epoch": 0.5, "grad_norm": 8.424703770575638, "learning_rate": 9.866477563089148e-06, "loss": 2.6318535804748535, "step": 633 }, { "epoch": 0.5007898894154819, "grad_norm": 9.38333524987037, "learning_rate": 9.865420547599308e-06, "loss": 2.7379918098449707, "step": 634 }, { "epoch": 0.5015797788309637, "grad_norm": 9.704149060081368, "learning_rate": 9.864359421786168e-06, "loss": 2.304293632507324, "step": 635 }, { "epoch": 0.5023696682464455, "grad_norm": 24.08311236031377, "learning_rate": 9.863294186546168e-06, "loss": 2.332653522491455, "step": 636 }, { "epoch": 0.5031595576619273, "grad_norm": 16.210594683251617, "learning_rate": 9.862224842779225e-06, "loss": 3.0899691581726074, "step": 637 }, { "epoch": 0.5039494470774092, "grad_norm": 14.171269286760387, "learning_rate": 9.861151391388726e-06, "loss": 2.624315023422241, "step": 638 }, { "epoch": 0.504739336492891, "grad_norm": 17.155749747348096, "learning_rate": 9.86007383328152e-06, "loss": 3.289152145385742, "step": 639 }, { "epoch": 0.5055292259083728, "grad_norm": 8.077042869666613, "learning_rate": 9.858992169367939e-06, "loss": 2.8146300315856934, "step": 640 }, { "epoch": 0.5063191153238547, "grad_norm": 14.900885791368967, "learning_rate": 9.857906400561771e-06, "loss": 3.192298173904419, "step": 641 }, { "epoch": 0.5071090047393365, "grad_norm": 9.42772244547937, "learning_rate": 9.856816527780279e-06, "loss": 2.958979845046997, "step": 642 }, { "epoch": 0.5078988941548184, "grad_norm": 19.45725727165277, "learning_rate": 9.855722551944192e-06, "loss": 2.5792651176452637, "step": 643 }, { "epoch": 0.5086887835703001, "grad_norm": 18.71191352944223, "learning_rate": 9.854624473977702e-06, "loss": 2.2687480449676514, "step": 644 }, { "epoch": 0.509478672985782, "grad_norm": 5.973081965855647, "learning_rate": 9.85352229480847e-06, "loss": 2.5880727767944336, "step": 645 }, { "epoch": 0.5102685624012638, "grad_norm": 7.964099363507532, "learning_rate": 9.852416015367622e-06, "loss": 3.0370497703552246, "step": 646 }, { "epoch": 0.5110584518167457, "grad_norm": 6.554920362859979, "learning_rate": 9.851305636589745e-06, "loss": 2.9287662506103516, "step": 647 }, { "epoch": 0.5118483412322274, "grad_norm": 8.324682375342665, "learning_rate": 9.85019115941289e-06, "loss": 2.7224721908569336, "step": 648 }, { "epoch": 0.5126382306477093, "grad_norm": 15.108696475255462, "learning_rate": 9.849072584778572e-06, "loss": 3.304979085922241, "step": 649 }, { "epoch": 0.5134281200631912, "grad_norm": 13.13698436229945, "learning_rate": 9.847949913631767e-06, "loss": 1.872714877128601, "step": 650 }, { "epoch": 0.514218009478673, "grad_norm": 14.549850333742206, "learning_rate": 9.84682314692091e-06, "loss": 3.086406707763672, "step": 651 }, { "epoch": 0.5150078988941548, "grad_norm": 14.573017384190178, "learning_rate": 9.845692285597898e-06, "loss": 3.119309186935425, "step": 652 }, { "epoch": 0.5157977883096366, "grad_norm": 14.70328848938873, "learning_rate": 9.844557330618087e-06, "loss": 3.3144378662109375, "step": 653 }, { "epoch": 0.5165876777251185, "grad_norm": 13.577075929710624, "learning_rate": 9.843418282940291e-06, "loss": 3.093888282775879, "step": 654 }, { "epoch": 0.5173775671406003, "grad_norm": 16.48466621944885, "learning_rate": 9.842275143526779e-06, "loss": 3.2132608890533447, "step": 655 }, { "epoch": 0.5181674565560821, "grad_norm": 21.816699436754334, "learning_rate": 9.841127913343281e-06, "loss": 2.8770318031311035, "step": 656 }, { "epoch": 0.518957345971564, "grad_norm": 18.527709516001913, "learning_rate": 9.83997659335898e-06, "loss": 3.0780622959136963, "step": 657 }, { "epoch": 0.5197472353870458, "grad_norm": 12.06728106207623, "learning_rate": 9.838821184546513e-06, "loss": 2.7250850200653076, "step": 658 }, { "epoch": 0.5205371248025277, "grad_norm": 24.07545499285517, "learning_rate": 9.837661687881976e-06, "loss": 2.6896378993988037, "step": 659 }, { "epoch": 0.5213270142180095, "grad_norm": 14.021969692199526, "learning_rate": 9.836498104344916e-06, "loss": 2.549968719482422, "step": 660 }, { "epoch": 0.5221169036334913, "grad_norm": 10.925462746063516, "learning_rate": 9.835330434918329e-06, "loss": 2.809274673461914, "step": 661 }, { "epoch": 0.5229067930489731, "grad_norm": 13.630283976038868, "learning_rate": 9.83415868058867e-06, "loss": 3.4539241790771484, "step": 662 }, { "epoch": 0.523696682464455, "grad_norm": 12.004271103191083, "learning_rate": 9.832982842345838e-06, "loss": 3.2775259017944336, "step": 663 }, { "epoch": 0.5244865718799369, "grad_norm": 12.05995942930735, "learning_rate": 9.831802921183184e-06, "loss": 2.543905735015869, "step": 664 }, { "epoch": 0.5252764612954186, "grad_norm": 8.986701835621098, "learning_rate": 9.830618918097514e-06, "loss": 2.7053022384643555, "step": 665 }, { "epoch": 0.5260663507109005, "grad_norm": 9.733936760368861, "learning_rate": 9.829430834089072e-06, "loss": 2.9009079933166504, "step": 666 }, { "epoch": 0.5268562401263823, "grad_norm": 9.705816192138302, "learning_rate": 9.82823867016156e-06, "loss": 2.320451259613037, "step": 667 }, { "epoch": 0.5276461295418642, "grad_norm": 15.366368478511447, "learning_rate": 9.82704242732212e-06, "loss": 3.4952645301818848, "step": 668 }, { "epoch": 0.5284360189573459, "grad_norm": 15.006524588925533, "learning_rate": 9.825842106581343e-06, "loss": 2.6732113361358643, "step": 669 }, { "epoch": 0.5292259083728278, "grad_norm": 9.72530845064569, "learning_rate": 9.824637708953262e-06, "loss": 2.7073092460632324, "step": 670 }, { "epoch": 0.5300157977883097, "grad_norm": 57.6393618147052, "learning_rate": 9.823429235455357e-06, "loss": 2.821194887161255, "step": 671 }, { "epoch": 0.5308056872037915, "grad_norm": 15.587439690063317, "learning_rate": 9.822216687108549e-06, "loss": 3.2857871055603027, "step": 672 }, { "epoch": 0.5315955766192733, "grad_norm": 5.803845483605398, "learning_rate": 9.821000064937205e-06, "loss": 2.699526309967041, "step": 673 }, { "epoch": 0.5323854660347551, "grad_norm": 8.386330014964896, "learning_rate": 9.81977936996913e-06, "loss": 2.623192071914673, "step": 674 }, { "epoch": 0.533175355450237, "grad_norm": 11.975531675577631, "learning_rate": 9.818554603235574e-06, "loss": 2.8475778102874756, "step": 675 }, { "epoch": 0.5339652448657188, "grad_norm": 8.008863647269184, "learning_rate": 9.81732576577122e-06, "loss": 2.2658133506774902, "step": 676 }, { "epoch": 0.5347551342812006, "grad_norm": 22.741549269978968, "learning_rate": 9.816092858614197e-06, "loss": 3.2006266117095947, "step": 677 }, { "epoch": 0.5355450236966824, "grad_norm": 11.748680083922181, "learning_rate": 9.814855882806068e-06, "loss": 2.7071900367736816, "step": 678 }, { "epoch": 0.5363349131121643, "grad_norm": 15.879587406556047, "learning_rate": 9.813614839391831e-06, "loss": 3.224722385406494, "step": 679 }, { "epoch": 0.5371248025276462, "grad_norm": 9.796469130582556, "learning_rate": 9.812369729419928e-06, "loss": 3.0697150230407715, "step": 680 }, { "epoch": 0.5379146919431279, "grad_norm": 14.580990160396306, "learning_rate": 9.811120553942232e-06, "loss": 3.0138320922851562, "step": 681 }, { "epoch": 0.5387045813586098, "grad_norm": 8.267187675857448, "learning_rate": 9.809867314014047e-06, "loss": 2.831322193145752, "step": 682 }, { "epoch": 0.5394944707740916, "grad_norm": 7.636617838042198, "learning_rate": 9.808610010694118e-06, "loss": 3.239677906036377, "step": 683 }, { "epoch": 0.5402843601895735, "grad_norm": 15.391979519107375, "learning_rate": 9.807348645044617e-06, "loss": 2.747056245803833, "step": 684 }, { "epoch": 0.5410742496050553, "grad_norm": 8.961485412404242, "learning_rate": 9.806083218131148e-06, "loss": 2.910431385040283, "step": 685 }, { "epoch": 0.5418641390205371, "grad_norm": 14.197136726081288, "learning_rate": 9.804813731022753e-06, "loss": 3.1174066066741943, "step": 686 }, { "epoch": 0.542654028436019, "grad_norm": 17.168792272156345, "learning_rate": 9.803540184791894e-06, "loss": 3.201021671295166, "step": 687 }, { "epoch": 0.5434439178515008, "grad_norm": 16.249151769463698, "learning_rate": 9.80226258051447e-06, "loss": 3.176429510116577, "step": 688 }, { "epoch": 0.5442338072669827, "grad_norm": 13.891045745287295, "learning_rate": 9.800980919269803e-06, "loss": 2.7803795337677, "step": 689 }, { "epoch": 0.5450236966824644, "grad_norm": 10.877077331774473, "learning_rate": 9.799695202140647e-06, "loss": 2.882291555404663, "step": 690 }, { "epoch": 0.5458135860979463, "grad_norm": 9.592760171635328, "learning_rate": 9.798405430213177e-06, "loss": 2.8458828926086426, "step": 691 }, { "epoch": 0.5466034755134281, "grad_norm": 7.884425647446356, "learning_rate": 9.797111604577e-06, "loss": 2.5656301975250244, "step": 692 }, { "epoch": 0.54739336492891, "grad_norm": 10.716521460356288, "learning_rate": 9.795813726325142e-06, "loss": 2.761523485183716, "step": 693 }, { "epoch": 0.5481832543443917, "grad_norm": 11.360091917539375, "learning_rate": 9.794511796554055e-06, "loss": 2.7499184608459473, "step": 694 }, { "epoch": 0.5489731437598736, "grad_norm": 23.055753815263646, "learning_rate": 9.793205816363616e-06, "loss": 2.517162799835205, "step": 695 }, { "epoch": 0.5497630331753555, "grad_norm": 7.78885839561945, "learning_rate": 9.791895786857118e-06, "loss": 2.723165273666382, "step": 696 }, { "epoch": 0.5505529225908373, "grad_norm": 17.967883170098016, "learning_rate": 9.79058170914128e-06, "loss": 2.6987109184265137, "step": 697 }, { "epoch": 0.5513428120063191, "grad_norm": 11.709595647250415, "learning_rate": 9.789263584326238e-06, "loss": 2.288817882537842, "step": 698 }, { "epoch": 0.5521327014218009, "grad_norm": 7.605046056867549, "learning_rate": 9.78794141352555e-06, "loss": 2.7125701904296875, "step": 699 }, { "epoch": 0.5529225908372828, "grad_norm": 10.47514502255061, "learning_rate": 9.786615197856188e-06, "loss": 2.7359495162963867, "step": 700 }, { "epoch": 0.5537124802527646, "grad_norm": 14.91063318894891, "learning_rate": 9.785284938438545e-06, "loss": 3.3785290718078613, "step": 701 }, { "epoch": 0.5545023696682464, "grad_norm": 8.501053428871433, "learning_rate": 9.783950636396429e-06, "loss": 2.2032179832458496, "step": 702 }, { "epoch": 0.5552922590837283, "grad_norm": 8.382387828104997, "learning_rate": 9.78261229285706e-06, "loss": 2.5567541122436523, "step": 703 }, { "epoch": 0.5560821484992101, "grad_norm": 6.833003481377768, "learning_rate": 9.781269908951079e-06, "loss": 2.9519448280334473, "step": 704 }, { "epoch": 0.556872037914692, "grad_norm": 10.767129048941534, "learning_rate": 9.779923485812534e-06, "loss": 2.9880781173706055, "step": 705 }, { "epoch": 0.5576619273301737, "grad_norm": 17.977394830366933, "learning_rate": 9.778573024578886e-06, "loss": 2.571939706802368, "step": 706 }, { "epoch": 0.5584518167456556, "grad_norm": 8.839985291634292, "learning_rate": 9.777218526391013e-06, "loss": 2.6147358417510986, "step": 707 }, { "epoch": 0.5592417061611374, "grad_norm": 16.290139197222373, "learning_rate": 9.775859992393198e-06, "loss": 2.765791654586792, "step": 708 }, { "epoch": 0.5600315955766193, "grad_norm": 9.812939254834504, "learning_rate": 9.774497423733134e-06, "loss": 2.9214420318603516, "step": 709 }, { "epoch": 0.5608214849921012, "grad_norm": 10.32637589864547, "learning_rate": 9.773130821561923e-06, "loss": 2.793147563934326, "step": 710 }, { "epoch": 0.5616113744075829, "grad_norm": 8.09381965198076, "learning_rate": 9.771760187034076e-06, "loss": 3.287661552429199, "step": 711 }, { "epoch": 0.5624012638230648, "grad_norm": 35.57847572787897, "learning_rate": 9.770385521307511e-06, "loss": 2.639596462249756, "step": 712 }, { "epoch": 0.5631911532385466, "grad_norm": 10.298454720332643, "learning_rate": 9.769006825543547e-06, "loss": 2.0149660110473633, "step": 713 }, { "epoch": 0.5639810426540285, "grad_norm": 9.21174002933949, "learning_rate": 9.767624100906915e-06, "loss": 2.675302743911743, "step": 714 }, { "epoch": 0.5647709320695102, "grad_norm": 7.382352205544447, "learning_rate": 9.766237348565741e-06, "loss": 1.6485764980316162, "step": 715 }, { "epoch": 0.5655608214849921, "grad_norm": 10.607210159075297, "learning_rate": 9.76484656969156e-06, "loss": 2.4519925117492676, "step": 716 }, { "epoch": 0.566350710900474, "grad_norm": 10.50163998316087, "learning_rate": 9.763451765459307e-06, "loss": 2.3074722290039062, "step": 717 }, { "epoch": 0.5671406003159558, "grad_norm": 10.624503499088616, "learning_rate": 9.762052937047318e-06, "loss": 2.695051670074463, "step": 718 }, { "epoch": 0.5679304897314376, "grad_norm": 15.616632678838926, "learning_rate": 9.760650085637322e-06, "loss": 3.360673666000366, "step": 719 }, { "epoch": 0.5687203791469194, "grad_norm": 8.375766262844625, "learning_rate": 9.75924321241446e-06, "loss": 2.7661333084106445, "step": 720 }, { "epoch": 0.5695102685624013, "grad_norm": 16.894936615307905, "learning_rate": 9.75783231856726e-06, "loss": 2.6800551414489746, "step": 721 }, { "epoch": 0.5703001579778831, "grad_norm": 9.306984501448646, "learning_rate": 9.756417405287649e-06, "loss": 3.1795547008514404, "step": 722 }, { "epoch": 0.5710900473933649, "grad_norm": 9.638215383453232, "learning_rate": 9.754998473770952e-06, "loss": 3.0728039741516113, "step": 723 }, { "epoch": 0.5718799368088467, "grad_norm": 18.971875030373713, "learning_rate": 9.753575525215885e-06, "loss": 3.101027250289917, "step": 724 }, { "epoch": 0.5726698262243286, "grad_norm": 9.703129422537145, "learning_rate": 9.752148560824562e-06, "loss": 2.3094897270202637, "step": 725 }, { "epoch": 0.5734597156398105, "grad_norm": 15.734256139808767, "learning_rate": 9.750717581802486e-06, "loss": 2.910053253173828, "step": 726 }, { "epoch": 0.5742496050552922, "grad_norm": 16.302208461415216, "learning_rate": 9.749282589358553e-06, "loss": 3.1184496879577637, "step": 727 }, { "epoch": 0.5750394944707741, "grad_norm": 12.9735057677299, "learning_rate": 9.747843584705047e-06, "loss": 2.873502731323242, "step": 728 }, { "epoch": 0.5758293838862559, "grad_norm": 22.638733634704586, "learning_rate": 9.746400569057648e-06, "loss": 2.4251301288604736, "step": 729 }, { "epoch": 0.5766192733017378, "grad_norm": 7.163678590109422, "learning_rate": 9.744953543635417e-06, "loss": 2.799077033996582, "step": 730 }, { "epoch": 0.5774091627172195, "grad_norm": 15.90505006968332, "learning_rate": 9.74350250966081e-06, "loss": 3.6660590171813965, "step": 731 }, { "epoch": 0.5781990521327014, "grad_norm": 7.61368151252301, "learning_rate": 9.742047468359661e-06, "loss": 3.0964913368225098, "step": 732 }, { "epoch": 0.5789889415481833, "grad_norm": 7.331962193745934, "learning_rate": 9.740588420961194e-06, "loss": 2.7701282501220703, "step": 733 }, { "epoch": 0.5797788309636651, "grad_norm": 10.939975135494327, "learning_rate": 9.739125368698019e-06, "loss": 2.936520576477051, "step": 734 }, { "epoch": 0.580568720379147, "grad_norm": 11.923772537267181, "learning_rate": 9.737658312806128e-06, "loss": 2.714221477508545, "step": 735 }, { "epoch": 0.5813586097946287, "grad_norm": 8.343105964786538, "learning_rate": 9.73618725452489e-06, "loss": 2.6335134506225586, "step": 736 }, { "epoch": 0.5821484992101106, "grad_norm": 11.578813272698921, "learning_rate": 9.734712195097068e-06, "loss": 3.0548324584960938, "step": 737 }, { "epoch": 0.5829383886255924, "grad_norm": 8.178172803096258, "learning_rate": 9.733233135768789e-06, "loss": 2.455691337585449, "step": 738 }, { "epoch": 0.5837282780410743, "grad_norm": 8.208384722056353, "learning_rate": 9.731750077789572e-06, "loss": 2.85522198677063, "step": 739 }, { "epoch": 0.584518167456556, "grad_norm": 14.357432181582325, "learning_rate": 9.730263022412307e-06, "loss": 2.7620186805725098, "step": 740 }, { "epoch": 0.5853080568720379, "grad_norm": 19.304824374340818, "learning_rate": 9.728771970893262e-06, "loss": 2.078908920288086, "step": 741 }, { "epoch": 0.5860979462875198, "grad_norm": 8.634363379681329, "learning_rate": 9.727276924492088e-06, "loss": 2.5789947509765625, "step": 742 }, { "epoch": 0.5868878357030016, "grad_norm": 13.987495203502483, "learning_rate": 9.725777884471798e-06, "loss": 2.6600892543792725, "step": 743 }, { "epoch": 0.5876777251184834, "grad_norm": 21.907120600463696, "learning_rate": 9.724274852098792e-06, "loss": 3.122257709503174, "step": 744 }, { "epoch": 0.5884676145339652, "grad_norm": 12.86185834923555, "learning_rate": 9.722767828642831e-06, "loss": 2.9660885334014893, "step": 745 }, { "epoch": 0.5892575039494471, "grad_norm": 15.825928885727478, "learning_rate": 9.721256815377059e-06, "loss": 2.9355366230010986, "step": 746 }, { "epoch": 0.590047393364929, "grad_norm": 17.506752397926537, "learning_rate": 9.719741813577982e-06, "loss": 2.7380142211914062, "step": 747 }, { "epoch": 0.5908372827804107, "grad_norm": 13.905446136498991, "learning_rate": 9.718222824525476e-06, "loss": 2.624443292617798, "step": 748 }, { "epoch": 0.5916271721958926, "grad_norm": 10.950226102749792, "learning_rate": 9.716699849502794e-06, "loss": 3.353207588195801, "step": 749 }, { "epoch": 0.5924170616113744, "grad_norm": 10.01361583202192, "learning_rate": 9.715172889796546e-06, "loss": 2.4462380409240723, "step": 750 }, { "epoch": 0.5932069510268563, "grad_norm": 10.868695455875331, "learning_rate": 9.713641946696713e-06, "loss": 2.470088005065918, "step": 751 }, { "epoch": 0.593996840442338, "grad_norm": 12.24308594192275, "learning_rate": 9.712107021496641e-06, "loss": 2.746387004852295, "step": 752 }, { "epoch": 0.5947867298578199, "grad_norm": 10.293898753873405, "learning_rate": 9.710568115493041e-06, "loss": 2.893784284591675, "step": 753 }, { "epoch": 0.5955766192733017, "grad_norm": 8.791452259605602, "learning_rate": 9.709025229985986e-06, "loss": 3.845496654510498, "step": 754 }, { "epoch": 0.5963665086887836, "grad_norm": 7.952120961759487, "learning_rate": 9.707478366278911e-06, "loss": 3.286113739013672, "step": 755 }, { "epoch": 0.5971563981042654, "grad_norm": 11.052387334163443, "learning_rate": 9.705927525678608e-06, "loss": 2.54490327835083, "step": 756 }, { "epoch": 0.5979462875197472, "grad_norm": 28.59255694909707, "learning_rate": 9.704372709495237e-06, "loss": 2.303287982940674, "step": 757 }, { "epoch": 0.5987361769352291, "grad_norm": 12.717484973265515, "learning_rate": 9.702813919042308e-06, "loss": 2.5255141258239746, "step": 758 }, { "epoch": 0.5995260663507109, "grad_norm": 10.644626496040491, "learning_rate": 9.701251155636696e-06, "loss": 2.6174449920654297, "step": 759 }, { "epoch": 0.6003159557661928, "grad_norm": 10.420680692777719, "learning_rate": 9.699684420598622e-06, "loss": 3.0751430988311768, "step": 760 }, { "epoch": 0.6011058451816745, "grad_norm": 17.7407887464059, "learning_rate": 9.698113715251678e-06, "loss": 3.1690831184387207, "step": 761 }, { "epoch": 0.6018957345971564, "grad_norm": 6.8133884021840165, "learning_rate": 9.696539040922794e-06, "loss": 2.992917060852051, "step": 762 }, { "epoch": 0.6026856240126383, "grad_norm": 14.288096164471353, "learning_rate": 9.694960398942264e-06, "loss": 2.862287998199463, "step": 763 }, { "epoch": 0.6034755134281201, "grad_norm": 8.475183410580591, "learning_rate": 9.693377790643728e-06, "loss": 2.5695481300354004, "step": 764 }, { "epoch": 0.6042654028436019, "grad_norm": 12.006246518434427, "learning_rate": 9.69179121736418e-06, "loss": 2.456130266189575, "step": 765 }, { "epoch": 0.6050552922590837, "grad_norm": 10.187805065838019, "learning_rate": 9.69020068044396e-06, "loss": 2.798001766204834, "step": 766 }, { "epoch": 0.6058451816745656, "grad_norm": 12.030587021942077, "learning_rate": 9.68860618122676e-06, "loss": 3.1254353523254395, "step": 767 }, { "epoch": 0.6066350710900474, "grad_norm": 8.960697365970546, "learning_rate": 9.68700772105962e-06, "loss": 2.784362316131592, "step": 768 }, { "epoch": 0.6074249605055292, "grad_norm": 12.104781783849731, "learning_rate": 9.685405301292924e-06, "loss": 2.487422466278076, "step": 769 }, { "epoch": 0.608214849921011, "grad_norm": 11.735082585197036, "learning_rate": 9.683798923280398e-06, "loss": 2.749908208847046, "step": 770 }, { "epoch": 0.6090047393364929, "grad_norm": 8.39933378207941, "learning_rate": 9.68218858837912e-06, "loss": 2.5542333126068115, "step": 771 }, { "epoch": 0.6097946287519748, "grad_norm": 13.203001273703372, "learning_rate": 9.680574297949503e-06, "loss": 2.381009578704834, "step": 772 }, { "epoch": 0.6105845181674565, "grad_norm": 10.227737175443485, "learning_rate": 9.678956053355306e-06, "loss": 2.797962188720703, "step": 773 }, { "epoch": 0.6113744075829384, "grad_norm": 19.644909570407606, "learning_rate": 9.677333855963627e-06, "loss": 2.3713326454162598, "step": 774 }, { "epoch": 0.6121642969984202, "grad_norm": 10.802978377889708, "learning_rate": 9.675707707144906e-06, "loss": 3.7946083545684814, "step": 775 }, { "epoch": 0.6129541864139021, "grad_norm": 9.800404823560639, "learning_rate": 9.674077608272916e-06, "loss": 2.2313640117645264, "step": 776 }, { "epoch": 0.6137440758293838, "grad_norm": 10.197415931944706, "learning_rate": 9.67244356072477e-06, "loss": 2.564563035964966, "step": 777 }, { "epoch": 0.6145339652448657, "grad_norm": 13.443842890508776, "learning_rate": 9.670805565880919e-06, "loss": 2.851914405822754, "step": 778 }, { "epoch": 0.6153238546603476, "grad_norm": 19.85673680273832, "learning_rate": 9.669163625125143e-06, "loss": 2.2651009559631348, "step": 779 }, { "epoch": 0.6161137440758294, "grad_norm": 12.884481490463953, "learning_rate": 9.667517739844563e-06, "loss": 2.5570054054260254, "step": 780 }, { "epoch": 0.6169036334913112, "grad_norm": 17.453234670736126, "learning_rate": 9.665867911429625e-06, "loss": 2.5283799171447754, "step": 781 }, { "epoch": 0.617693522906793, "grad_norm": 18.936216590713382, "learning_rate": 9.664214141274111e-06, "loss": 3.2821831703186035, "step": 782 }, { "epoch": 0.6184834123222749, "grad_norm": 12.29731371387329, "learning_rate": 9.662556430775132e-06, "loss": 3.238713264465332, "step": 783 }, { "epoch": 0.6192733017377567, "grad_norm": 13.021393099946007, "learning_rate": 9.660894781333126e-06, "loss": 2.938838005065918, "step": 784 }, { "epoch": 0.6200631911532386, "grad_norm": 6.931741915013856, "learning_rate": 9.65922919435186e-06, "loss": 2.7250375747680664, "step": 785 }, { "epoch": 0.6208530805687204, "grad_norm": 12.605958216464781, "learning_rate": 9.657559671238428e-06, "loss": 2.9273529052734375, "step": 786 }, { "epoch": 0.6216429699842022, "grad_norm": 24.241779337105008, "learning_rate": 9.65588621340325e-06, "loss": 2.6192431449890137, "step": 787 }, { "epoch": 0.6224328593996841, "grad_norm": 15.774873829312497, "learning_rate": 9.654208822260064e-06, "loss": 2.6683297157287598, "step": 788 }, { "epoch": 0.6232227488151659, "grad_norm": 13.55562535680502, "learning_rate": 9.65252749922594e-06, "loss": 3.453798294067383, "step": 789 }, { "epoch": 0.6240126382306477, "grad_norm": 10.437339665014067, "learning_rate": 9.650842245721265e-06, "loss": 2.660048007965088, "step": 790 }, { "epoch": 0.6248025276461295, "grad_norm": 7.373849740564372, "learning_rate": 9.649153063169747e-06, "loss": 3.181802272796631, "step": 791 }, { "epoch": 0.6255924170616114, "grad_norm": 10.97034956730053, "learning_rate": 9.647459952998409e-06, "loss": 2.794236183166504, "step": 792 }, { "epoch": 0.6263823064770933, "grad_norm": 13.289064458186381, "learning_rate": 9.6457629166376e-06, "loss": 2.934234142303467, "step": 793 }, { "epoch": 0.627172195892575, "grad_norm": 6.66173575936569, "learning_rate": 9.644061955520981e-06, "loss": 2.53916072845459, "step": 794 }, { "epoch": 0.6279620853080569, "grad_norm": 8.21096858166868, "learning_rate": 9.642357071085527e-06, "loss": 3.4347705841064453, "step": 795 }, { "epoch": 0.6287519747235387, "grad_norm": 10.267840042109265, "learning_rate": 9.640648264771532e-06, "loss": 2.589984655380249, "step": 796 }, { "epoch": 0.6295418641390206, "grad_norm": 23.740776784719007, "learning_rate": 9.638935538022605e-06, "loss": 2.2766027450561523, "step": 797 }, { "epoch": 0.6303317535545023, "grad_norm": 10.175389822962396, "learning_rate": 9.637218892285656e-06, "loss": 2.140416383743286, "step": 798 }, { "epoch": 0.6311216429699842, "grad_norm": 10.006330545127017, "learning_rate": 9.635498329010918e-06, "loss": 2.6404151916503906, "step": 799 }, { "epoch": 0.631911532385466, "grad_norm": 15.860393911321083, "learning_rate": 9.633773849651926e-06, "loss": 3.4515304565429688, "step": 800 }, { "epoch": 0.6327014218009479, "grad_norm": 10.447911887203587, "learning_rate": 9.632045455665528e-06, "loss": 2.6439762115478516, "step": 801 }, { "epoch": 0.6334913112164297, "grad_norm": 9.938315980267259, "learning_rate": 9.630313148511876e-06, "loss": 2.633496046066284, "step": 802 }, { "epoch": 0.6342812006319115, "grad_norm": 14.127497750541355, "learning_rate": 9.628576929654427e-06, "loss": 3.0772128105163574, "step": 803 }, { "epoch": 0.6350710900473934, "grad_norm": 9.475057560148757, "learning_rate": 9.626836800559948e-06, "loss": 2.8075199127197266, "step": 804 }, { "epoch": 0.6358609794628752, "grad_norm": 15.2657530938932, "learning_rate": 9.625092762698502e-06, "loss": 2.2376973628997803, "step": 805 }, { "epoch": 0.636650868878357, "grad_norm": 13.5038856684049, "learning_rate": 9.623344817543462e-06, "loss": 3.09859037399292, "step": 806 }, { "epoch": 0.6374407582938388, "grad_norm": 15.533786385781745, "learning_rate": 9.621592966571493e-06, "loss": 2.4869344234466553, "step": 807 }, { "epoch": 0.6382306477093207, "grad_norm": 13.72530582074668, "learning_rate": 9.619837211262569e-06, "loss": 3.0674853324890137, "step": 808 }, { "epoch": 0.6390205371248026, "grad_norm": 23.557899308984517, "learning_rate": 9.618077553099954e-06, "loss": 3.3668880462646484, "step": 809 }, { "epoch": 0.6398104265402843, "grad_norm": 18.763852919675887, "learning_rate": 9.616313993570215e-06, "loss": 2.933554172515869, "step": 810 }, { "epoch": 0.6406003159557662, "grad_norm": 8.365194309767189, "learning_rate": 9.614546534163214e-06, "loss": 2.367485523223877, "step": 811 }, { "epoch": 0.641390205371248, "grad_norm": 8.781343171527238, "learning_rate": 9.612775176372104e-06, "loss": 2.1504476070404053, "step": 812 }, { "epoch": 0.6421800947867299, "grad_norm": 14.453301784955004, "learning_rate": 9.610999921693335e-06, "loss": 3.482938766479492, "step": 813 }, { "epoch": 0.6429699842022117, "grad_norm": 11.750899386703743, "learning_rate": 9.60922077162665e-06, "loss": 2.383328437805176, "step": 814 }, { "epoch": 0.6437598736176935, "grad_norm": 9.466726112052974, "learning_rate": 9.607437727675077e-06, "loss": 2.781550884246826, "step": 815 }, { "epoch": 0.6445497630331753, "grad_norm": 12.278841985932612, "learning_rate": 9.60565079134494e-06, "loss": 2.6321635246276855, "step": 816 }, { "epoch": 0.6453396524486572, "grad_norm": 16.454023020237027, "learning_rate": 9.60385996414585e-06, "loss": 3.094892978668213, "step": 817 }, { "epoch": 0.6461295418641391, "grad_norm": 7.492293232454839, "learning_rate": 9.6020652475907e-06, "loss": 2.353990077972412, "step": 818 }, { "epoch": 0.6469194312796208, "grad_norm": 7.938504133286718, "learning_rate": 9.600266643195675e-06, "loss": 2.719548225402832, "step": 819 }, { "epoch": 0.6477093206951027, "grad_norm": 34.78257617710777, "learning_rate": 9.598464152480241e-06, "loss": 2.486771821975708, "step": 820 }, { "epoch": 0.6484992101105845, "grad_norm": 7.695149398961293, "learning_rate": 9.596657776967149e-06, "loss": 2.359746217727661, "step": 821 }, { "epoch": 0.6492890995260664, "grad_norm": 10.381251787843585, "learning_rate": 9.594847518182428e-06, "loss": 2.8774003982543945, "step": 822 }, { "epoch": 0.6500789889415481, "grad_norm": 21.800739755225305, "learning_rate": 9.593033377655396e-06, "loss": 2.1589415073394775, "step": 823 }, { "epoch": 0.65086887835703, "grad_norm": 16.66199847491689, "learning_rate": 9.59121535691864e-06, "loss": 3.30254864692688, "step": 824 }, { "epoch": 0.6516587677725119, "grad_norm": 10.566975533918288, "learning_rate": 9.589393457508032e-06, "loss": 2.679553508758545, "step": 825 }, { "epoch": 0.6524486571879937, "grad_norm": 19.121309989507864, "learning_rate": 9.587567680962716e-06, "loss": 3.172027111053467, "step": 826 }, { "epoch": 0.6532385466034755, "grad_norm": 11.16230347524207, "learning_rate": 9.58573802882512e-06, "loss": 2.9337282180786133, "step": 827 }, { "epoch": 0.6540284360189573, "grad_norm": 7.133910740967563, "learning_rate": 9.583904502640936e-06, "loss": 2.826122283935547, "step": 828 }, { "epoch": 0.6548183254344392, "grad_norm": 14.35666338571649, "learning_rate": 9.582067103959131e-06, "loss": 3.0313868522644043, "step": 829 }, { "epoch": 0.655608214849921, "grad_norm": 11.532262508822264, "learning_rate": 9.58022583433195e-06, "loss": 2.7884521484375, "step": 830 }, { "epoch": 0.6563981042654028, "grad_norm": 6.874943848075909, "learning_rate": 9.5783806953149e-06, "loss": 2.96806001663208, "step": 831 }, { "epoch": 0.6571879936808847, "grad_norm": 12.146810880091056, "learning_rate": 9.576531688466762e-06, "loss": 2.976937770843506, "step": 832 }, { "epoch": 0.6579778830963665, "grad_norm": 7.921587511162627, "learning_rate": 9.574678815349585e-06, "loss": 2.6038804054260254, "step": 833 }, { "epoch": 0.6587677725118484, "grad_norm": 17.969038197005215, "learning_rate": 9.572822077528678e-06, "loss": 3.1494526863098145, "step": 834 }, { "epoch": 0.6595576619273301, "grad_norm": 8.31536269495529, "learning_rate": 9.570961476572624e-06, "loss": 2.9516241550445557, "step": 835 }, { "epoch": 0.660347551342812, "grad_norm": 6.4993286688590715, "learning_rate": 9.56909701405326e-06, "loss": 2.8607451915740967, "step": 836 }, { "epoch": 0.6611374407582938, "grad_norm": 7.552272245609234, "learning_rate": 9.567228691545696e-06, "loss": 2.5649495124816895, "step": 837 }, { "epoch": 0.6619273301737757, "grad_norm": 14.48881948712848, "learning_rate": 9.565356510628291e-06, "loss": 2.513335943222046, "step": 838 }, { "epoch": 0.6627172195892576, "grad_norm": 9.081903072300518, "learning_rate": 9.563480472882673e-06, "loss": 2.9949398040771484, "step": 839 }, { "epoch": 0.6635071090047393, "grad_norm": 12.074800830284559, "learning_rate": 9.561600579893723e-06, "loss": 2.6771364212036133, "step": 840 }, { "epoch": 0.6642969984202212, "grad_norm": 11.321384919112033, "learning_rate": 9.559716833249583e-06, "loss": 2.8205018043518066, "step": 841 }, { "epoch": 0.665086887835703, "grad_norm": 9.775240458791433, "learning_rate": 9.557829234541647e-06, "loss": 2.9774630069732666, "step": 842 }, { "epoch": 0.6658767772511849, "grad_norm": 11.521035463074744, "learning_rate": 9.555937785364563e-06, "loss": 2.579075574874878, "step": 843 }, { "epoch": 0.6666666666666666, "grad_norm": 9.432485493960403, "learning_rate": 9.554042487316237e-06, "loss": 2.726024627685547, "step": 844 }, { "epoch": 0.6674565560821485, "grad_norm": 14.134144139013555, "learning_rate": 9.552143341997822e-06, "loss": 2.8715529441833496, "step": 845 }, { "epoch": 0.6682464454976303, "grad_norm": 8.902377494487911, "learning_rate": 9.55024035101372e-06, "loss": 2.533745527267456, "step": 846 }, { "epoch": 0.6690363349131122, "grad_norm": 12.832395817425043, "learning_rate": 9.548333515971587e-06, "loss": 2.761075496673584, "step": 847 }, { "epoch": 0.669826224328594, "grad_norm": 10.20576917282644, "learning_rate": 9.546422838482322e-06, "loss": 2.5824503898620605, "step": 848 }, { "epoch": 0.6706161137440758, "grad_norm": 6.880173889228289, "learning_rate": 9.54450832016007e-06, "loss": 2.5947561264038086, "step": 849 }, { "epoch": 0.6714060031595577, "grad_norm": 14.836403016663432, "learning_rate": 9.542589962622225e-06, "loss": 2.1935033798217773, "step": 850 }, { "epoch": 0.6721958925750395, "grad_norm": 6.943720267173772, "learning_rate": 9.540667767489421e-06, "loss": 2.6050100326538086, "step": 851 }, { "epoch": 0.6729857819905213, "grad_norm": 15.12169878975551, "learning_rate": 9.538741736385534e-06, "loss": 3.6529133319854736, "step": 852 }, { "epoch": 0.6737756714060031, "grad_norm": 5.867087588241686, "learning_rate": 9.536811870937684e-06, "loss": 2.063253164291382, "step": 853 }, { "epoch": 0.674565560821485, "grad_norm": 7.798417844400532, "learning_rate": 9.534878172776224e-06, "loss": 2.7908072471618652, "step": 854 }, { "epoch": 0.6753554502369669, "grad_norm": 11.818473782559336, "learning_rate": 9.532940643534751e-06, "loss": 2.4319844245910645, "step": 855 }, { "epoch": 0.6761453396524486, "grad_norm": 12.80740078253414, "learning_rate": 9.530999284850095e-06, "loss": 3.1545660495758057, "step": 856 }, { "epoch": 0.6769352290679305, "grad_norm": 9.292936008984638, "learning_rate": 9.529054098362322e-06, "loss": 2.947558641433716, "step": 857 }, { "epoch": 0.6777251184834123, "grad_norm": 16.62193321855355, "learning_rate": 9.527105085714734e-06, "loss": 2.610852003097534, "step": 858 }, { "epoch": 0.6785150078988942, "grad_norm": 14.554767788526535, "learning_rate": 9.525152248553862e-06, "loss": 2.979235887527466, "step": 859 }, { "epoch": 0.6793048973143759, "grad_norm": 11.910814876723402, "learning_rate": 9.523195588529468e-06, "loss": 2.6078577041625977, "step": 860 }, { "epoch": 0.6800947867298578, "grad_norm": 8.286047769780788, "learning_rate": 9.521235107294548e-06, "loss": 2.068547010421753, "step": 861 }, { "epoch": 0.6808846761453397, "grad_norm": 10.506290416192853, "learning_rate": 9.51927080650532e-06, "loss": 2.794530153274536, "step": 862 }, { "epoch": 0.6816745655608215, "grad_norm": 11.391589488737578, "learning_rate": 9.517302687821231e-06, "loss": 2.5470008850097656, "step": 863 }, { "epoch": 0.6824644549763034, "grad_norm": 7.668217055524585, "learning_rate": 9.515330752904956e-06, "loss": 2.6968884468078613, "step": 864 }, { "epoch": 0.6832543443917851, "grad_norm": 8.551258441901858, "learning_rate": 9.513355003422396e-06, "loss": 2.8228256702423096, "step": 865 }, { "epoch": 0.684044233807267, "grad_norm": 11.671477572882841, "learning_rate": 9.511375441042663e-06, "loss": 3.5630812644958496, "step": 866 }, { "epoch": 0.6848341232227488, "grad_norm": 15.900567407044479, "learning_rate": 9.5093920674381e-06, "loss": 2.9535064697265625, "step": 867 }, { "epoch": 0.6856240126382307, "grad_norm": 8.643390525668298, "learning_rate": 9.507404884284273e-06, "loss": 2.741084337234497, "step": 868 }, { "epoch": 0.6864139020537124, "grad_norm": 10.692778433484234, "learning_rate": 9.505413893259956e-06, "loss": 3.3046531677246094, "step": 869 }, { "epoch": 0.6872037914691943, "grad_norm": 16.535562300261393, "learning_rate": 9.503419096047144e-06, "loss": 3.2300820350646973, "step": 870 }, { "epoch": 0.6879936808846762, "grad_norm": 24.170107530956294, "learning_rate": 9.501420494331052e-06, "loss": 2.438554048538208, "step": 871 }, { "epoch": 0.688783570300158, "grad_norm": 8.690027829520277, "learning_rate": 9.499418089800102e-06, "loss": 2.4033608436584473, "step": 872 }, { "epoch": 0.6895734597156398, "grad_norm": 9.282954177762111, "learning_rate": 9.497411884145933e-06, "loss": 2.7961714267730713, "step": 873 }, { "epoch": 0.6903633491311216, "grad_norm": 8.534548336142196, "learning_rate": 9.495401879063395e-06, "loss": 2.739697217941284, "step": 874 }, { "epoch": 0.6911532385466035, "grad_norm": 14.710104471833855, "learning_rate": 9.493388076250546e-06, "loss": 3.3953442573547363, "step": 875 }, { "epoch": 0.6919431279620853, "grad_norm": 10.793976678843643, "learning_rate": 9.491370477408655e-06, "loss": 3.082679271697998, "step": 876 }, { "epoch": 0.6927330173775671, "grad_norm": 11.424816104504234, "learning_rate": 9.489349084242192e-06, "loss": 2.755612850189209, "step": 877 }, { "epoch": 0.693522906793049, "grad_norm": 7.915901709561951, "learning_rate": 9.487323898458841e-06, "loss": 2.766568660736084, "step": 878 }, { "epoch": 0.6943127962085308, "grad_norm": 17.848385230595895, "learning_rate": 9.485294921769484e-06, "loss": 2.5398964881896973, "step": 879 }, { "epoch": 0.6951026856240127, "grad_norm": 29.674496967139927, "learning_rate": 9.483262155888207e-06, "loss": 3.093564987182617, "step": 880 }, { "epoch": 0.6958925750394944, "grad_norm": 14.90079966406828, "learning_rate": 9.481225602532296e-06, "loss": 2.9575257301330566, "step": 881 }, { "epoch": 0.6966824644549763, "grad_norm": 26.431365807353334, "learning_rate": 9.47918526342224e-06, "loss": 3.3621833324432373, "step": 882 }, { "epoch": 0.6974723538704581, "grad_norm": 18.17594227056942, "learning_rate": 9.477141140281724e-06, "loss": 2.7552647590637207, "step": 883 }, { "epoch": 0.69826224328594, "grad_norm": 9.17468592273275, "learning_rate": 9.475093234837629e-06, "loss": 2.6432392597198486, "step": 884 }, { "epoch": 0.6990521327014217, "grad_norm": 11.724571460894934, "learning_rate": 9.473041548820034e-06, "loss": 2.863342523574829, "step": 885 }, { "epoch": 0.6998420221169036, "grad_norm": 13.647582206639745, "learning_rate": 9.470986083962208e-06, "loss": 3.1229562759399414, "step": 886 }, { "epoch": 0.7006319115323855, "grad_norm": 10.904081018729473, "learning_rate": 9.468926842000614e-06, "loss": 2.8623602390289307, "step": 887 }, { "epoch": 0.7014218009478673, "grad_norm": 14.77540518637624, "learning_rate": 9.46686382467491e-06, "loss": 2.9971213340759277, "step": 888 }, { "epoch": 0.7022116903633492, "grad_norm": 17.160481522672626, "learning_rate": 9.464797033727937e-06, "loss": 2.8732876777648926, "step": 889 }, { "epoch": 0.7030015797788309, "grad_norm": 7.662847037910413, "learning_rate": 9.462726470905727e-06, "loss": 2.4884605407714844, "step": 890 }, { "epoch": 0.7037914691943128, "grad_norm": 15.496194298757855, "learning_rate": 9.460652137957497e-06, "loss": 2.5895493030548096, "step": 891 }, { "epoch": 0.7045813586097947, "grad_norm": 9.210243074506902, "learning_rate": 9.458574036635656e-06, "loss": 3.070889472961426, "step": 892 }, { "epoch": 0.7053712480252765, "grad_norm": 13.22477817543685, "learning_rate": 9.456492168695783e-06, "loss": 2.6131277084350586, "step": 893 }, { "epoch": 0.7061611374407583, "grad_norm": 10.736756209485726, "learning_rate": 9.454406535896653e-06, "loss": 2.7342894077301025, "step": 894 }, { "epoch": 0.7069510268562401, "grad_norm": 15.649909827229424, "learning_rate": 9.452317140000213e-06, "loss": 2.709885835647583, "step": 895 }, { "epoch": 0.707740916271722, "grad_norm": 17.03495887885535, "learning_rate": 9.45022398277159e-06, "loss": 2.229793071746826, "step": 896 }, { "epoch": 0.7085308056872038, "grad_norm": 18.286957303019204, "learning_rate": 9.448127065979093e-06, "loss": 2.3719115257263184, "step": 897 }, { "epoch": 0.7093206951026856, "grad_norm": 9.674060014502675, "learning_rate": 9.446026391394203e-06, "loss": 3.1232872009277344, "step": 898 }, { "epoch": 0.7101105845181674, "grad_norm": 8.294415989977118, "learning_rate": 9.443921960791578e-06, "loss": 2.2887797355651855, "step": 899 }, { "epoch": 0.7109004739336493, "grad_norm": 10.7843258557463, "learning_rate": 9.441813775949045e-06, "loss": 2.947249174118042, "step": 900 }, { "epoch": 0.7116903633491312, "grad_norm": 55.81560616750336, "learning_rate": 9.439701838647607e-06, "loss": 2.6564688682556152, "step": 901 }, { "epoch": 0.7124802527646129, "grad_norm": 15.475637484457012, "learning_rate": 9.437586150671438e-06, "loss": 3.2652010917663574, "step": 902 }, { "epoch": 0.7132701421800948, "grad_norm": 12.6346062418189, "learning_rate": 9.435466713807875e-06, "loss": 3.212409257888794, "step": 903 }, { "epoch": 0.7140600315955766, "grad_norm": 12.653119224973862, "learning_rate": 9.433343529847426e-06, "loss": 2.8347318172454834, "step": 904 }, { "epoch": 0.7148499210110585, "grad_norm": 8.174222426024595, "learning_rate": 9.431216600583764e-06, "loss": 2.7938289642333984, "step": 905 }, { "epoch": 0.7156398104265402, "grad_norm": 10.498223049643716, "learning_rate": 9.429085927813725e-06, "loss": 2.8059895038604736, "step": 906 }, { "epoch": 0.7164296998420221, "grad_norm": 11.610537771337127, "learning_rate": 9.42695151333731e-06, "loss": 2.670276403427124, "step": 907 }, { "epoch": 0.717219589257504, "grad_norm": 15.926885320452712, "learning_rate": 9.424813358957678e-06, "loss": 2.8029661178588867, "step": 908 }, { "epoch": 0.7180094786729858, "grad_norm": 12.370001760911942, "learning_rate": 9.42267146648115e-06, "loss": 2.344736099243164, "step": 909 }, { "epoch": 0.7187993680884676, "grad_norm": 15.272394441293677, "learning_rate": 9.420525837717205e-06, "loss": 1.8855293989181519, "step": 910 }, { "epoch": 0.7195892575039494, "grad_norm": 10.81989245618176, "learning_rate": 9.418376474478474e-06, "loss": 2.810041666030884, "step": 911 }, { "epoch": 0.7203791469194313, "grad_norm": 17.67649470148584, "learning_rate": 9.416223378580747e-06, "loss": 2.526409864425659, "step": 912 }, { "epoch": 0.7211690363349131, "grad_norm": 10.055598633901095, "learning_rate": 9.414066551842969e-06, "loss": 2.868654489517212, "step": 913 }, { "epoch": 0.721958925750395, "grad_norm": 9.513800374151746, "learning_rate": 9.41190599608723e-06, "loss": 2.928063154220581, "step": 914 }, { "epoch": 0.7227488151658767, "grad_norm": 13.871125687274514, "learning_rate": 9.40974171313878e-06, "loss": 2.4988300800323486, "step": 915 }, { "epoch": 0.7235387045813586, "grad_norm": 11.896211423240262, "learning_rate": 9.407573704826008e-06, "loss": 1.8240364789962769, "step": 916 }, { "epoch": 0.7243285939968405, "grad_norm": 9.077450079878284, "learning_rate": 9.405401972980457e-06, "loss": 2.8183727264404297, "step": 917 }, { "epoch": 0.7251184834123223, "grad_norm": 13.826245036289404, "learning_rate": 9.40322651943681e-06, "loss": 2.8091042041778564, "step": 918 }, { "epoch": 0.7259083728278041, "grad_norm": 7.372371185613573, "learning_rate": 9.4010473460329e-06, "loss": 2.093374252319336, "step": 919 }, { "epoch": 0.7266982622432859, "grad_norm": 8.058209243668498, "learning_rate": 9.398864454609702e-06, "loss": 2.278440475463867, "step": 920 }, { "epoch": 0.7274881516587678, "grad_norm": 12.147448734615645, "learning_rate": 9.396677847011326e-06, "loss": 2.403252601623535, "step": 921 }, { "epoch": 0.7282780410742496, "grad_norm": 8.711345300212125, "learning_rate": 9.394487525085027e-06, "loss": 2.735347270965576, "step": 922 }, { "epoch": 0.7290679304897314, "grad_norm": 8.80572208933033, "learning_rate": 9.392293490681195e-06, "loss": 2.715076446533203, "step": 923 }, { "epoch": 0.7298578199052133, "grad_norm": 7.247758343473638, "learning_rate": 9.390095745653359e-06, "loss": 2.7396597862243652, "step": 924 }, { "epoch": 0.7306477093206951, "grad_norm": 7.245885716473276, "learning_rate": 9.38789429185818e-06, "loss": 2.7173843383789062, "step": 925 }, { "epoch": 0.731437598736177, "grad_norm": 19.0606393622214, "learning_rate": 9.385689131155456e-06, "loss": 3.2145304679870605, "step": 926 }, { "epoch": 0.7322274881516587, "grad_norm": 14.229401707613647, "learning_rate": 9.383480265408109e-06, "loss": 2.976992130279541, "step": 927 }, { "epoch": 0.7330173775671406, "grad_norm": 11.037803882230573, "learning_rate": 9.3812676964822e-06, "loss": 3.361060619354248, "step": 928 }, { "epoch": 0.7338072669826224, "grad_norm": 23.852840951499623, "learning_rate": 9.379051426246914e-06, "loss": 3.223222255706787, "step": 929 }, { "epoch": 0.7345971563981043, "grad_norm": 14.223529634226185, "learning_rate": 9.376831456574561e-06, "loss": 2.8687520027160645, "step": 930 }, { "epoch": 0.735387045813586, "grad_norm": 7.156756071444025, "learning_rate": 9.374607789340584e-06, "loss": 2.873199701309204, "step": 931 }, { "epoch": 0.7361769352290679, "grad_norm": 7.616209133800571, "learning_rate": 9.37238042642354e-06, "loss": 3.0081300735473633, "step": 932 }, { "epoch": 0.7369668246445498, "grad_norm": 7.149090801992643, "learning_rate": 9.370149369705112e-06, "loss": 2.981412887573242, "step": 933 }, { "epoch": 0.7377567140600316, "grad_norm": 15.58505127158866, "learning_rate": 9.367914621070107e-06, "loss": 2.8132896423339844, "step": 934 }, { "epoch": 0.7385466034755134, "grad_norm": 13.465860481670774, "learning_rate": 9.365676182406446e-06, "loss": 3.4976980686187744, "step": 935 }, { "epoch": 0.7393364928909952, "grad_norm": 11.899120310195212, "learning_rate": 9.36343405560517e-06, "loss": 2.8405492305755615, "step": 936 }, { "epoch": 0.7401263823064771, "grad_norm": 11.739242983649948, "learning_rate": 9.361188242560436e-06, "loss": 2.7775559425354004, "step": 937 }, { "epoch": 0.740916271721959, "grad_norm": 10.611580913268543, "learning_rate": 9.358938745169512e-06, "loss": 2.7165842056274414, "step": 938 }, { "epoch": 0.7417061611374408, "grad_norm": 16.286711299762892, "learning_rate": 9.356685565332783e-06, "loss": 2.8377950191497803, "step": 939 }, { "epoch": 0.7424960505529226, "grad_norm": 10.06467306357769, "learning_rate": 9.354428704953743e-06, "loss": 2.605860471725464, "step": 940 }, { "epoch": 0.7432859399684044, "grad_norm": 9.106574090331993, "learning_rate": 9.352168165938992e-06, "loss": 3.039595127105713, "step": 941 }, { "epoch": 0.7440758293838863, "grad_norm": 13.910557878586525, "learning_rate": 9.349903950198243e-06, "loss": 3.1908493041992188, "step": 942 }, { "epoch": 0.7448657187993681, "grad_norm": 15.785181835421662, "learning_rate": 9.347636059644313e-06, "loss": 3.6178295612335205, "step": 943 }, { "epoch": 0.7456556082148499, "grad_norm": 13.627805304388191, "learning_rate": 9.345364496193124e-06, "loss": 2.30802059173584, "step": 944 }, { "epoch": 0.7464454976303317, "grad_norm": 13.113283530630058, "learning_rate": 9.343089261763698e-06, "loss": 2.7968385219573975, "step": 945 }, { "epoch": 0.7472353870458136, "grad_norm": 54.4044476972888, "learning_rate": 9.340810358278163e-06, "loss": 2.887650728225708, "step": 946 }, { "epoch": 0.7480252764612955, "grad_norm": 12.476106430670102, "learning_rate": 9.338527787661743e-06, "loss": 2.8030970096588135, "step": 947 }, { "epoch": 0.7488151658767772, "grad_norm": 11.33742883881337, "learning_rate": 9.336241551842759e-06, "loss": 2.91349196434021, "step": 948 }, { "epoch": 0.7496050552922591, "grad_norm": 10.310783405801457, "learning_rate": 9.333951652752636e-06, "loss": 2.592141628265381, "step": 949 }, { "epoch": 0.7503949447077409, "grad_norm": 19.674293659067043, "learning_rate": 9.331658092325884e-06, "loss": 3.334771156311035, "step": 950 }, { "epoch": 0.7511848341232228, "grad_norm": 9.403755720421595, "learning_rate": 9.32936087250011e-06, "loss": 2.669703960418701, "step": 951 }, { "epoch": 0.7519747235387045, "grad_norm": 7.772849025991002, "learning_rate": 9.327059995216017e-06, "loss": 2.772550344467163, "step": 952 }, { "epoch": 0.7527646129541864, "grad_norm": 12.063324109253381, "learning_rate": 9.32475546241739e-06, "loss": 3.1131880283355713, "step": 953 }, { "epoch": 0.7535545023696683, "grad_norm": 8.755986042063324, "learning_rate": 9.322447276051106e-06, "loss": 2.6408510208129883, "step": 954 }, { "epoch": 0.7543443917851501, "grad_norm": 16.519428236399992, "learning_rate": 9.32013543806713e-06, "loss": 2.889667510986328, "step": 955 }, { "epoch": 0.7551342812006319, "grad_norm": 10.18189972859802, "learning_rate": 9.31781995041851e-06, "loss": 1.998913049697876, "step": 956 }, { "epoch": 0.7559241706161137, "grad_norm": 12.945059813106072, "learning_rate": 9.315500815061378e-06, "loss": 2.572543144226074, "step": 957 }, { "epoch": 0.7567140600315956, "grad_norm": 7.760044557288449, "learning_rate": 9.313178033954946e-06, "loss": 2.8043367862701416, "step": 958 }, { "epoch": 0.7575039494470774, "grad_norm": 12.169397261499922, "learning_rate": 9.310851609061507e-06, "loss": 2.6561851501464844, "step": 959 }, { "epoch": 0.7582938388625592, "grad_norm": 8.032984175481454, "learning_rate": 9.308521542346434e-06, "loss": 3.0927743911743164, "step": 960 }, { "epoch": 0.759083728278041, "grad_norm": 11.370911942807972, "learning_rate": 9.306187835778173e-06, "loss": 3.112912178039551, "step": 961 }, { "epoch": 0.7598736176935229, "grad_norm": 30.68759001827329, "learning_rate": 9.30385049132825e-06, "loss": 2.32753324508667, "step": 962 }, { "epoch": 0.7606635071090048, "grad_norm": 19.20736554439743, "learning_rate": 9.301509510971259e-06, "loss": 2.4722962379455566, "step": 963 }, { "epoch": 0.7614533965244866, "grad_norm": 7.896910852894032, "learning_rate": 9.299164896684867e-06, "loss": 2.8172154426574707, "step": 964 }, { "epoch": 0.7622432859399684, "grad_norm": 8.285847211519757, "learning_rate": 9.296816650449813e-06, "loss": 3.217062473297119, "step": 965 }, { "epoch": 0.7630331753554502, "grad_norm": 14.348208287781882, "learning_rate": 9.294464774249905e-06, "loss": 3.099119186401367, "step": 966 }, { "epoch": 0.7638230647709321, "grad_norm": 17.781084869379775, "learning_rate": 9.292109270072013e-06, "loss": 3.2744314670562744, "step": 967 }, { "epoch": 0.764612954186414, "grad_norm": 8.411579984707192, "learning_rate": 9.289750139906075e-06, "loss": 3.0986344814300537, "step": 968 }, { "epoch": 0.7654028436018957, "grad_norm": 12.640585889095618, "learning_rate": 9.287387385745094e-06, "loss": 2.9320476055145264, "step": 969 }, { "epoch": 0.7661927330173776, "grad_norm": 18.268696929650034, "learning_rate": 9.28502100958513e-06, "loss": 1.89057457447052, "step": 970 }, { "epoch": 0.7669826224328594, "grad_norm": 8.318894507210741, "learning_rate": 9.282651013425309e-06, "loss": 2.681485891342163, "step": 971 }, { "epoch": 0.7677725118483413, "grad_norm": 12.92970042225882, "learning_rate": 9.280277399267808e-06, "loss": 3.2145707607269287, "step": 972 }, { "epoch": 0.768562401263823, "grad_norm": 8.005504028109495, "learning_rate": 9.277900169117864e-06, "loss": 2.4123406410217285, "step": 973 }, { "epoch": 0.7693522906793049, "grad_norm": 12.062434756711202, "learning_rate": 9.27551932498377e-06, "loss": 2.878951072692871, "step": 974 }, { "epoch": 0.7701421800947867, "grad_norm": 9.094718919698682, "learning_rate": 9.273134868876872e-06, "loss": 2.949256420135498, "step": 975 }, { "epoch": 0.7709320695102686, "grad_norm": 21.141805602331043, "learning_rate": 9.270746802811566e-06, "loss": 2.42653226852417, "step": 976 }, { "epoch": 0.7717219589257504, "grad_norm": 14.843894359981512, "learning_rate": 9.268355128805298e-06, "loss": 2.678997039794922, "step": 977 }, { "epoch": 0.7725118483412322, "grad_norm": 12.746073187269445, "learning_rate": 9.265959848878558e-06, "loss": 2.422623634338379, "step": 978 }, { "epoch": 0.7733017377567141, "grad_norm": 11.40428902421513, "learning_rate": 9.263560965054894e-06, "loss": 2.154381036758423, "step": 979 }, { "epoch": 0.7740916271721959, "grad_norm": 20.956261055506054, "learning_rate": 9.261158479360884e-06, "loss": 2.777848720550537, "step": 980 }, { "epoch": 0.7748815165876777, "grad_norm": 9.70811631729052, "learning_rate": 9.25875239382616e-06, "loss": 2.6050872802734375, "step": 981 }, { "epoch": 0.7756714060031595, "grad_norm": 10.241699921518476, "learning_rate": 9.25634271048339e-06, "loss": 2.080990791320801, "step": 982 }, { "epoch": 0.7764612954186414, "grad_norm": 11.768131588715727, "learning_rate": 9.253929431368282e-06, "loss": 3.0087733268737793, "step": 983 }, { "epoch": 0.7772511848341233, "grad_norm": 12.464298821154987, "learning_rate": 9.251512558519582e-06, "loss": 2.852950096130371, "step": 984 }, { "epoch": 0.778041074249605, "grad_norm": 11.838439498108045, "learning_rate": 9.24909209397907e-06, "loss": 3.7374510765075684, "step": 985 }, { "epoch": 0.7788309636650869, "grad_norm": 15.277389692233058, "learning_rate": 9.246668039791568e-06, "loss": 2.7718963623046875, "step": 986 }, { "epoch": 0.7796208530805687, "grad_norm": 6.6742475596758535, "learning_rate": 9.244240398004922e-06, "loss": 2.6798787117004395, "step": 987 }, { "epoch": 0.7804107424960506, "grad_norm": 10.32795974684294, "learning_rate": 9.241809170670008e-06, "loss": 2.7594618797302246, "step": 988 }, { "epoch": 0.7812006319115324, "grad_norm": 9.018400283983219, "learning_rate": 9.239374359840742e-06, "loss": 3.114830493927002, "step": 989 }, { "epoch": 0.7819905213270142, "grad_norm": 16.724516344511926, "learning_rate": 9.236935967574054e-06, "loss": 3.228322982788086, "step": 990 }, { "epoch": 0.782780410742496, "grad_norm": 23.82315306020477, "learning_rate": 9.234493995929912e-06, "loss": 3.004939079284668, "step": 991 }, { "epoch": 0.7835703001579779, "grad_norm": 13.99018159956102, "learning_rate": 9.232048446971296e-06, "loss": 2.6142897605895996, "step": 992 }, { "epoch": 0.7843601895734598, "grad_norm": 10.608540414298071, "learning_rate": 9.229599322764215e-06, "loss": 2.6615846157073975, "step": 993 }, { "epoch": 0.7851500789889415, "grad_norm": 23.10494541404644, "learning_rate": 9.227146625377699e-06, "loss": 2.831402063369751, "step": 994 }, { "epoch": 0.7859399684044234, "grad_norm": 9.659599628220574, "learning_rate": 9.224690356883793e-06, "loss": 3.2252285480499268, "step": 995 }, { "epoch": 0.7867298578199052, "grad_norm": 7.285958976275862, "learning_rate": 9.222230519357562e-06, "loss": 2.635441780090332, "step": 996 }, { "epoch": 0.7875197472353871, "grad_norm": 7.160771261905654, "learning_rate": 9.219767114877086e-06, "loss": 2.6156837940216064, "step": 997 }, { "epoch": 0.7883096366508688, "grad_norm": 14.57854463656351, "learning_rate": 9.217300145523453e-06, "loss": 2.979773998260498, "step": 998 }, { "epoch": 0.7890995260663507, "grad_norm": 10.862479398287723, "learning_rate": 9.214829613380772e-06, "loss": 2.811668872833252, "step": 999 }, { "epoch": 0.7898894154818326, "grad_norm": 14.273686758443457, "learning_rate": 9.212355520536153e-06, "loss": 3.670020818710327, "step": 1000 }, { "epoch": 0.7906793048973144, "grad_norm": 7.748159510449536, "learning_rate": 9.209877869079719e-06, "loss": 2.974087953567505, "step": 1001 }, { "epoch": 0.7914691943127962, "grad_norm": 7.596423622962509, "learning_rate": 9.207396661104599e-06, "loss": 2.0368399620056152, "step": 1002 }, { "epoch": 0.792259083728278, "grad_norm": 15.086522167851689, "learning_rate": 9.204911898706925e-06, "loss": 3.096889019012451, "step": 1003 }, { "epoch": 0.7930489731437599, "grad_norm": 11.981971765032, "learning_rate": 9.202423583985832e-06, "loss": 2.6723742485046387, "step": 1004 }, { "epoch": 0.7938388625592417, "grad_norm": 8.646415889552525, "learning_rate": 9.199931719043456e-06, "loss": 2.6578660011291504, "step": 1005 }, { "epoch": 0.7946287519747235, "grad_norm": 14.68042449542436, "learning_rate": 9.197436305984933e-06, "loss": 2.629140853881836, "step": 1006 }, { "epoch": 0.7954186413902053, "grad_norm": 9.552656598953957, "learning_rate": 9.194937346918398e-06, "loss": 2.732150077819824, "step": 1007 }, { "epoch": 0.7962085308056872, "grad_norm": 8.954639954969533, "learning_rate": 9.192434843954977e-06, "loss": 2.240567445755005, "step": 1008 }, { "epoch": 0.7969984202211691, "grad_norm": 7.418598530709051, "learning_rate": 9.189928799208794e-06, "loss": 2.6821372509002686, "step": 1009 }, { "epoch": 0.7977883096366508, "grad_norm": 6.460842787120627, "learning_rate": 9.187419214796967e-06, "loss": 2.697354793548584, "step": 1010 }, { "epoch": 0.7985781990521327, "grad_norm": 6.9324406657677695, "learning_rate": 9.184906092839596e-06, "loss": 1.7275753021240234, "step": 1011 }, { "epoch": 0.7993680884676145, "grad_norm": 15.212694250890982, "learning_rate": 9.182389435459776e-06, "loss": 2.717050075531006, "step": 1012 }, { "epoch": 0.8001579778830964, "grad_norm": 8.870533933713203, "learning_rate": 9.17986924478359e-06, "loss": 2.5672261714935303, "step": 1013 }, { "epoch": 0.8009478672985783, "grad_norm": 12.582683742658494, "learning_rate": 9.177345522940102e-06, "loss": 2.8706305027008057, "step": 1014 }, { "epoch": 0.80173775671406, "grad_norm": 9.481753489579146, "learning_rate": 9.174818272061358e-06, "loss": 2.880066156387329, "step": 1015 }, { "epoch": 0.8025276461295419, "grad_norm": 7.842796420054131, "learning_rate": 9.172287494282393e-06, "loss": 1.724432349205017, "step": 1016 }, { "epoch": 0.8033175355450237, "grad_norm": 13.467226411075792, "learning_rate": 9.169753191741211e-06, "loss": 3.1031036376953125, "step": 1017 }, { "epoch": 0.8041074249605056, "grad_norm": 9.642248663487798, "learning_rate": 9.167215366578804e-06, "loss": 2.266718626022339, "step": 1018 }, { "epoch": 0.8048973143759873, "grad_norm": 9.565467328925202, "learning_rate": 9.16467402093913e-06, "loss": 2.8157501220703125, "step": 1019 }, { "epoch": 0.8056872037914692, "grad_norm": 15.209367854706489, "learning_rate": 9.162129156969131e-06, "loss": 2.440033197402954, "step": 1020 }, { "epoch": 0.806477093206951, "grad_norm": 9.68569819521668, "learning_rate": 9.159580776818715e-06, "loss": 2.4779491424560547, "step": 1021 }, { "epoch": 0.8072669826224329, "grad_norm": 12.736707657945674, "learning_rate": 9.15702888264076e-06, "loss": 2.2091784477233887, "step": 1022 }, { "epoch": 0.8080568720379147, "grad_norm": 13.514115226011919, "learning_rate": 9.154473476591114e-06, "loss": 3.1801180839538574, "step": 1023 }, { "epoch": 0.8088467614533965, "grad_norm": 20.122470837459804, "learning_rate": 9.151914560828598e-06, "loss": 3.0873842239379883, "step": 1024 }, { "epoch": 0.8096366508688784, "grad_norm": 13.480557474542609, "learning_rate": 9.149352137514987e-06, "loss": 2.603421449661255, "step": 1025 }, { "epoch": 0.8104265402843602, "grad_norm": 21.95371373023252, "learning_rate": 9.146786208815026e-06, "loss": 2.6986594200134277, "step": 1026 }, { "epoch": 0.811216429699842, "grad_norm": 38.65128008605009, "learning_rate": 9.144216776896422e-06, "loss": 2.905870199203491, "step": 1027 }, { "epoch": 0.8120063191153238, "grad_norm": 10.540992057323065, "learning_rate": 9.141643843929837e-06, "loss": 2.4717659950256348, "step": 1028 }, { "epoch": 0.8127962085308057, "grad_norm": 15.24208436151514, "learning_rate": 9.139067412088895e-06, "loss": 2.477531909942627, "step": 1029 }, { "epoch": 0.8135860979462876, "grad_norm": 9.30424965654663, "learning_rate": 9.136487483550172e-06, "loss": 3.016301393508911, "step": 1030 }, { "epoch": 0.8143759873617693, "grad_norm": 22.185168693739104, "learning_rate": 9.1339040604932e-06, "loss": 2.662216901779175, "step": 1031 }, { "epoch": 0.8151658767772512, "grad_norm": 29.979197475408252, "learning_rate": 9.131317145100469e-06, "loss": 2.694211959838867, "step": 1032 }, { "epoch": 0.815955766192733, "grad_norm": 11.06100764534907, "learning_rate": 9.128726739557408e-06, "loss": 2.5702898502349854, "step": 1033 }, { "epoch": 0.8167456556082149, "grad_norm": 7.880841439580581, "learning_rate": 9.126132846052401e-06, "loss": 2.8700671195983887, "step": 1034 }, { "epoch": 0.8175355450236966, "grad_norm": 27.058763319079304, "learning_rate": 9.123535466776778e-06, "loss": 3.244725227355957, "step": 1035 }, { "epoch": 0.8183254344391785, "grad_norm": 16.382446856894965, "learning_rate": 9.120934603924816e-06, "loss": 2.4301857948303223, "step": 1036 }, { "epoch": 0.8191153238546603, "grad_norm": 16.26715685664894, "learning_rate": 9.118330259693728e-06, "loss": 4.2284698486328125, "step": 1037 }, { "epoch": 0.8199052132701422, "grad_norm": 21.996363546185417, "learning_rate": 9.115722436283676e-06, "loss": 3.5227627754211426, "step": 1038 }, { "epoch": 0.8206951026856241, "grad_norm": 20.19577186521635, "learning_rate": 9.113111135897757e-06, "loss": 3.057605743408203, "step": 1039 }, { "epoch": 0.8214849921011058, "grad_norm": 13.600963392770467, "learning_rate": 9.110496360742006e-06, "loss": 3.0911357402801514, "step": 1040 }, { "epoch": 0.8222748815165877, "grad_norm": 12.336475393471844, "learning_rate": 9.107878113025393e-06, "loss": 3.0051560401916504, "step": 1041 }, { "epoch": 0.8230647709320695, "grad_norm": 10.984835326798942, "learning_rate": 9.105256394959822e-06, "loss": 2.4400599002838135, "step": 1042 }, { "epoch": 0.8238546603475514, "grad_norm": 20.947414774581414, "learning_rate": 9.102631208760131e-06, "loss": 3.4805350303649902, "step": 1043 }, { "epoch": 0.8246445497630331, "grad_norm": 14.360309838396736, "learning_rate": 9.100002556644086e-06, "loss": 2.588095188140869, "step": 1044 }, { "epoch": 0.825434439178515, "grad_norm": 10.20829475484016, "learning_rate": 9.097370440832378e-06, "loss": 2.78764271736145, "step": 1045 }, { "epoch": 0.8262243285939969, "grad_norm": 10.885822678518133, "learning_rate": 9.094734863548629e-06, "loss": 2.8764867782592773, "step": 1046 }, { "epoch": 0.8270142180094787, "grad_norm": 8.714569458458401, "learning_rate": 9.092095827019385e-06, "loss": 2.7947893142700195, "step": 1047 }, { "epoch": 0.8278041074249605, "grad_norm": 16.531291726752407, "learning_rate": 9.08945333347411e-06, "loss": 3.4402239322662354, "step": 1048 }, { "epoch": 0.8285939968404423, "grad_norm": 11.649383119473496, "learning_rate": 9.086807385145193e-06, "loss": 2.5275776386260986, "step": 1049 }, { "epoch": 0.8293838862559242, "grad_norm": 37.04934005274269, "learning_rate": 9.084157984267939e-06, "loss": 3.1136865615844727, "step": 1050 }, { "epoch": 0.830173775671406, "grad_norm": 27.375499924589366, "learning_rate": 9.08150513308057e-06, "loss": 2.692000389099121, "step": 1051 }, { "epoch": 0.8309636650868878, "grad_norm": 15.165913666522817, "learning_rate": 9.078848833824226e-06, "loss": 3.2129108905792236, "step": 1052 }, { "epoch": 0.8317535545023697, "grad_norm": 16.540229701542952, "learning_rate": 9.076189088742955e-06, "loss": 3.135190963745117, "step": 1053 }, { "epoch": 0.8325434439178515, "grad_norm": 11.775957201988549, "learning_rate": 9.073525900083717e-06, "loss": 2.9450531005859375, "step": 1054 }, { "epoch": 0.8333333333333334, "grad_norm": 28.18681618707373, "learning_rate": 9.070859270096385e-06, "loss": 3.7651073932647705, "step": 1055 }, { "epoch": 0.8341232227488151, "grad_norm": 11.37393230420539, "learning_rate": 9.06818920103374e-06, "loss": 2.674816846847534, "step": 1056 }, { "epoch": 0.834913112164297, "grad_norm": 5.773096989056217, "learning_rate": 9.065515695151459e-06, "loss": 2.8101024627685547, "step": 1057 }, { "epoch": 0.8357030015797788, "grad_norm": 9.404139830590696, "learning_rate": 9.06283875470813e-06, "loss": 2.883345603942871, "step": 1058 }, { "epoch": 0.8364928909952607, "grad_norm": 10.053270788353137, "learning_rate": 9.060158381965242e-06, "loss": 3.0177576541900635, "step": 1059 }, { "epoch": 0.8372827804107424, "grad_norm": 9.56854149440088, "learning_rate": 9.057474579187184e-06, "loss": 2.5740466117858887, "step": 1060 }, { "epoch": 0.8380726698262243, "grad_norm": 25.48696327806149, "learning_rate": 9.05478734864124e-06, "loss": 2.5997400283813477, "step": 1061 }, { "epoch": 0.8388625592417062, "grad_norm": 8.822888785903398, "learning_rate": 9.052096692597594e-06, "loss": 2.480900287628174, "step": 1062 }, { "epoch": 0.839652448657188, "grad_norm": 12.233243827649204, "learning_rate": 9.049402613329316e-06, "loss": 2.372171401977539, "step": 1063 }, { "epoch": 0.8404423380726699, "grad_norm": 10.92795851598965, "learning_rate": 9.046705113112375e-06, "loss": 3.3010194301605225, "step": 1064 }, { "epoch": 0.8412322274881516, "grad_norm": 14.04201976986867, "learning_rate": 9.04400419422563e-06, "loss": 1.9576343297958374, "step": 1065 }, { "epoch": 0.8420221169036335, "grad_norm": 11.652105566428002, "learning_rate": 9.041299858950824e-06, "loss": 2.61598539352417, "step": 1066 }, { "epoch": 0.8428120063191153, "grad_norm": 8.98594247433151, "learning_rate": 9.03859210957259e-06, "loss": 2.4816157817840576, "step": 1067 }, { "epoch": 0.8436018957345972, "grad_norm": 15.298066202780824, "learning_rate": 9.035880948378443e-06, "loss": 2.8336338996887207, "step": 1068 }, { "epoch": 0.844391785150079, "grad_norm": 10.357852449251151, "learning_rate": 9.03316637765878e-06, "loss": 2.5921220779418945, "step": 1069 }, { "epoch": 0.8451816745655608, "grad_norm": 8.480809498263813, "learning_rate": 9.030448399706881e-06, "loss": 1.7483251094818115, "step": 1070 }, { "epoch": 0.8459715639810427, "grad_norm": 27.723578315488755, "learning_rate": 9.0277270168189e-06, "loss": 2.880528688430786, "step": 1071 }, { "epoch": 0.8467614533965245, "grad_norm": 6.917974588538861, "learning_rate": 9.025002231293874e-06, "loss": 2.048827648162842, "step": 1072 }, { "epoch": 0.8475513428120063, "grad_norm": 12.11627041441912, "learning_rate": 9.022274045433706e-06, "loss": 2.47039794921875, "step": 1073 }, { "epoch": 0.8483412322274881, "grad_norm": 44.924170557742606, "learning_rate": 9.019542461543181e-06, "loss": 2.97735857963562, "step": 1074 }, { "epoch": 0.84913112164297, "grad_norm": 11.486390013948144, "learning_rate": 9.016807481929948e-06, "loss": 3.1126694679260254, "step": 1075 }, { "epoch": 0.8499210110584519, "grad_norm": 6.894815895299225, "learning_rate": 9.014069108904526e-06, "loss": 2.364047050476074, "step": 1076 }, { "epoch": 0.8507109004739336, "grad_norm": 23.140984836947318, "learning_rate": 9.011327344780306e-06, "loss": 2.1283740997314453, "step": 1077 }, { "epoch": 0.8515007898894155, "grad_norm": 10.70156784462954, "learning_rate": 9.008582191873531e-06, "loss": 2.7940797805786133, "step": 1078 }, { "epoch": 0.8522906793048973, "grad_norm": 7.795231096778107, "learning_rate": 9.005833652503323e-06, "loss": 2.638899803161621, "step": 1079 }, { "epoch": 0.8530805687203792, "grad_norm": 8.536488429058528, "learning_rate": 9.003081728991654e-06, "loss": 2.5842251777648926, "step": 1080 }, { "epoch": 0.8538704581358609, "grad_norm": 12.351330943815283, "learning_rate": 9.000326423663356e-06, "loss": 2.5075812339782715, "step": 1081 }, { "epoch": 0.8546603475513428, "grad_norm": 7.148232698465105, "learning_rate": 8.997567738846126e-06, "loss": 2.6017603874206543, "step": 1082 }, { "epoch": 0.8554502369668247, "grad_norm": 7.110606099151946, "learning_rate": 8.994805676870504e-06, "loss": 2.1451048851013184, "step": 1083 }, { "epoch": 0.8562401263823065, "grad_norm": 7.011285288703447, "learning_rate": 8.992040240069892e-06, "loss": 2.408576488494873, "step": 1084 }, { "epoch": 0.8570300157977883, "grad_norm": 6.646872041848021, "learning_rate": 8.98927143078054e-06, "loss": 2.678819179534912, "step": 1085 }, { "epoch": 0.8578199052132701, "grad_norm": 14.417901398540348, "learning_rate": 8.986499251341545e-06, "loss": 2.4109766483306885, "step": 1086 }, { "epoch": 0.858609794628752, "grad_norm": 18.976709689582428, "learning_rate": 8.983723704094856e-06, "loss": 2.79660701751709, "step": 1087 }, { "epoch": 0.8593996840442338, "grad_norm": 13.97392247737316, "learning_rate": 8.980944791385262e-06, "loss": 2.5753228664398193, "step": 1088 }, { "epoch": 0.8601895734597157, "grad_norm": 10.646221312923302, "learning_rate": 8.9781625155604e-06, "loss": 3.145460367202759, "step": 1089 }, { "epoch": 0.8609794628751974, "grad_norm": 12.125091970169855, "learning_rate": 8.975376878970744e-06, "loss": 3.016714334487915, "step": 1090 }, { "epoch": 0.8617693522906793, "grad_norm": 10.247379269546157, "learning_rate": 8.972587883969612e-06, "loss": 3.0661256313323975, "step": 1091 }, { "epoch": 0.8625592417061612, "grad_norm": 7.510993195440685, "learning_rate": 8.969795532913152e-06, "loss": 3.1030752658843994, "step": 1092 }, { "epoch": 0.863349131121643, "grad_norm": 11.085936006300932, "learning_rate": 8.966999828160355e-06, "loss": 2.8964810371398926, "step": 1093 }, { "epoch": 0.8641390205371248, "grad_norm": 10.635255649754786, "learning_rate": 8.96420077207304e-06, "loss": 2.5778634548187256, "step": 1094 }, { "epoch": 0.8649289099526066, "grad_norm": 11.699323935123369, "learning_rate": 8.961398367015857e-06, "loss": 2.6623075008392334, "step": 1095 }, { "epoch": 0.8657187993680885, "grad_norm": 17.05618719334767, "learning_rate": 8.95859261535629e-06, "loss": 3.076087474822998, "step": 1096 }, { "epoch": 0.8665086887835703, "grad_norm": 9.989617355430017, "learning_rate": 8.955783519464644e-06, "loss": 2.579759120941162, "step": 1097 }, { "epoch": 0.8672985781990521, "grad_norm": 13.983057400258621, "learning_rate": 8.952971081714056e-06, "loss": 2.9468941688537598, "step": 1098 }, { "epoch": 0.868088467614534, "grad_norm": 11.807806126259434, "learning_rate": 8.950155304480482e-06, "loss": 2.851876974105835, "step": 1099 }, { "epoch": 0.8688783570300158, "grad_norm": 9.121474991816212, "learning_rate": 8.947336190142696e-06, "loss": 2.568575382232666, "step": 1100 }, { "epoch": 0.8696682464454977, "grad_norm": 25.714257060738078, "learning_rate": 8.9445137410823e-06, "loss": 4.189000129699707, "step": 1101 }, { "epoch": 0.8704581358609794, "grad_norm": 9.883995956029263, "learning_rate": 8.941687959683707e-06, "loss": 2.751539707183838, "step": 1102 }, { "epoch": 0.8712480252764613, "grad_norm": 25.188255479459396, "learning_rate": 8.938858848334144e-06, "loss": 3.325855255126953, "step": 1103 }, { "epoch": 0.8720379146919431, "grad_norm": 13.67010382519282, "learning_rate": 8.936026409423656e-06, "loss": 2.7301278114318848, "step": 1104 }, { "epoch": 0.872827804107425, "grad_norm": 11.48889119309666, "learning_rate": 8.933190645345096e-06, "loss": 2.7599031925201416, "step": 1105 }, { "epoch": 0.8736176935229067, "grad_norm": 14.87256495310459, "learning_rate": 8.930351558494128e-06, "loss": 3.0885415077209473, "step": 1106 }, { "epoch": 0.8744075829383886, "grad_norm": 10.449701555194096, "learning_rate": 8.92750915126922e-06, "loss": 2.876091718673706, "step": 1107 }, { "epoch": 0.8751974723538705, "grad_norm": 14.84773136100038, "learning_rate": 8.924663426071647e-06, "loss": 3.0341625213623047, "step": 1108 }, { "epoch": 0.8759873617693523, "grad_norm": 8.15947775366893, "learning_rate": 8.921814385305489e-06, "loss": 2.277728796005249, "step": 1109 }, { "epoch": 0.8767772511848341, "grad_norm": 12.99890714367993, "learning_rate": 8.918962031377622e-06, "loss": 2.8230514526367188, "step": 1110 }, { "epoch": 0.8775671406003159, "grad_norm": 9.545433136406718, "learning_rate": 8.916106366697728e-06, "loss": 2.9621667861938477, "step": 1111 }, { "epoch": 0.8783570300157978, "grad_norm": 10.39494016316492, "learning_rate": 8.913247393678278e-06, "loss": 2.3225202560424805, "step": 1112 }, { "epoch": 0.8791469194312796, "grad_norm": 8.192269609832815, "learning_rate": 8.910385114734544e-06, "loss": 2.7106945514678955, "step": 1113 }, { "epoch": 0.8799368088467614, "grad_norm": 10.432502214764677, "learning_rate": 8.907519532284589e-06, "loss": 2.4334917068481445, "step": 1114 }, { "epoch": 0.8807266982622433, "grad_norm": 12.431139105375072, "learning_rate": 8.904650648749264e-06, "loss": 2.7972915172576904, "step": 1115 }, { "epoch": 0.8815165876777251, "grad_norm": 9.209050505947106, "learning_rate": 8.901778466552215e-06, "loss": 2.729956865310669, "step": 1116 }, { "epoch": 0.882306477093207, "grad_norm": 34.636350949112945, "learning_rate": 8.898902988119869e-06, "loss": 4.157067775726318, "step": 1117 }, { "epoch": 0.8830963665086888, "grad_norm": 10.426826060433754, "learning_rate": 8.896024215881439e-06, "loss": 2.5612125396728516, "step": 1118 }, { "epoch": 0.8838862559241706, "grad_norm": 15.903953548700464, "learning_rate": 8.89314215226892e-06, "loss": 2.477797031402588, "step": 1119 }, { "epoch": 0.8846761453396524, "grad_norm": 14.039357640579942, "learning_rate": 8.890256799717092e-06, "loss": 2.5441317558288574, "step": 1120 }, { "epoch": 0.8854660347551343, "grad_norm": 12.158639837027732, "learning_rate": 8.88736816066351e-06, "loss": 2.358978748321533, "step": 1121 }, { "epoch": 0.8862559241706162, "grad_norm": 23.052840320992853, "learning_rate": 8.884476237548503e-06, "loss": 2.7650527954101562, "step": 1122 }, { "epoch": 0.8870458135860979, "grad_norm": 8.158346405347508, "learning_rate": 8.88158103281518e-06, "loss": 2.303537368774414, "step": 1123 }, { "epoch": 0.8878357030015798, "grad_norm": 20.354107714614678, "learning_rate": 8.87868254890942e-06, "loss": 2.8699512481689453, "step": 1124 }, { "epoch": 0.8886255924170616, "grad_norm": 12.005895316989173, "learning_rate": 8.875780788279868e-06, "loss": 2.5772287845611572, "step": 1125 }, { "epoch": 0.8894154818325435, "grad_norm": 12.654751625675802, "learning_rate": 8.872875753377943e-06, "loss": 2.408010244369507, "step": 1126 }, { "epoch": 0.8902053712480252, "grad_norm": 11.36505652800493, "learning_rate": 8.86996744665783e-06, "loss": 2.7588155269622803, "step": 1127 }, { "epoch": 0.8909952606635071, "grad_norm": 10.687132877981576, "learning_rate": 8.867055870576474e-06, "loss": 2.394656181335449, "step": 1128 }, { "epoch": 0.891785150078989, "grad_norm": 16.83799896924904, "learning_rate": 8.864141027593585e-06, "loss": 2.3354270458221436, "step": 1129 }, { "epoch": 0.8925750394944708, "grad_norm": 13.157340664207496, "learning_rate": 8.86122292017163e-06, "loss": 3.017643690109253, "step": 1130 }, { "epoch": 0.8933649289099526, "grad_norm": 11.447127195057677, "learning_rate": 8.858301550775836e-06, "loss": 2.929154396057129, "step": 1131 }, { "epoch": 0.8941548183254344, "grad_norm": 9.601561380597447, "learning_rate": 8.855376921874188e-06, "loss": 2.9352359771728516, "step": 1132 }, { "epoch": 0.8949447077409163, "grad_norm": 12.643220814871029, "learning_rate": 8.85244903593742e-06, "loss": 2.5534372329711914, "step": 1133 }, { "epoch": 0.8957345971563981, "grad_norm": 13.712786164898073, "learning_rate": 8.849517895439022e-06, "loss": 2.969341516494751, "step": 1134 }, { "epoch": 0.8965244865718799, "grad_norm": 27.475418365680294, "learning_rate": 8.846583502855229e-06, "loss": 2.770808696746826, "step": 1135 }, { "epoch": 0.8973143759873617, "grad_norm": 19.579432202817973, "learning_rate": 8.843645860665024e-06, "loss": 2.384641408920288, "step": 1136 }, { "epoch": 0.8981042654028436, "grad_norm": 19.78607010018951, "learning_rate": 8.84070497135014e-06, "loss": 2.5281355381011963, "step": 1137 }, { "epoch": 0.8988941548183255, "grad_norm": 14.094462449831422, "learning_rate": 8.83776083739505e-06, "loss": 2.215435028076172, "step": 1138 }, { "epoch": 0.8996840442338072, "grad_norm": 17.933215742543844, "learning_rate": 8.834813461286965e-06, "loss": 2.291853904724121, "step": 1139 }, { "epoch": 0.9004739336492891, "grad_norm": 8.843018958262709, "learning_rate": 8.831862845515842e-06, "loss": 2.638589382171631, "step": 1140 }, { "epoch": 0.9012638230647709, "grad_norm": 9.967486938214911, "learning_rate": 8.828908992574366e-06, "loss": 2.7979438304901123, "step": 1141 }, { "epoch": 0.9020537124802528, "grad_norm": 13.195252873575834, "learning_rate": 8.825951904957967e-06, "loss": 3.2847375869750977, "step": 1142 }, { "epoch": 0.9028436018957346, "grad_norm": 8.059436353539473, "learning_rate": 8.822991585164799e-06, "loss": 3.350722312927246, "step": 1143 }, { "epoch": 0.9036334913112164, "grad_norm": 8.411890662561238, "learning_rate": 8.82002803569575e-06, "loss": 2.7170395851135254, "step": 1144 }, { "epoch": 0.9044233807266983, "grad_norm": 6.173660264218937, "learning_rate": 8.81706125905444e-06, "loss": 2.3877620697021484, "step": 1145 }, { "epoch": 0.9052132701421801, "grad_norm": 7.345392187325678, "learning_rate": 8.814091257747206e-06, "loss": 2.355260133743286, "step": 1146 }, { "epoch": 0.906003159557662, "grad_norm": 18.230032497512713, "learning_rate": 8.811118034283118e-06, "loss": 2.5785627365112305, "step": 1147 }, { "epoch": 0.9067930489731437, "grad_norm": 69.44999711709127, "learning_rate": 8.808141591173966e-06, "loss": 2.761636734008789, "step": 1148 }, { "epoch": 0.9075829383886256, "grad_norm": 12.183701745113998, "learning_rate": 8.805161930934256e-06, "loss": 2.5530524253845215, "step": 1149 }, { "epoch": 0.9083728278041074, "grad_norm": 7.936573557522932, "learning_rate": 8.802179056081217e-06, "loss": 2.907682418823242, "step": 1150 }, { "epoch": 0.9091627172195893, "grad_norm": 10.173205195310263, "learning_rate": 8.799192969134792e-06, "loss": 2.7638745307922363, "step": 1151 }, { "epoch": 0.909952606635071, "grad_norm": 11.128588471862935, "learning_rate": 8.796203672617634e-06, "loss": 2.4181337356567383, "step": 1152 }, { "epoch": 0.9107424960505529, "grad_norm": 9.655844045925859, "learning_rate": 8.793211169055114e-06, "loss": 2.773463249206543, "step": 1153 }, { "epoch": 0.9115323854660348, "grad_norm": 11.03401768931024, "learning_rate": 8.790215460975307e-06, "loss": 2.9100022315979004, "step": 1154 }, { "epoch": 0.9123222748815166, "grad_norm": 7.123266399146448, "learning_rate": 8.787216550908997e-06, "loss": 2.4781482219696045, "step": 1155 }, { "epoch": 0.9131121642969984, "grad_norm": 17.622221819502133, "learning_rate": 8.784214441389675e-06, "loss": 2.452256202697754, "step": 1156 }, { "epoch": 0.9139020537124802, "grad_norm": 13.113971999138425, "learning_rate": 8.78120913495353e-06, "loss": 2.896636486053467, "step": 1157 }, { "epoch": 0.9146919431279621, "grad_norm": 9.000975431480043, "learning_rate": 8.778200634139456e-06, "loss": 2.7890336513519287, "step": 1158 }, { "epoch": 0.915481832543444, "grad_norm": 7.481715343714205, "learning_rate": 8.775188941489046e-06, "loss": 2.496453046798706, "step": 1159 }, { "epoch": 0.9162717219589257, "grad_norm": 10.923526680550703, "learning_rate": 8.772174059546587e-06, "loss": 2.843217372894287, "step": 1160 }, { "epoch": 0.9170616113744076, "grad_norm": 11.10478528656359, "learning_rate": 8.76915599085906e-06, "loss": 2.435269355773926, "step": 1161 }, { "epoch": 0.9178515007898894, "grad_norm": 6.772268942857849, "learning_rate": 8.76613473797614e-06, "loss": 2.2593870162963867, "step": 1162 }, { "epoch": 0.9186413902053713, "grad_norm": 9.493749507534623, "learning_rate": 8.76311030345019e-06, "loss": 2.758202075958252, "step": 1163 }, { "epoch": 0.919431279620853, "grad_norm": 14.911631831896822, "learning_rate": 8.760082689836267e-06, "loss": 3.124772071838379, "step": 1164 }, { "epoch": 0.9202211690363349, "grad_norm": 12.853328106271473, "learning_rate": 8.757051899692104e-06, "loss": 2.7403624057769775, "step": 1165 }, { "epoch": 0.9210110584518167, "grad_norm": 16.513030408815556, "learning_rate": 8.754017935578124e-06, "loss": 2.6747732162475586, "step": 1166 }, { "epoch": 0.9218009478672986, "grad_norm": 19.976818001028782, "learning_rate": 8.75098080005743e-06, "loss": 3.0514602661132812, "step": 1167 }, { "epoch": 0.9225908372827805, "grad_norm": 15.44765720208878, "learning_rate": 8.747940495695804e-06, "loss": 3.660196304321289, "step": 1168 }, { "epoch": 0.9233807266982622, "grad_norm": 46.251367241411415, "learning_rate": 8.744897025061704e-06, "loss": 3.5469841957092285, "step": 1169 }, { "epoch": 0.9241706161137441, "grad_norm": 10.692755449897618, "learning_rate": 8.741850390726268e-06, "loss": 3.707672595977783, "step": 1170 }, { "epoch": 0.9249605055292259, "grad_norm": 17.772477199253103, "learning_rate": 8.7388005952633e-06, "loss": 2.4242305755615234, "step": 1171 }, { "epoch": 0.9257503949447078, "grad_norm": 18.70690435962618, "learning_rate": 8.735747641249276e-06, "loss": 2.7367851734161377, "step": 1172 }, { "epoch": 0.9265402843601895, "grad_norm": 14.123094504926451, "learning_rate": 8.732691531263344e-06, "loss": 2.673311471939087, "step": 1173 }, { "epoch": 0.9273301737756714, "grad_norm": 6.632016401084768, "learning_rate": 8.729632267887313e-06, "loss": 2.4245405197143555, "step": 1174 }, { "epoch": 0.9281200631911533, "grad_norm": 12.470479655533174, "learning_rate": 8.726569853705662e-06, "loss": 2.9810891151428223, "step": 1175 }, { "epoch": 0.9289099526066351, "grad_norm": 10.985987841138897, "learning_rate": 8.723504291305526e-06, "loss": 2.6249923706054688, "step": 1176 }, { "epoch": 0.9296998420221169, "grad_norm": 10.933662910087982, "learning_rate": 8.720435583276706e-06, "loss": 2.765192985534668, "step": 1177 }, { "epoch": 0.9304897314375987, "grad_norm": 11.327209932917212, "learning_rate": 8.71736373221165e-06, "loss": 2.9828245639801025, "step": 1178 }, { "epoch": 0.9312796208530806, "grad_norm": 13.902491029618451, "learning_rate": 8.714288740705475e-06, "loss": 2.574052572250366, "step": 1179 }, { "epoch": 0.9320695102685624, "grad_norm": 7.981484058770302, "learning_rate": 8.711210611355938e-06, "loss": 3.03520131111145, "step": 1180 }, { "epoch": 0.9328593996840442, "grad_norm": 7.784551362187513, "learning_rate": 8.708129346763457e-06, "loss": 2.7354891300201416, "step": 1181 }, { "epoch": 0.933649289099526, "grad_norm": 8.702455827026565, "learning_rate": 8.705044949531092e-06, "loss": 2.977090358734131, "step": 1182 }, { "epoch": 0.9344391785150079, "grad_norm": 10.8130926994093, "learning_rate": 8.701957422264555e-06, "loss": 2.7915494441986084, "step": 1183 }, { "epoch": 0.9352290679304898, "grad_norm": 8.086616773290197, "learning_rate": 8.698866767572196e-06, "loss": 2.89163875579834, "step": 1184 }, { "epoch": 0.9360189573459715, "grad_norm": 13.946757867057391, "learning_rate": 8.695772988065011e-06, "loss": 2.9507193565368652, "step": 1185 }, { "epoch": 0.9368088467614534, "grad_norm": 12.841477106561502, "learning_rate": 8.692676086356637e-06, "loss": 2.862083673477173, "step": 1186 }, { "epoch": 0.9375987361769352, "grad_norm": 14.373739928948876, "learning_rate": 8.689576065063343e-06, "loss": 2.9636409282684326, "step": 1187 }, { "epoch": 0.9383886255924171, "grad_norm": 11.002313560997857, "learning_rate": 8.686472926804041e-06, "loss": 2.7443132400512695, "step": 1188 }, { "epoch": 0.9391785150078988, "grad_norm": 18.302833638551288, "learning_rate": 8.683366674200271e-06, "loss": 2.8994405269622803, "step": 1189 }, { "epoch": 0.9399684044233807, "grad_norm": 20.60950939701956, "learning_rate": 8.680257309876205e-06, "loss": 2.9267029762268066, "step": 1190 }, { "epoch": 0.9407582938388626, "grad_norm": 8.17531589588896, "learning_rate": 8.677144836458645e-06, "loss": 2.567711591720581, "step": 1191 }, { "epoch": 0.9415481832543444, "grad_norm": 18.719866253308787, "learning_rate": 8.674029256577016e-06, "loss": 3.6237592697143555, "step": 1192 }, { "epoch": 0.9423380726698263, "grad_norm": 5.9652452107958736, "learning_rate": 8.670910572863376e-06, "loss": 2.7660140991210938, "step": 1193 }, { "epoch": 0.943127962085308, "grad_norm": 13.08925807761372, "learning_rate": 8.667788787952395e-06, "loss": 2.949338674545288, "step": 1194 }, { "epoch": 0.9439178515007899, "grad_norm": 8.927437335126726, "learning_rate": 8.664663904481367e-06, "loss": 2.4335386753082275, "step": 1195 }, { "epoch": 0.9447077409162717, "grad_norm": 13.919010389840851, "learning_rate": 8.661535925090205e-06, "loss": 2.838738441467285, "step": 1196 }, { "epoch": 0.9454976303317536, "grad_norm": 11.103546933243214, "learning_rate": 8.658404852421436e-06, "loss": 2.6665523052215576, "step": 1197 }, { "epoch": 0.9462875197472354, "grad_norm": 8.53073315483788, "learning_rate": 8.655270689120201e-06, "loss": 2.4120519161224365, "step": 1198 }, { "epoch": 0.9470774091627172, "grad_norm": 8.850565917179576, "learning_rate": 8.652133437834251e-06, "loss": 2.8354744911193848, "step": 1199 }, { "epoch": 0.9478672985781991, "grad_norm": 11.217378073646433, "learning_rate": 8.648993101213944e-06, "loss": 2.960960626602173, "step": 1200 }, { "epoch": 0.9486571879936809, "grad_norm": 10.898057483385264, "learning_rate": 8.645849681912253e-06, "loss": 2.597686767578125, "step": 1201 }, { "epoch": 0.9494470774091627, "grad_norm": 6.67140736518561, "learning_rate": 8.642703182584743e-06, "loss": 2.3367583751678467, "step": 1202 }, { "epoch": 0.9502369668246445, "grad_norm": 9.820716233427401, "learning_rate": 8.639553605889588e-06, "loss": 2.967700481414795, "step": 1203 }, { "epoch": 0.9510268562401264, "grad_norm": 7.868965032787582, "learning_rate": 8.636400954487563e-06, "loss": 3.142509937286377, "step": 1204 }, { "epoch": 0.9518167456556083, "grad_norm": 9.187837043838497, "learning_rate": 8.633245231042038e-06, "loss": 2.2670068740844727, "step": 1205 }, { "epoch": 0.95260663507109, "grad_norm": 9.246511337142001, "learning_rate": 8.630086438218976e-06, "loss": 2.476592540740967, "step": 1206 }, { "epoch": 0.9533965244865719, "grad_norm": 11.415361373055639, "learning_rate": 8.626924578686937e-06, "loss": 3.100846767425537, "step": 1207 }, { "epoch": 0.9541864139020537, "grad_norm": 21.60356512140913, "learning_rate": 8.623759655117072e-06, "loss": 2.767620801925659, "step": 1208 }, { "epoch": 0.9549763033175356, "grad_norm": 12.131835094302646, "learning_rate": 8.620591670183116e-06, "loss": 2.683656692504883, "step": 1209 }, { "epoch": 0.9557661927330173, "grad_norm": 14.171005073002354, "learning_rate": 8.617420626561394e-06, "loss": 1.9756850004196167, "step": 1210 }, { "epoch": 0.9565560821484992, "grad_norm": 18.6056036289689, "learning_rate": 8.614246526930816e-06, "loss": 2.502706527709961, "step": 1211 }, { "epoch": 0.957345971563981, "grad_norm": 9.743897509731939, "learning_rate": 8.61106937397287e-06, "loss": 2.774007797241211, "step": 1212 }, { "epoch": 0.9581358609794629, "grad_norm": 8.660394055988597, "learning_rate": 8.607889170371627e-06, "loss": 3.046370506286621, "step": 1213 }, { "epoch": 0.9589257503949447, "grad_norm": 15.639249371215595, "learning_rate": 8.604705918813729e-06, "loss": 2.9602065086364746, "step": 1214 }, { "epoch": 0.9597156398104265, "grad_norm": 18.947552868983692, "learning_rate": 8.601519621988402e-06, "loss": 2.3588128089904785, "step": 1215 }, { "epoch": 0.9605055292259084, "grad_norm": 8.36234759685658, "learning_rate": 8.59833028258744e-06, "loss": 2.4646201133728027, "step": 1216 }, { "epoch": 0.9612954186413902, "grad_norm": 12.494361272602035, "learning_rate": 8.595137903305205e-06, "loss": 2.6268255710601807, "step": 1217 }, { "epoch": 0.9620853080568721, "grad_norm": 24.950930936425536, "learning_rate": 8.591942486838629e-06, "loss": 2.661611318588257, "step": 1218 }, { "epoch": 0.9628751974723538, "grad_norm": 19.123936642513613, "learning_rate": 8.588744035887214e-06, "loss": 2.785714626312256, "step": 1219 }, { "epoch": 0.9636650868878357, "grad_norm": 11.666101454211423, "learning_rate": 8.585542553153017e-06, "loss": 3.2729837894439697, "step": 1220 }, { "epoch": 0.9644549763033176, "grad_norm": 5.6088413152939856, "learning_rate": 8.582338041340668e-06, "loss": 2.532158613204956, "step": 1221 }, { "epoch": 0.9652448657187994, "grad_norm": 10.219761775257519, "learning_rate": 8.579130503157343e-06, "loss": 3.0070722103118896, "step": 1222 }, { "epoch": 0.9660347551342812, "grad_norm": 9.057639040127485, "learning_rate": 8.575919941312782e-06, "loss": 2.773895263671875, "step": 1223 }, { "epoch": 0.966824644549763, "grad_norm": 7.267537688630795, "learning_rate": 8.57270635851928e-06, "loss": 3.1114962100982666, "step": 1224 }, { "epoch": 0.9676145339652449, "grad_norm": 24.757703365939474, "learning_rate": 8.569489757491681e-06, "loss": 3.395624876022339, "step": 1225 }, { "epoch": 0.9684044233807267, "grad_norm": 11.88925219508561, "learning_rate": 8.566270140947381e-06, "loss": 2.6596829891204834, "step": 1226 }, { "epoch": 0.9691943127962085, "grad_norm": 7.189319095204579, "learning_rate": 8.563047511606322e-06, "loss": 2.3300154209136963, "step": 1227 }, { "epoch": 0.9699842022116903, "grad_norm": 7.840035719762905, "learning_rate": 8.559821872190993e-06, "loss": 2.5409016609191895, "step": 1228 }, { "epoch": 0.9707740916271722, "grad_norm": 13.281718877860365, "learning_rate": 8.556593225426424e-06, "loss": 2.4998364448547363, "step": 1229 }, { "epoch": 0.9715639810426541, "grad_norm": 5.968185731325506, "learning_rate": 8.553361574040185e-06, "loss": 2.6161770820617676, "step": 1230 }, { "epoch": 0.9723538704581358, "grad_norm": 8.962503347087875, "learning_rate": 8.550126920762389e-06, "loss": 2.6376500129699707, "step": 1231 }, { "epoch": 0.9731437598736177, "grad_norm": 15.044203306326212, "learning_rate": 8.546889268325678e-06, "loss": 2.7106094360351562, "step": 1232 }, { "epoch": 0.9739336492890995, "grad_norm": 9.519163730532522, "learning_rate": 8.543648619465232e-06, "loss": 2.801136016845703, "step": 1233 }, { "epoch": 0.9747235387045814, "grad_norm": 15.500075730991261, "learning_rate": 8.540404976918766e-06, "loss": 3.1230263710021973, "step": 1234 }, { "epoch": 0.9755134281200631, "grad_norm": 11.401461873064811, "learning_rate": 8.537158343426515e-06, "loss": 2.6839194297790527, "step": 1235 }, { "epoch": 0.976303317535545, "grad_norm": 19.111412494475047, "learning_rate": 8.53390872173125e-06, "loss": 2.720089912414551, "step": 1236 }, { "epoch": 0.9770932069510269, "grad_norm": 8.343576881810694, "learning_rate": 8.530656114578258e-06, "loss": 2.9516029357910156, "step": 1237 }, { "epoch": 0.9778830963665087, "grad_norm": 12.186373113060878, "learning_rate": 8.527400524715355e-06, "loss": 3.0438735485076904, "step": 1238 }, { "epoch": 0.9786729857819905, "grad_norm": 12.434900360729275, "learning_rate": 8.524141954892872e-06, "loss": 2.8403427600860596, "step": 1239 }, { "epoch": 0.9794628751974723, "grad_norm": 8.116131181660196, "learning_rate": 8.52088040786366e-06, "loss": 2.6644649505615234, "step": 1240 }, { "epoch": 0.9802527646129542, "grad_norm": 9.291988122273889, "learning_rate": 8.517615886383087e-06, "loss": 2.3029625415802, "step": 1241 }, { "epoch": 0.981042654028436, "grad_norm": 14.010297592734409, "learning_rate": 8.514348393209029e-06, "loss": 3.050496816635132, "step": 1242 }, { "epoch": 0.9818325434439179, "grad_norm": 7.98145981401455, "learning_rate": 8.511077931101875e-06, "loss": 2.6487746238708496, "step": 1243 }, { "epoch": 0.9826224328593997, "grad_norm": 12.609272032565443, "learning_rate": 8.507804502824524e-06, "loss": 2.459247589111328, "step": 1244 }, { "epoch": 0.9834123222748815, "grad_norm": 16.863961424074052, "learning_rate": 8.504528111142376e-06, "loss": 2.5375590324401855, "step": 1245 }, { "epoch": 0.9842022116903634, "grad_norm": 11.57902809675688, "learning_rate": 8.501248758823342e-06, "loss": 2.7621288299560547, "step": 1246 }, { "epoch": 0.9849921011058452, "grad_norm": 11.15937321516801, "learning_rate": 8.497966448637825e-06, "loss": 2.9258103370666504, "step": 1247 }, { "epoch": 0.985781990521327, "grad_norm": 19.29175817663025, "learning_rate": 8.494681183358735e-06, "loss": 2.59159255027771, "step": 1248 }, { "epoch": 0.9865718799368088, "grad_norm": 8.989580829357356, "learning_rate": 8.491392965761472e-06, "loss": 2.8336706161499023, "step": 1249 }, { "epoch": 0.9873617693522907, "grad_norm": 24.49208358302412, "learning_rate": 8.488101798623934e-06, "loss": 3.0984175205230713, "step": 1250 }, { "epoch": 0.9881516587677726, "grad_norm": 11.127902851400203, "learning_rate": 8.484807684726513e-06, "loss": 3.016453981399536, "step": 1251 }, { "epoch": 0.9889415481832543, "grad_norm": 11.677023812873243, "learning_rate": 8.481510626852082e-06, "loss": 2.415400266647339, "step": 1252 }, { "epoch": 0.9897314375987362, "grad_norm": 6.718579596480926, "learning_rate": 8.478210627786008e-06, "loss": 3.216947078704834, "step": 1253 }, { "epoch": 0.990521327014218, "grad_norm": 16.963884976243023, "learning_rate": 8.474907690316143e-06, "loss": 2.9188220500946045, "step": 1254 }, { "epoch": 0.9913112164296999, "grad_norm": 18.12845817805615, "learning_rate": 8.47160181723282e-06, "loss": 2.854201078414917, "step": 1255 }, { "epoch": 0.9921011058451816, "grad_norm": 10.387630225459594, "learning_rate": 8.468293011328848e-06, "loss": 2.473886489868164, "step": 1256 }, { "epoch": 0.9928909952606635, "grad_norm": 10.817521575505426, "learning_rate": 8.46498127539952e-06, "loss": 2.68951153755188, "step": 1257 }, { "epoch": 0.9936808846761453, "grad_norm": 8.916073091749583, "learning_rate": 8.4616666122426e-06, "loss": 3.3685083389282227, "step": 1258 }, { "epoch": 0.9944707740916272, "grad_norm": 18.44181632696859, "learning_rate": 8.458349024658326e-06, "loss": 3.140796184539795, "step": 1259 }, { "epoch": 0.995260663507109, "grad_norm": 15.378117707047336, "learning_rate": 8.455028515449407e-06, "loss": 3.070150852203369, "step": 1260 }, { "epoch": 0.9960505529225908, "grad_norm": 14.235029232259741, "learning_rate": 8.451705087421023e-06, "loss": 2.6762990951538086, "step": 1261 }, { "epoch": 0.9968404423380727, "grad_norm": 14.728149790719765, "learning_rate": 8.448378743380816e-06, "loss": 2.3728647232055664, "step": 1262 }, { "epoch": 0.9976303317535545, "grad_norm": 16.863014645031626, "learning_rate": 8.445049486138887e-06, "loss": 2.7437081336975098, "step": 1263 }, { "epoch": 0.9984202211690363, "grad_norm": 14.200104485327296, "learning_rate": 8.441717318507811e-06, "loss": 2.6188509464263916, "step": 1264 }, { "epoch": 0.9992101105845181, "grad_norm": 10.67666931812201, "learning_rate": 8.438382243302609e-06, "loss": 2.8497185707092285, "step": 1265 }, { "epoch": 1.0, "grad_norm": 13.692512039732208, "learning_rate": 8.435044263340766e-06, "loss": 3.2503585815429688, "step": 1266 }, { "epoch": 1.0007898894154819, "grad_norm": 8.63492881542288, "learning_rate": 8.431703381442219e-06, "loss": 2.442643165588379, "step": 1267 }, { "epoch": 1.0015797788309637, "grad_norm": 12.845854999880341, "learning_rate": 8.428359600429352e-06, "loss": 1.3568103313446045, "step": 1268 }, { "epoch": 1.0023696682464456, "grad_norm": 9.925730237026748, "learning_rate": 8.425012923127007e-06, "loss": 1.8518157005310059, "step": 1269 }, { "epoch": 1.0031595576619274, "grad_norm": 11.088071862507528, "learning_rate": 8.421663352362465e-06, "loss": 1.3870903253555298, "step": 1270 }, { "epoch": 1.003949447077409, "grad_norm": 10.346802750253469, "learning_rate": 8.418310890965456e-06, "loss": 1.3119181394577026, "step": 1271 }, { "epoch": 1.004739336492891, "grad_norm": 9.99087600807621, "learning_rate": 8.414955541768148e-06, "loss": 1.453743577003479, "step": 1272 }, { "epoch": 1.0055292259083728, "grad_norm": 15.017657166973448, "learning_rate": 8.411597307605156e-06, "loss": 1.2929190397262573, "step": 1273 }, { "epoch": 1.0063191153238547, "grad_norm": 7.93537553692124, "learning_rate": 8.408236191313523e-06, "loss": 1.5082969665527344, "step": 1274 }, { "epoch": 1.0071090047393365, "grad_norm": 15.016597946841564, "learning_rate": 8.40487219573273e-06, "loss": 0.6322309374809265, "step": 1275 }, { "epoch": 1.0078988941548184, "grad_norm": 20.805758505260766, "learning_rate": 8.401505323704697e-06, "loss": 1.5762568712234497, "step": 1276 }, { "epoch": 1.0086887835703002, "grad_norm": 32.14895799539077, "learning_rate": 8.398135578073763e-06, "loss": 1.2138099670410156, "step": 1277 }, { "epoch": 1.009478672985782, "grad_norm": 19.797132562379517, "learning_rate": 8.394762961686706e-06, "loss": 2.1060357093811035, "step": 1278 }, { "epoch": 1.0102685624012637, "grad_norm": 9.137353495256644, "learning_rate": 8.391387477392718e-06, "loss": 1.1503310203552246, "step": 1279 }, { "epoch": 1.0110584518167456, "grad_norm": 10.486340395654485, "learning_rate": 8.38800912804342e-06, "loss": 1.0871237516403198, "step": 1280 }, { "epoch": 1.0118483412322274, "grad_norm": 13.381876680770137, "learning_rate": 8.384627916492856e-06, "loss": 2.127761125564575, "step": 1281 }, { "epoch": 1.0126382306477093, "grad_norm": 7.526496737374128, "learning_rate": 8.381243845597482e-06, "loss": 1.72287917137146, "step": 1282 }, { "epoch": 1.0134281200631912, "grad_norm": 11.304822146622485, "learning_rate": 8.377856918216171e-06, "loss": 1.1881051063537598, "step": 1283 }, { "epoch": 1.014218009478673, "grad_norm": 9.044097061649525, "learning_rate": 8.374467137210216e-06, "loss": 1.359776496887207, "step": 1284 }, { "epoch": 1.0150078988941549, "grad_norm": 17.550879838319734, "learning_rate": 8.371074505443309e-06, "loss": 1.7276735305786133, "step": 1285 }, { "epoch": 1.0157977883096367, "grad_norm": 15.204121265895848, "learning_rate": 8.367679025781559e-06, "loss": 1.7062349319458008, "step": 1286 }, { "epoch": 1.0165876777251184, "grad_norm": 11.379360194095462, "learning_rate": 8.36428070109348e-06, "loss": 1.3219001293182373, "step": 1287 }, { "epoch": 1.0173775671406002, "grad_norm": 16.20401037301683, "learning_rate": 8.360879534249984e-06, "loss": 1.7655143737792969, "step": 1288 }, { "epoch": 1.018167456556082, "grad_norm": 17.858221806964767, "learning_rate": 8.35747552812439e-06, "loss": 0.791549563407898, "step": 1289 }, { "epoch": 1.018957345971564, "grad_norm": 12.734062958157567, "learning_rate": 8.354068685592414e-06, "loss": 2.4911937713623047, "step": 1290 }, { "epoch": 1.0197472353870458, "grad_norm": 12.18144169507221, "learning_rate": 8.350659009532169e-06, "loss": 1.9642527103424072, "step": 1291 }, { "epoch": 1.0205371248025277, "grad_norm": 8.090065379000105, "learning_rate": 8.347246502824158e-06, "loss": 1.8792918920516968, "step": 1292 }, { "epoch": 1.0213270142180095, "grad_norm": 10.304570516520158, "learning_rate": 8.343831168351279e-06, "loss": 1.270374059677124, "step": 1293 }, { "epoch": 1.0221169036334914, "grad_norm": 11.563660724782284, "learning_rate": 8.340413008998818e-06, "loss": 1.5317769050598145, "step": 1294 }, { "epoch": 1.0229067930489733, "grad_norm": 15.047299484001131, "learning_rate": 8.336992027654446e-06, "loss": 1.1459553241729736, "step": 1295 }, { "epoch": 1.0236966824644549, "grad_norm": 8.413193349303429, "learning_rate": 8.333568227208221e-06, "loss": 1.7619214057922363, "step": 1296 }, { "epoch": 1.0244865718799367, "grad_norm": 17.455533064998168, "learning_rate": 8.330141610552582e-06, "loss": 1.3511468172073364, "step": 1297 }, { "epoch": 1.0252764612954186, "grad_norm": 14.604139592871947, "learning_rate": 8.326712180582343e-06, "loss": 1.5933293104171753, "step": 1298 }, { "epoch": 1.0260663507109005, "grad_norm": 27.973131188038252, "learning_rate": 8.323279940194697e-06, "loss": 1.5758417844772339, "step": 1299 }, { "epoch": 1.0268562401263823, "grad_norm": 8.089809601055391, "learning_rate": 8.319844892289218e-06, "loss": 1.7407735586166382, "step": 1300 }, { "epoch": 1.0276461295418642, "grad_norm": 10.840887730701892, "learning_rate": 8.316407039767839e-06, "loss": 1.3549420833587646, "step": 1301 }, { "epoch": 1.028436018957346, "grad_norm": 12.182733951436646, "learning_rate": 8.312966385534873e-06, "loss": 0.9703428745269775, "step": 1302 }, { "epoch": 1.029225908372828, "grad_norm": 14.482542096417097, "learning_rate": 8.309522932496994e-06, "loss": 1.5941872596740723, "step": 1303 }, { "epoch": 1.0300157977883095, "grad_norm": 11.163985480995702, "learning_rate": 8.306076683563245e-06, "loss": 1.6860015392303467, "step": 1304 }, { "epoch": 1.0308056872037914, "grad_norm": 18.11399534891752, "learning_rate": 8.302627641645025e-06, "loss": 2.1102190017700195, "step": 1305 }, { "epoch": 1.0315955766192733, "grad_norm": 9.377953260206045, "learning_rate": 8.299175809656099e-06, "loss": 1.077817440032959, "step": 1306 }, { "epoch": 1.0323854660347551, "grad_norm": 14.295491793789918, "learning_rate": 8.295721190512584e-06, "loss": 1.9387364387512207, "step": 1307 }, { "epoch": 1.033175355450237, "grad_norm": 11.182947577292902, "learning_rate": 8.292263787132955e-06, "loss": 1.3131287097930908, "step": 1308 }, { "epoch": 1.0339652448657188, "grad_norm": 9.345350781275434, "learning_rate": 8.288803602438037e-06, "loss": 1.1507320404052734, "step": 1309 }, { "epoch": 1.0347551342812007, "grad_norm": 9.05362446143137, "learning_rate": 8.285340639351005e-06, "loss": 1.400825023651123, "step": 1310 }, { "epoch": 1.0355450236966826, "grad_norm": 12.34226551872451, "learning_rate": 8.281874900797384e-06, "loss": 1.3061918020248413, "step": 1311 }, { "epoch": 1.0363349131121642, "grad_norm": 12.863696082340267, "learning_rate": 8.278406389705038e-06, "loss": 1.2435519695281982, "step": 1312 }, { "epoch": 1.037124802527646, "grad_norm": 9.912824621386932, "learning_rate": 8.274935109004179e-06, "loss": 1.968496561050415, "step": 1313 }, { "epoch": 1.037914691943128, "grad_norm": 6.595291570397836, "learning_rate": 8.271461061627356e-06, "loss": 1.566641092300415, "step": 1314 }, { "epoch": 1.0387045813586098, "grad_norm": 13.422406591433509, "learning_rate": 8.267984250509456e-06, "loss": 1.5585157871246338, "step": 1315 }, { "epoch": 1.0394944707740916, "grad_norm": 15.367990128433833, "learning_rate": 8.264504678587697e-06, "loss": 1.4819953441619873, "step": 1316 }, { "epoch": 1.0402843601895735, "grad_norm": 14.804150275595717, "learning_rate": 8.261022348801638e-06, "loss": 1.8838303089141846, "step": 1317 }, { "epoch": 1.0410742496050553, "grad_norm": 10.730588920504516, "learning_rate": 8.257537264093158e-06, "loss": 1.101423978805542, "step": 1318 }, { "epoch": 1.0418641390205372, "grad_norm": 20.04793554392532, "learning_rate": 8.254049427406472e-06, "loss": 1.2617835998535156, "step": 1319 }, { "epoch": 1.042654028436019, "grad_norm": 8.148456102914626, "learning_rate": 8.250558841688114e-06, "loss": 1.9862099885940552, "step": 1320 }, { "epoch": 1.0434439178515007, "grad_norm": 10.559290761333362, "learning_rate": 8.247065509886943e-06, "loss": 1.2836475372314453, "step": 1321 }, { "epoch": 1.0442338072669826, "grad_norm": 10.381133061098705, "learning_rate": 8.243569434954136e-06, "loss": 1.5940964221954346, "step": 1322 }, { "epoch": 1.0450236966824644, "grad_norm": 12.297214798561669, "learning_rate": 8.24007061984319e-06, "loss": 1.687330722808838, "step": 1323 }, { "epoch": 1.0458135860979463, "grad_norm": 13.218655430214906, "learning_rate": 8.236569067509914e-06, "loss": 1.3215546607971191, "step": 1324 }, { "epoch": 1.0466034755134281, "grad_norm": 11.341162960064988, "learning_rate": 8.233064780912431e-06, "loss": 1.0560698509216309, "step": 1325 }, { "epoch": 1.04739336492891, "grad_norm": 22.19384496177019, "learning_rate": 8.229557763011176e-06, "loss": 1.9986159801483154, "step": 1326 }, { "epoch": 1.0481832543443919, "grad_norm": 11.222176676313888, "learning_rate": 8.226048016768888e-06, "loss": 1.0500805377960205, "step": 1327 }, { "epoch": 1.0489731437598737, "grad_norm": 8.465112348761927, "learning_rate": 8.222535545150612e-06, "loss": 1.7865219116210938, "step": 1328 }, { "epoch": 1.0497630331753554, "grad_norm": 10.856718757186945, "learning_rate": 8.219020351123695e-06, "loss": 0.9995932579040527, "step": 1329 }, { "epoch": 1.0505529225908372, "grad_norm": 12.139594241353372, "learning_rate": 8.215502437657785e-06, "loss": 1.1767065525054932, "step": 1330 }, { "epoch": 1.051342812006319, "grad_norm": 21.039747631184284, "learning_rate": 8.211981807724828e-06, "loss": 1.5868887901306152, "step": 1331 }, { "epoch": 1.052132701421801, "grad_norm": 10.89948650178149, "learning_rate": 8.208458464299061e-06, "loss": 1.2937088012695312, "step": 1332 }, { "epoch": 1.0529225908372828, "grad_norm": 11.666317075229031, "learning_rate": 8.20493241035702e-06, "loss": 1.497636318206787, "step": 1333 }, { "epoch": 1.0537124802527646, "grad_norm": 11.771023636832185, "learning_rate": 8.201403648877523e-06, "loss": 1.822361707687378, "step": 1334 }, { "epoch": 1.0545023696682465, "grad_norm": 9.652940358468332, "learning_rate": 8.197872182841684e-06, "loss": 1.5103296041488647, "step": 1335 }, { "epoch": 1.0552922590837284, "grad_norm": 11.614519754388345, "learning_rate": 8.194338015232893e-06, "loss": 1.5001356601715088, "step": 1336 }, { "epoch": 1.05608214849921, "grad_norm": 9.35354809288492, "learning_rate": 8.190801149036828e-06, "loss": 1.7350692749023438, "step": 1337 }, { "epoch": 1.0568720379146919, "grad_norm": 30.14331426380188, "learning_rate": 8.187261587241444e-06, "loss": 1.2237708568572998, "step": 1338 }, { "epoch": 1.0576619273301737, "grad_norm": 16.364193953877205, "learning_rate": 8.183719332836976e-06, "loss": 1.5238933563232422, "step": 1339 }, { "epoch": 1.0584518167456556, "grad_norm": 11.363557290265472, "learning_rate": 8.180174388815933e-06, "loss": 1.208627700805664, "step": 1340 }, { "epoch": 1.0592417061611374, "grad_norm": 14.373459768882784, "learning_rate": 8.176626758173093e-06, "loss": 2.0444703102111816, "step": 1341 }, { "epoch": 1.0600315955766193, "grad_norm": 18.032219552306884, "learning_rate": 8.173076443905506e-06, "loss": 1.2289549112319946, "step": 1342 }, { "epoch": 1.0608214849921012, "grad_norm": 10.187495171136964, "learning_rate": 8.16952344901249e-06, "loss": 1.079569935798645, "step": 1343 }, { "epoch": 1.061611374407583, "grad_norm": 26.979192881381238, "learning_rate": 8.16596777649563e-06, "loss": 3.324129343032837, "step": 1344 }, { "epoch": 1.0624012638230647, "grad_norm": 10.606069599550024, "learning_rate": 8.162409429358764e-06, "loss": 1.3011245727539062, "step": 1345 }, { "epoch": 1.0631911532385465, "grad_norm": 9.756876263614647, "learning_rate": 8.158848410608e-06, "loss": 3.0105724334716797, "step": 1346 }, { "epoch": 1.0639810426540284, "grad_norm": 11.406355450796742, "learning_rate": 8.155284723251697e-06, "loss": 1.46226167678833, "step": 1347 }, { "epoch": 1.0647709320695102, "grad_norm": 13.374416212880506, "learning_rate": 8.15171837030047e-06, "loss": 1.8406553268432617, "step": 1348 }, { "epoch": 1.065560821484992, "grad_norm": 10.14760836082322, "learning_rate": 8.148149354767183e-06, "loss": 2.1282176971435547, "step": 1349 }, { "epoch": 1.066350710900474, "grad_norm": 9.351026637130168, "learning_rate": 8.144577679666955e-06, "loss": 1.2611757516860962, "step": 1350 }, { "epoch": 1.0671406003159558, "grad_norm": 12.303709820191473, "learning_rate": 8.14100334801715e-06, "loss": 1.4453020095825195, "step": 1351 }, { "epoch": 1.0679304897314377, "grad_norm": 8.355205018000369, "learning_rate": 8.13742636283737e-06, "loss": 0.9628135561943054, "step": 1352 }, { "epoch": 1.0687203791469195, "grad_norm": 9.764330703940649, "learning_rate": 8.13384672714947e-06, "loss": 1.3544663190841675, "step": 1353 }, { "epoch": 1.0695102685624012, "grad_norm": 12.402831239510688, "learning_rate": 8.130264443977534e-06, "loss": 2.4541101455688477, "step": 1354 }, { "epoch": 1.070300157977883, "grad_norm": 14.656238462966826, "learning_rate": 8.126679516347887e-06, "loss": 1.269155502319336, "step": 1355 }, { "epoch": 1.0710900473933649, "grad_norm": 15.277650232839177, "learning_rate": 8.123091947289089e-06, "loss": 1.9917043447494507, "step": 1356 }, { "epoch": 1.0718799368088467, "grad_norm": 8.257937346753295, "learning_rate": 8.119501739831929e-06, "loss": 1.520797610282898, "step": 1357 }, { "epoch": 1.0726698262243286, "grad_norm": 12.639910986990902, "learning_rate": 8.11590889700943e-06, "loss": 1.22843599319458, "step": 1358 }, { "epoch": 1.0734597156398105, "grad_norm": 10.525028379122327, "learning_rate": 8.11231342185683e-06, "loss": 1.971350908279419, "step": 1359 }, { "epoch": 1.0742496050552923, "grad_norm": 12.312343896152003, "learning_rate": 8.108715317411606e-06, "loss": 1.4550845623016357, "step": 1360 }, { "epoch": 1.0750394944707742, "grad_norm": 15.40492755500279, "learning_rate": 8.105114586713442e-06, "loss": 1.4124548435211182, "step": 1361 }, { "epoch": 1.0758293838862558, "grad_norm": 11.11985081751917, "learning_rate": 8.101511232804251e-06, "loss": 1.9308778047561646, "step": 1362 }, { "epoch": 1.0766192733017377, "grad_norm": 7.884346798092688, "learning_rate": 8.097905258728158e-06, "loss": 1.8403844833374023, "step": 1363 }, { "epoch": 1.0774091627172195, "grad_norm": 10.235048213024509, "learning_rate": 8.094296667531501e-06, "loss": 2.4593963623046875, "step": 1364 }, { "epoch": 1.0781990521327014, "grad_norm": 15.402993599882011, "learning_rate": 8.090685462262828e-06, "loss": 1.2213027477264404, "step": 1365 }, { "epoch": 1.0789889415481833, "grad_norm": 13.057398754067725, "learning_rate": 8.0870716459729e-06, "loss": 1.3943471908569336, "step": 1366 }, { "epoch": 1.0797788309636651, "grad_norm": 11.831339156526893, "learning_rate": 8.08345522171468e-06, "loss": 1.2609305381774902, "step": 1367 }, { "epoch": 1.080568720379147, "grad_norm": 10.352369944456171, "learning_rate": 8.079836192543335e-06, "loss": 1.6797740459442139, "step": 1368 }, { "epoch": 1.0813586097946288, "grad_norm": 8.167152293191428, "learning_rate": 8.076214561516234e-06, "loss": 1.5496795177459717, "step": 1369 }, { "epoch": 1.0821484992101107, "grad_norm": 10.632786869980947, "learning_rate": 8.07259033169294e-06, "loss": 1.1447343826293945, "step": 1370 }, { "epoch": 1.0829383886255923, "grad_norm": 9.772931829234892, "learning_rate": 8.068963506135214e-06, "loss": 1.840114712715149, "step": 1371 }, { "epoch": 1.0837282780410742, "grad_norm": 12.315109646538895, "learning_rate": 8.065334087907016e-06, "loss": 1.73477041721344, "step": 1372 }, { "epoch": 1.084518167456556, "grad_norm": 18.168778117659386, "learning_rate": 8.061702080074483e-06, "loss": 2.225961923599243, "step": 1373 }, { "epoch": 1.085308056872038, "grad_norm": 32.98324986784259, "learning_rate": 8.05806748570595e-06, "loss": 1.1636614799499512, "step": 1374 }, { "epoch": 1.0860979462875198, "grad_norm": 9.324662001634294, "learning_rate": 8.054430307871939e-06, "loss": 1.8258980512619019, "step": 1375 }, { "epoch": 1.0868878357030016, "grad_norm": 9.190806390754526, "learning_rate": 8.050790549645141e-06, "loss": 1.2510772943496704, "step": 1376 }, { "epoch": 1.0876777251184835, "grad_norm": 16.506042815565262, "learning_rate": 8.047148214100445e-06, "loss": 1.6223645210266113, "step": 1377 }, { "epoch": 1.0884676145339653, "grad_norm": 13.858600832931351, "learning_rate": 8.043503304314901e-06, "loss": 1.3856348991394043, "step": 1378 }, { "epoch": 1.089257503949447, "grad_norm": 10.071290706752858, "learning_rate": 8.039855823367744e-06, "loss": 0.7631658911705017, "step": 1379 }, { "epoch": 1.0900473933649288, "grad_norm": 12.6598160611525, "learning_rate": 8.036205774340378e-06, "loss": 1.197335958480835, "step": 1380 }, { "epoch": 1.0908372827804107, "grad_norm": 11.751428435172853, "learning_rate": 8.032553160316374e-06, "loss": 1.2686131000518799, "step": 1381 }, { "epoch": 1.0916271721958926, "grad_norm": 10.67438658042041, "learning_rate": 8.028897984381475e-06, "loss": 1.5238006114959717, "step": 1382 }, { "epoch": 1.0924170616113744, "grad_norm": 11.743621256195373, "learning_rate": 8.025240249623583e-06, "loss": 1.500381588935852, "step": 1383 }, { "epoch": 1.0932069510268563, "grad_norm": 7.559878550374157, "learning_rate": 8.021579959132768e-06, "loss": 1.3321250677108765, "step": 1384 }, { "epoch": 1.0939968404423381, "grad_norm": 23.292812697121704, "learning_rate": 8.017917116001253e-06, "loss": 2.0412609577178955, "step": 1385 }, { "epoch": 1.09478672985782, "grad_norm": 13.85465573871678, "learning_rate": 8.01425172332342e-06, "loss": 2.5268537998199463, "step": 1386 }, { "epoch": 1.0955766192733019, "grad_norm": 9.779613579870743, "learning_rate": 8.010583784195804e-06, "loss": 1.6717355251312256, "step": 1387 }, { "epoch": 1.0963665086887835, "grad_norm": 8.068370833710562, "learning_rate": 8.006913301717097e-06, "loss": 1.9225399494171143, "step": 1388 }, { "epoch": 1.0971563981042654, "grad_norm": 27.77102947325972, "learning_rate": 8.003240278988127e-06, "loss": 1.9709285497665405, "step": 1389 }, { "epoch": 1.0979462875197472, "grad_norm": 14.301520105323926, "learning_rate": 7.999564719111884e-06, "loss": 1.1914315223693848, "step": 1390 }, { "epoch": 1.098736176935229, "grad_norm": 8.296671084591999, "learning_rate": 7.995886625193484e-06, "loss": 1.6483365297317505, "step": 1391 }, { "epoch": 1.099526066350711, "grad_norm": 13.348402769682588, "learning_rate": 7.9922060003402e-06, "loss": 1.258619785308838, "step": 1392 }, { "epoch": 1.1003159557661928, "grad_norm": 11.776544553295185, "learning_rate": 7.988522847661432e-06, "loss": 1.841566801071167, "step": 1393 }, { "epoch": 1.1011058451816746, "grad_norm": 10.875063836555396, "learning_rate": 7.984837170268725e-06, "loss": 1.3339906930923462, "step": 1394 }, { "epoch": 1.1018957345971563, "grad_norm": 13.070070571580835, "learning_rate": 7.981148971275744e-06, "loss": 1.6276012659072876, "step": 1395 }, { "epoch": 1.1026856240126381, "grad_norm": 8.908917736629908, "learning_rate": 7.977458253798296e-06, "loss": 1.7726579904556274, "step": 1396 }, { "epoch": 1.10347551342812, "grad_norm": 18.167256312241264, "learning_rate": 7.973765020954312e-06, "loss": 1.8422517776489258, "step": 1397 }, { "epoch": 1.1042654028436019, "grad_norm": 7.381573039738737, "learning_rate": 7.970069275863844e-06, "loss": 1.8285367488861084, "step": 1398 }, { "epoch": 1.1050552922590837, "grad_norm": 13.52831771786112, "learning_rate": 7.96637102164907e-06, "loss": 1.3294421434402466, "step": 1399 }, { "epoch": 1.1058451816745656, "grad_norm": 10.887865537597344, "learning_rate": 7.96267026143429e-06, "loss": 1.9184372425079346, "step": 1400 }, { "epoch": 1.1066350710900474, "grad_norm": 14.487647084460335, "learning_rate": 7.95896699834592e-06, "loss": 1.6796505451202393, "step": 1401 }, { "epoch": 1.1074249605055293, "grad_norm": 11.249265183893405, "learning_rate": 7.955261235512483e-06, "loss": 1.4529224634170532, "step": 1402 }, { "epoch": 1.1082148499210112, "grad_norm": 9.645198748151126, "learning_rate": 7.951552976064623e-06, "loss": 1.2036597728729248, "step": 1403 }, { "epoch": 1.1090047393364928, "grad_norm": 6.984305487322073, "learning_rate": 7.947842223135089e-06, "loss": 1.6848506927490234, "step": 1404 }, { "epoch": 1.1097946287519747, "grad_norm": 10.21639035313384, "learning_rate": 7.944128979858739e-06, "loss": 2.247422218322754, "step": 1405 }, { "epoch": 1.1105845181674565, "grad_norm": 11.108091790322238, "learning_rate": 7.940413249372529e-06, "loss": 1.2086236476898193, "step": 1406 }, { "epoch": 1.1113744075829384, "grad_norm": 16.112732227769964, "learning_rate": 7.936695034815527e-06, "loss": 1.3832933902740479, "step": 1407 }, { "epoch": 1.1121642969984202, "grad_norm": 10.731967743134, "learning_rate": 7.932974339328887e-06, "loss": 1.68961763381958, "step": 1408 }, { "epoch": 1.112954186413902, "grad_norm": 9.149310423822971, "learning_rate": 7.929251166055867e-06, "loss": 0.9486649632453918, "step": 1409 }, { "epoch": 1.113744075829384, "grad_norm": 9.704188919512742, "learning_rate": 7.925525518141816e-06, "loss": 1.669637680053711, "step": 1410 }, { "epoch": 1.1145339652448658, "grad_norm": 12.16446463953121, "learning_rate": 7.921797398734174e-06, "loss": 1.8166450262069702, "step": 1411 }, { "epoch": 1.1153238546603474, "grad_norm": 13.23762039499484, "learning_rate": 7.918066810982468e-06, "loss": 2.1429643630981445, "step": 1412 }, { "epoch": 1.1161137440758293, "grad_norm": 11.441036394254365, "learning_rate": 7.914333758038311e-06, "loss": 1.7969956398010254, "step": 1413 }, { "epoch": 1.1169036334913112, "grad_norm": 11.37805305383338, "learning_rate": 7.910598243055396e-06, "loss": 1.394661784172058, "step": 1414 }, { "epoch": 1.117693522906793, "grad_norm": 10.617508342494792, "learning_rate": 7.906860269189504e-06, "loss": 2.4616918563842773, "step": 1415 }, { "epoch": 1.1184834123222749, "grad_norm": 15.908117706983552, "learning_rate": 7.903119839598482e-06, "loss": 1.3925102949142456, "step": 1416 }, { "epoch": 1.1192733017377567, "grad_norm": 6.98257561740011, "learning_rate": 7.899376957442262e-06, "loss": 1.6068451404571533, "step": 1417 }, { "epoch": 1.1200631911532386, "grad_norm": 8.912600940978086, "learning_rate": 7.89563162588284e-06, "loss": 1.1992769241333008, "step": 1418 }, { "epoch": 1.1208530805687205, "grad_norm": 8.211754082626802, "learning_rate": 7.891883848084283e-06, "loss": 1.857825517654419, "step": 1419 }, { "epoch": 1.1216429699842023, "grad_norm": 9.581409453425586, "learning_rate": 7.88813362721273e-06, "loss": 1.6415996551513672, "step": 1420 }, { "epoch": 1.122432859399684, "grad_norm": 18.329912927070012, "learning_rate": 7.884380966436379e-06, "loss": 1.7880502939224243, "step": 1421 }, { "epoch": 1.1232227488151658, "grad_norm": 9.810032143627515, "learning_rate": 7.880625868925487e-06, "loss": 1.8203599452972412, "step": 1422 }, { "epoch": 1.1240126382306477, "grad_norm": 9.06670473315355, "learning_rate": 7.876868337852377e-06, "loss": 1.596606731414795, "step": 1423 }, { "epoch": 1.1248025276461295, "grad_norm": 9.942294890261461, "learning_rate": 7.87310837639142e-06, "loss": 1.4776908159255981, "step": 1424 }, { "epoch": 1.1255924170616114, "grad_norm": 10.54945063589167, "learning_rate": 7.869345987719047e-06, "loss": 1.6829514503479004, "step": 1425 }, { "epoch": 1.1263823064770933, "grad_norm": 11.580449343278849, "learning_rate": 7.865581175013735e-06, "loss": 1.1238837242126465, "step": 1426 }, { "epoch": 1.1271721958925751, "grad_norm": 18.360177466539106, "learning_rate": 7.86181394145601e-06, "loss": 1.6330945491790771, "step": 1427 }, { "epoch": 1.1279620853080567, "grad_norm": 13.657848488790902, "learning_rate": 7.858044290228443e-06, "loss": 1.6493346691131592, "step": 1428 }, { "epoch": 1.1287519747235386, "grad_norm": 8.8436841483474, "learning_rate": 7.854272224515646e-06, "loss": 1.7612037658691406, "step": 1429 }, { "epoch": 1.1295418641390205, "grad_norm": 15.349132583927682, "learning_rate": 7.850497747504278e-06, "loss": 1.4374094009399414, "step": 1430 }, { "epoch": 1.1303317535545023, "grad_norm": 7.20960688510771, "learning_rate": 7.846720862383024e-06, "loss": 1.5491845607757568, "step": 1431 }, { "epoch": 1.1311216429699842, "grad_norm": 8.9481729357505, "learning_rate": 7.842941572342613e-06, "loss": 1.082542896270752, "step": 1432 }, { "epoch": 1.131911532385466, "grad_norm": 11.741346122561193, "learning_rate": 7.839159880575798e-06, "loss": 1.0406155586242676, "step": 1433 }, { "epoch": 1.132701421800948, "grad_norm": 12.852699840775466, "learning_rate": 7.835375790277367e-06, "loss": 1.5804665088653564, "step": 1434 }, { "epoch": 1.1334913112164298, "grad_norm": 11.234205111215255, "learning_rate": 7.831589304644132e-06, "loss": 1.6930797100067139, "step": 1435 }, { "epoch": 1.1342812006319116, "grad_norm": 12.465436878993371, "learning_rate": 7.827800426874927e-06, "loss": 2.350858211517334, "step": 1436 }, { "epoch": 1.1350710900473935, "grad_norm": 10.417673706008966, "learning_rate": 7.824009160170611e-06, "loss": 2.1081159114837646, "step": 1437 }, { "epoch": 1.1358609794628751, "grad_norm": 6.972975321299533, "learning_rate": 7.820215507734057e-06, "loss": 1.550991415977478, "step": 1438 }, { "epoch": 1.136650868878357, "grad_norm": 9.653383800691586, "learning_rate": 7.816419472770156e-06, "loss": 2.1447458267211914, "step": 1439 }, { "epoch": 1.1374407582938388, "grad_norm": 10.486086790783105, "learning_rate": 7.812621058485807e-06, "loss": 1.8012946844100952, "step": 1440 }, { "epoch": 1.1382306477093207, "grad_norm": 18.588695506303544, "learning_rate": 7.808820268089928e-06, "loss": 0.923102617263794, "step": 1441 }, { "epoch": 1.1390205371248026, "grad_norm": 11.289408037095642, "learning_rate": 7.805017104793438e-06, "loss": 1.2512264251708984, "step": 1442 }, { "epoch": 1.1398104265402844, "grad_norm": 17.187322631022983, "learning_rate": 7.801211571809258e-06, "loss": 2.007840633392334, "step": 1443 }, { "epoch": 1.1406003159557663, "grad_norm": 13.141510083418034, "learning_rate": 7.79740367235232e-06, "loss": 1.7152661085128784, "step": 1444 }, { "epoch": 1.141390205371248, "grad_norm": 15.112494999687053, "learning_rate": 7.793593409639547e-06, "loss": 2.0199451446533203, "step": 1445 }, { "epoch": 1.1421800947867298, "grad_norm": 85.2167253034368, "learning_rate": 7.789780786889862e-06, "loss": 2.34489369392395, "step": 1446 }, { "epoch": 1.1429699842022116, "grad_norm": 9.312272447955333, "learning_rate": 7.785965807324182e-06, "loss": 1.8030688762664795, "step": 1447 }, { "epoch": 1.1437598736176935, "grad_norm": 6.989138103509471, "learning_rate": 7.782148474165414e-06, "loss": 1.0236456394195557, "step": 1448 }, { "epoch": 1.1445497630331753, "grad_norm": 13.66043355155644, "learning_rate": 7.778328790638454e-06, "loss": 1.432175636291504, "step": 1449 }, { "epoch": 1.1453396524486572, "grad_norm": 11.738420362584556, "learning_rate": 7.774506759970183e-06, "loss": 1.8172495365142822, "step": 1450 }, { "epoch": 1.146129541864139, "grad_norm": 12.2141920832457, "learning_rate": 7.770682385389461e-06, "loss": 1.540172815322876, "step": 1451 }, { "epoch": 1.146919431279621, "grad_norm": 11.67045045892725, "learning_rate": 7.76685567012714e-06, "loss": 1.0208656787872314, "step": 1452 }, { "epoch": 1.1477093206951028, "grad_norm": 19.223683414914778, "learning_rate": 7.763026617416037e-06, "loss": 2.1578786373138428, "step": 1453 }, { "epoch": 1.1484992101105844, "grad_norm": 11.975180223168232, "learning_rate": 7.759195230490948e-06, "loss": 1.0847947597503662, "step": 1454 }, { "epoch": 1.1492890995260663, "grad_norm": 14.479350754911671, "learning_rate": 7.755361512588641e-06, "loss": 0.8910523653030396, "step": 1455 }, { "epoch": 1.1500789889415481, "grad_norm": 12.322055451503076, "learning_rate": 7.751525466947858e-06, "loss": 1.9108871221542358, "step": 1456 }, { "epoch": 1.15086887835703, "grad_norm": 8.486429198821625, "learning_rate": 7.747687096809298e-06, "loss": 1.3047826290130615, "step": 1457 }, { "epoch": 1.1516587677725119, "grad_norm": 22.216033260635363, "learning_rate": 7.74384640541563e-06, "loss": 1.3196589946746826, "step": 1458 }, { "epoch": 1.1524486571879937, "grad_norm": 13.4404210054278, "learning_rate": 7.740003396011485e-06, "loss": 1.7318623065948486, "step": 1459 }, { "epoch": 1.1532385466034756, "grad_norm": 10.120534079823207, "learning_rate": 7.736158071843446e-06, "loss": 1.0442365407943726, "step": 1460 }, { "epoch": 1.1540284360189574, "grad_norm": 11.889433421917458, "learning_rate": 7.73231043616006e-06, "loss": 1.421532392501831, "step": 1461 }, { "epoch": 1.154818325434439, "grad_norm": 8.828012658172716, "learning_rate": 7.728460492211818e-06, "loss": 1.5032670497894287, "step": 1462 }, { "epoch": 1.155608214849921, "grad_norm": 16.251861602372543, "learning_rate": 7.724608243251168e-06, "loss": 1.5522558689117432, "step": 1463 }, { "epoch": 1.1563981042654028, "grad_norm": 14.565593393435774, "learning_rate": 7.720753692532503e-06, "loss": 1.4599350690841675, "step": 1464 }, { "epoch": 1.1571879936808847, "grad_norm": 8.480351738134685, "learning_rate": 7.716896843312158e-06, "loss": 1.7275468111038208, "step": 1465 }, { "epoch": 1.1579778830963665, "grad_norm": 13.805554305973507, "learning_rate": 7.713037698848414e-06, "loss": 1.4946775436401367, "step": 1466 }, { "epoch": 1.1587677725118484, "grad_norm": 14.030328008782464, "learning_rate": 7.709176262401488e-06, "loss": 1.6568756103515625, "step": 1467 }, { "epoch": 1.1595576619273302, "grad_norm": 18.97968668941531, "learning_rate": 7.705312537233534e-06, "loss": 2.4846177101135254, "step": 1468 }, { "epoch": 1.160347551342812, "grad_norm": 21.074569384938723, "learning_rate": 7.70144652660864e-06, "loss": 1.6268469095230103, "step": 1469 }, { "epoch": 1.161137440758294, "grad_norm": 11.221094852791536, "learning_rate": 7.697578233792824e-06, "loss": 1.4419642686843872, "step": 1470 }, { "epoch": 1.1619273301737756, "grad_norm": 12.283969197549489, "learning_rate": 7.693707662054033e-06, "loss": 2.8970489501953125, "step": 1471 }, { "epoch": 1.1627172195892574, "grad_norm": 10.191761634757446, "learning_rate": 7.68983481466214e-06, "loss": 1.694975733757019, "step": 1472 }, { "epoch": 1.1635071090047393, "grad_norm": 9.871959664749303, "learning_rate": 7.685959694888935e-06, "loss": 1.5678069591522217, "step": 1473 }, { "epoch": 1.1642969984202212, "grad_norm": 13.739000495877802, "learning_rate": 7.682082306008138e-06, "loss": 2.2661375999450684, "step": 1474 }, { "epoch": 1.165086887835703, "grad_norm": 13.029810676854682, "learning_rate": 7.678202651295377e-06, "loss": 1.779642105102539, "step": 1475 }, { "epoch": 1.1658767772511849, "grad_norm": 11.679296000479976, "learning_rate": 7.674320734028193e-06, "loss": 0.8130778074264526, "step": 1476 }, { "epoch": 1.1666666666666667, "grad_norm": 14.423216823781184, "learning_rate": 7.67043655748605e-06, "loss": 1.6942588090896606, "step": 1477 }, { "epoch": 1.1674565560821484, "grad_norm": 11.441884700380314, "learning_rate": 7.66655012495031e-06, "loss": 1.179840087890625, "step": 1478 }, { "epoch": 1.1682464454976302, "grad_norm": 16.02771109914067, "learning_rate": 7.662661439704241e-06, "loss": 2.1548268795013428, "step": 1479 }, { "epoch": 1.169036334913112, "grad_norm": 14.394873535724342, "learning_rate": 7.658770505033022e-06, "loss": 1.1767363548278809, "step": 1480 }, { "epoch": 1.169826224328594, "grad_norm": 16.396153231459706, "learning_rate": 7.654877324223723e-06, "loss": 1.0473990440368652, "step": 1481 }, { "epoch": 1.1706161137440758, "grad_norm": 36.207662154699634, "learning_rate": 7.65098190056532e-06, "loss": 1.4607884883880615, "step": 1482 }, { "epoch": 1.1714060031595577, "grad_norm": 17.99136981853802, "learning_rate": 7.647084237348676e-06, "loss": 1.0910999774932861, "step": 1483 }, { "epoch": 1.1721958925750395, "grad_norm": 17.53587070270741, "learning_rate": 7.643184337866551e-06, "loss": 1.9439430236816406, "step": 1484 }, { "epoch": 1.1729857819905214, "grad_norm": 21.036905024705902, "learning_rate": 7.639282205413595e-06, "loss": 1.2512264251708984, "step": 1485 }, { "epoch": 1.1737756714060033, "grad_norm": 14.69734973276274, "learning_rate": 7.63537784328634e-06, "loss": 1.736374855041504, "step": 1486 }, { "epoch": 1.174565560821485, "grad_norm": 10.589187353935765, "learning_rate": 7.631471254783205e-06, "loss": 2.0629525184631348, "step": 1487 }, { "epoch": 1.1753554502369667, "grad_norm": 12.298150667488217, "learning_rate": 7.627562443204488e-06, "loss": 1.75897216796875, "step": 1488 }, { "epoch": 1.1761453396524486, "grad_norm": 11.144800996692458, "learning_rate": 7.623651411852369e-06, "loss": 1.8783044815063477, "step": 1489 }, { "epoch": 1.1769352290679305, "grad_norm": 18.274971673090146, "learning_rate": 7.619738164030894e-06, "loss": 1.2765934467315674, "step": 1490 }, { "epoch": 1.1777251184834123, "grad_norm": 13.609333078610245, "learning_rate": 7.615822703045993e-06, "loss": 0.9716304540634155, "step": 1491 }, { "epoch": 1.1785150078988942, "grad_norm": 9.06953052854543, "learning_rate": 7.611905032205459e-06, "loss": 1.2346336841583252, "step": 1492 }, { "epoch": 1.179304897314376, "grad_norm": 7.894635099442645, "learning_rate": 7.607985154818951e-06, "loss": 1.3905161619186401, "step": 1493 }, { "epoch": 1.180094786729858, "grad_norm": 11.930990729479253, "learning_rate": 7.6040630741979985e-06, "loss": 1.5520501136779785, "step": 1494 }, { "epoch": 1.1808846761453395, "grad_norm": 13.518366545034592, "learning_rate": 7.6001387936559845e-06, "loss": 1.478689193725586, "step": 1495 }, { "epoch": 1.1816745655608214, "grad_norm": 13.55754756002125, "learning_rate": 7.596212316508155e-06, "loss": 1.195493221282959, "step": 1496 }, { "epoch": 1.1824644549763033, "grad_norm": 9.14918248953396, "learning_rate": 7.592283646071612e-06, "loss": 1.777099609375, "step": 1497 }, { "epoch": 1.1832543443917851, "grad_norm": 12.578589306666348, "learning_rate": 7.588352785665307e-06, "loss": 1.780318260192871, "step": 1498 }, { "epoch": 1.184044233807267, "grad_norm": 16.331287548330316, "learning_rate": 7.5844197386100475e-06, "loss": 0.6511062383651733, "step": 1499 }, { "epoch": 1.1848341232227488, "grad_norm": 16.33379318813503, "learning_rate": 7.58048450822848e-06, "loss": 1.3710471391677856, "step": 1500 }, { "epoch": 1.1856240126382307, "grad_norm": 7.772998081101554, "learning_rate": 7.576547097845105e-06, "loss": 1.412032127380371, "step": 1501 }, { "epoch": 1.1864139020537126, "grad_norm": 8.398419142031567, "learning_rate": 7.572607510786256e-06, "loss": 1.2059210538864136, "step": 1502 }, { "epoch": 1.1872037914691944, "grad_norm": 9.901903204130111, "learning_rate": 7.568665750380111e-06, "loss": 1.7022660970687866, "step": 1503 }, { "epoch": 1.187993680884676, "grad_norm": 19.304850784714255, "learning_rate": 7.56472181995668e-06, "loss": 0.9760904908180237, "step": 1504 }, { "epoch": 1.188783570300158, "grad_norm": 7.87702786044823, "learning_rate": 7.560775722847811e-06, "loss": 1.2958579063415527, "step": 1505 }, { "epoch": 1.1895734597156398, "grad_norm": 8.790058222382367, "learning_rate": 7.556827462387179e-06, "loss": 1.5401803255081177, "step": 1506 }, { "epoch": 1.1903633491311216, "grad_norm": 10.161647102455948, "learning_rate": 7.5528770419102845e-06, "loss": 1.2612812519073486, "step": 1507 }, { "epoch": 1.1911532385466035, "grad_norm": 8.100962154198587, "learning_rate": 7.548924464754462e-06, "loss": 1.4908281564712524, "step": 1508 }, { "epoch": 1.1919431279620853, "grad_norm": 7.739278263317995, "learning_rate": 7.544969734258856e-06, "loss": 1.3893706798553467, "step": 1509 }, { "epoch": 1.1927330173775672, "grad_norm": 9.236436622854567, "learning_rate": 7.54101285376444e-06, "loss": 1.428430438041687, "step": 1510 }, { "epoch": 1.193522906793049, "grad_norm": 9.21444761020881, "learning_rate": 7.537053826613995e-06, "loss": 1.2131574153900146, "step": 1511 }, { "epoch": 1.1943127962085307, "grad_norm": 12.77391408135766, "learning_rate": 7.533092656152123e-06, "loss": 1.4747178554534912, "step": 1512 }, { "epoch": 1.1951026856240126, "grad_norm": 10.144190882358012, "learning_rate": 7.529129345725234e-06, "loss": 1.2598638534545898, "step": 1513 }, { "epoch": 1.1958925750394944, "grad_norm": 11.632673780290576, "learning_rate": 7.525163898681547e-06, "loss": 2.219787120819092, "step": 1514 }, { "epoch": 1.1966824644549763, "grad_norm": 11.100505677689917, "learning_rate": 7.521196318371082e-06, "loss": 1.492173433303833, "step": 1515 }, { "epoch": 1.1974723538704581, "grad_norm": 13.348061220126512, "learning_rate": 7.517226608145665e-06, "loss": 1.3286212682724, "step": 1516 }, { "epoch": 1.19826224328594, "grad_norm": 10.85425623441688, "learning_rate": 7.51325477135892e-06, "loss": 1.711487054824829, "step": 1517 }, { "epoch": 1.1990521327014219, "grad_norm": 10.554910275401799, "learning_rate": 7.509280811366268e-06, "loss": 1.6877734661102295, "step": 1518 }, { "epoch": 1.1998420221169037, "grad_norm": 9.907008317230424, "learning_rate": 7.505304731524925e-06, "loss": 1.5272674560546875, "step": 1519 }, { "epoch": 1.2006319115323856, "grad_norm": 6.706994018715486, "learning_rate": 7.501326535193893e-06, "loss": 1.4311678409576416, "step": 1520 }, { "epoch": 1.2014218009478672, "grad_norm": 10.997085955938047, "learning_rate": 7.497346225733968e-06, "loss": 2.118659734725952, "step": 1521 }, { "epoch": 1.202211690363349, "grad_norm": 11.689892035649317, "learning_rate": 7.493363806507727e-06, "loss": 1.3729009628295898, "step": 1522 }, { "epoch": 1.203001579778831, "grad_norm": 9.397482204646643, "learning_rate": 7.489379280879532e-06, "loss": 1.257511854171753, "step": 1523 }, { "epoch": 1.2037914691943128, "grad_norm": 11.597065678347706, "learning_rate": 7.485392652215524e-06, "loss": 2.0744144916534424, "step": 1524 }, { "epoch": 1.2045813586097947, "grad_norm": 9.18002114901742, "learning_rate": 7.481403923883618e-06, "loss": 2.4062094688415527, "step": 1525 }, { "epoch": 1.2053712480252765, "grad_norm": 11.51200612045866, "learning_rate": 7.477413099253507e-06, "loss": 1.3634181022644043, "step": 1526 }, { "epoch": 1.2061611374407584, "grad_norm": 9.886739815987795, "learning_rate": 7.4734201816966504e-06, "loss": 1.8432646989822388, "step": 1527 }, { "epoch": 1.20695102685624, "grad_norm": 7.821912561074209, "learning_rate": 7.46942517458628e-06, "loss": 1.6155750751495361, "step": 1528 }, { "epoch": 1.2077409162717219, "grad_norm": 208.73695380738994, "learning_rate": 7.46542808129739e-06, "loss": 2.2105181217193604, "step": 1529 }, { "epoch": 1.2085308056872037, "grad_norm": 9.442612830841552, "learning_rate": 7.461428905206738e-06, "loss": 1.035463571548462, "step": 1530 }, { "epoch": 1.2093206951026856, "grad_norm": 14.92511763923537, "learning_rate": 7.457427649692842e-06, "loss": 1.388323187828064, "step": 1531 }, { "epoch": 1.2101105845181674, "grad_norm": 10.04534923313997, "learning_rate": 7.453424318135976e-06, "loss": 1.5381313562393188, "step": 1532 }, { "epoch": 1.2109004739336493, "grad_norm": 10.045984329726831, "learning_rate": 7.4494189139181664e-06, "loss": 2.077467441558838, "step": 1533 }, { "epoch": 1.2116903633491312, "grad_norm": 11.346686651643333, "learning_rate": 7.445411440423191e-06, "loss": 1.4641904830932617, "step": 1534 }, { "epoch": 1.212480252764613, "grad_norm": 10.573921142627707, "learning_rate": 7.4414019010365804e-06, "loss": 1.4743311405181885, "step": 1535 }, { "epoch": 1.2132701421800949, "grad_norm": 13.824451819029829, "learning_rate": 7.437390299145602e-06, "loss": 1.4657893180847168, "step": 1536 }, { "epoch": 1.2140600315955767, "grad_norm": 7.208254750492127, "learning_rate": 7.433376638139273e-06, "loss": 1.7224773168563843, "step": 1537 }, { "epoch": 1.2148499210110584, "grad_norm": 14.6258689437837, "learning_rate": 7.429360921408348e-06, "loss": 1.240898847579956, "step": 1538 }, { "epoch": 1.2156398104265402, "grad_norm": 9.289403617691221, "learning_rate": 7.4253431523453156e-06, "loss": 1.5302990674972534, "step": 1539 }, { "epoch": 1.216429699842022, "grad_norm": 15.992588174167665, "learning_rate": 7.421323334344403e-06, "loss": 1.2926700115203857, "step": 1540 }, { "epoch": 1.217219589257504, "grad_norm": 13.29806452130727, "learning_rate": 7.417301470801563e-06, "loss": 1.858204960823059, "step": 1541 }, { "epoch": 1.2180094786729858, "grad_norm": 10.981694927001664, "learning_rate": 7.41327756511448e-06, "loss": 1.6751494407653809, "step": 1542 }, { "epoch": 1.2187993680884677, "grad_norm": 12.919094806936021, "learning_rate": 7.409251620682565e-06, "loss": 1.1227457523345947, "step": 1543 }, { "epoch": 1.2195892575039495, "grad_norm": 12.822642145949752, "learning_rate": 7.405223640906945e-06, "loss": 1.8671314716339111, "step": 1544 }, { "epoch": 1.2203791469194312, "grad_norm": 13.207524616649813, "learning_rate": 7.401193629190475e-06, "loss": 2.4761242866516113, "step": 1545 }, { "epoch": 1.221169036334913, "grad_norm": 10.043425267983269, "learning_rate": 7.397161588937718e-06, "loss": 1.4061156511306763, "step": 1546 }, { "epoch": 1.2219589257503949, "grad_norm": 10.369465986167071, "learning_rate": 7.393127523554959e-06, "loss": 1.6333423852920532, "step": 1547 }, { "epoch": 1.2227488151658767, "grad_norm": 28.920248006785016, "learning_rate": 7.389091436450185e-06, "loss": 2.198690414428711, "step": 1548 }, { "epoch": 1.2235387045813586, "grad_norm": 10.220829768400877, "learning_rate": 7.3850533310330995e-06, "loss": 1.6852078437805176, "step": 1549 }, { "epoch": 1.2243285939968405, "grad_norm": 10.219543177913568, "learning_rate": 7.381013210715106e-06, "loss": 1.4084906578063965, "step": 1550 }, { "epoch": 1.2251184834123223, "grad_norm": 11.198352523535156, "learning_rate": 7.376971078909312e-06, "loss": 1.3699990510940552, "step": 1551 }, { "epoch": 1.2259083728278042, "grad_norm": 11.335053711356307, "learning_rate": 7.3729269390305226e-06, "loss": 1.7044817209243774, "step": 1552 }, { "epoch": 1.226698262243286, "grad_norm": 12.010639946062005, "learning_rate": 7.36888079449524e-06, "loss": 1.949375033378601, "step": 1553 }, { "epoch": 1.2274881516587677, "grad_norm": 8.245851384242593, "learning_rate": 7.364832648721664e-06, "loss": 1.9039802551269531, "step": 1554 }, { "epoch": 1.2282780410742495, "grad_norm": 7.08878950015621, "learning_rate": 7.360782505129675e-06, "loss": 1.9304604530334473, "step": 1555 }, { "epoch": 1.2290679304897314, "grad_norm": 15.151405491025901, "learning_rate": 7.356730367140856e-06, "loss": 1.5305366516113281, "step": 1556 }, { "epoch": 1.2298578199052133, "grad_norm": 25.91795312714592, "learning_rate": 7.35267623817846e-06, "loss": 1.2220271825790405, "step": 1557 }, { "epoch": 1.2306477093206951, "grad_norm": 15.027153220859233, "learning_rate": 7.348620121667431e-06, "loss": 1.7458312511444092, "step": 1558 }, { "epoch": 1.231437598736177, "grad_norm": 11.869844226224354, "learning_rate": 7.34456202103439e-06, "loss": 1.1599485874176025, "step": 1559 }, { "epoch": 1.2322274881516588, "grad_norm": 9.271449236873991, "learning_rate": 7.3405019397076314e-06, "loss": 1.1800360679626465, "step": 1560 }, { "epoch": 1.2330173775671407, "grad_norm": 11.38399689838541, "learning_rate": 7.336439881117128e-06, "loss": 1.3739066123962402, "step": 1561 }, { "epoch": 1.2338072669826223, "grad_norm": 10.444548453126936, "learning_rate": 7.332375848694519e-06, "loss": 1.8069045543670654, "step": 1562 }, { "epoch": 1.2345971563981042, "grad_norm": 17.266827050977902, "learning_rate": 7.328309845873112e-06, "loss": 2.19236159324646, "step": 1563 }, { "epoch": 1.235387045813586, "grad_norm": 13.591954894799105, "learning_rate": 7.3242418760878805e-06, "loss": 2.5699267387390137, "step": 1564 }, { "epoch": 1.236176935229068, "grad_norm": 11.237452612969626, "learning_rate": 7.320171942775458e-06, "loss": 1.278207778930664, "step": 1565 }, { "epoch": 1.2369668246445498, "grad_norm": 17.089988693890064, "learning_rate": 7.316100049374139e-06, "loss": 1.3304542303085327, "step": 1566 }, { "epoch": 1.2377567140600316, "grad_norm": 11.92363646482957, "learning_rate": 7.312026199323869e-06, "loss": 1.8582803010940552, "step": 1567 }, { "epoch": 1.2385466034755135, "grad_norm": 12.460514274935408, "learning_rate": 7.307950396066257e-06, "loss": 1.5322096347808838, "step": 1568 }, { "epoch": 1.2393364928909953, "grad_norm": 12.477145420644185, "learning_rate": 7.303872643044545e-06, "loss": 1.9782072305679321, "step": 1569 }, { "epoch": 1.2401263823064772, "grad_norm": 12.74235377003251, "learning_rate": 7.299792943703642e-06, "loss": 1.2845888137817383, "step": 1570 }, { "epoch": 1.2409162717219588, "grad_norm": 10.673508111722988, "learning_rate": 7.295711301490084e-06, "loss": 1.6466004848480225, "step": 1571 }, { "epoch": 1.2417061611374407, "grad_norm": 9.652022285685392, "learning_rate": 7.291627719852059e-06, "loss": 1.3372915983200073, "step": 1572 }, { "epoch": 1.2424960505529226, "grad_norm": 9.596732477385675, "learning_rate": 7.287542202239392e-06, "loss": 1.8011666536331177, "step": 1573 }, { "epoch": 1.2432859399684044, "grad_norm": 7.083810704363881, "learning_rate": 7.283454752103538e-06, "loss": 1.1540056467056274, "step": 1574 }, { "epoch": 1.2440758293838863, "grad_norm": 9.393392719670336, "learning_rate": 7.27936537289759e-06, "loss": 1.677268385887146, "step": 1575 }, { "epoch": 1.2448657187993681, "grad_norm": 17.733645365732354, "learning_rate": 7.27527406807627e-06, "loss": 0.8276841640472412, "step": 1576 }, { "epoch": 1.24565560821485, "grad_norm": 13.067729880728594, "learning_rate": 7.271180841095924e-06, "loss": 1.3127690553665161, "step": 1577 }, { "epoch": 1.2464454976303316, "grad_norm": 13.042413994956872, "learning_rate": 7.267085695414525e-06, "loss": 1.4197359085083008, "step": 1578 }, { "epoch": 1.2472353870458135, "grad_norm": 10.928197034241155, "learning_rate": 7.262988634491664e-06, "loss": 1.261674165725708, "step": 1579 }, { "epoch": 1.2480252764612954, "grad_norm": 10.758652573808499, "learning_rate": 7.258889661788554e-06, "loss": 1.5431902408599854, "step": 1580 }, { "epoch": 1.2488151658767772, "grad_norm": 10.6083190072106, "learning_rate": 7.254788780768018e-06, "loss": 1.829581379890442, "step": 1581 }, { "epoch": 1.249605055292259, "grad_norm": 13.989251634457949, "learning_rate": 7.250685994894496e-06, "loss": 1.342024564743042, "step": 1582 }, { "epoch": 1.250394944707741, "grad_norm": 10.951019623163544, "learning_rate": 7.246581307634033e-06, "loss": 1.6949963569641113, "step": 1583 }, { "epoch": 1.2511848341232228, "grad_norm": 14.242065090086145, "learning_rate": 7.242474722454286e-06, "loss": 1.4241949319839478, "step": 1584 }, { "epoch": 1.2519747235387046, "grad_norm": 12.522283827649254, "learning_rate": 7.238366242824506e-06, "loss": 2.3611106872558594, "step": 1585 }, { "epoch": 1.2527646129541865, "grad_norm": 10.607084561048083, "learning_rate": 7.234255872215554e-06, "loss": 1.5256626605987549, "step": 1586 }, { "epoch": 1.2535545023696684, "grad_norm": 7.96307207456326, "learning_rate": 7.230143614099885e-06, "loss": 1.2228140830993652, "step": 1587 }, { "epoch": 1.25434439178515, "grad_norm": 9.008816824433687, "learning_rate": 7.226029471951545e-06, "loss": 1.4289093017578125, "step": 1588 }, { "epoch": 1.2551342812006319, "grad_norm": 20.920359812194434, "learning_rate": 7.221913449246178e-06, "loss": 1.3880183696746826, "step": 1589 }, { "epoch": 1.2559241706161137, "grad_norm": 15.238445561586087, "learning_rate": 7.217795549461012e-06, "loss": 1.5309922695159912, "step": 1590 }, { "epoch": 1.2567140600315956, "grad_norm": 16.4951650860319, "learning_rate": 7.213675776074862e-06, "loss": 1.835852026939392, "step": 1591 }, { "epoch": 1.2575039494470774, "grad_norm": 8.574885331151954, "learning_rate": 7.209554132568128e-06, "loss": 0.49211519956588745, "step": 1592 }, { "epoch": 1.2582938388625593, "grad_norm": 19.73342294585578, "learning_rate": 7.205430622422786e-06, "loss": 1.538137435913086, "step": 1593 }, { "epoch": 1.259083728278041, "grad_norm": 18.023327663051525, "learning_rate": 7.201305249122392e-06, "loss": 1.457167148590088, "step": 1594 }, { "epoch": 1.2598736176935228, "grad_norm": 12.076397086371236, "learning_rate": 7.197178016152076e-06, "loss": 1.1796395778656006, "step": 1595 }, { "epoch": 1.2606635071090047, "grad_norm": 10.685605988239777, "learning_rate": 7.193048926998535e-06, "loss": 1.7177990674972534, "step": 1596 }, { "epoch": 1.2614533965244865, "grad_norm": 11.711401184144716, "learning_rate": 7.188917985150038e-06, "loss": 1.2435717582702637, "step": 1597 }, { "epoch": 1.2622432859399684, "grad_norm": 11.385740810589741, "learning_rate": 7.184785194096421e-06, "loss": 1.6954989433288574, "step": 1598 }, { "epoch": 1.2630331753554502, "grad_norm": 11.48602327882758, "learning_rate": 7.180650557329076e-06, "loss": 1.3193989992141724, "step": 1599 }, { "epoch": 1.263823064770932, "grad_norm": 12.906600512802692, "learning_rate": 7.1765140783409615e-06, "loss": 1.6648807525634766, "step": 1600 }, { "epoch": 1.264612954186414, "grad_norm": 10.918513023394707, "learning_rate": 7.1723757606265845e-06, "loss": 1.1041438579559326, "step": 1601 }, { "epoch": 1.2654028436018958, "grad_norm": 10.843029403978079, "learning_rate": 7.16823560768201e-06, "loss": 1.7871309518814087, "step": 1602 }, { "epoch": 1.2661927330173777, "grad_norm": 19.286859146429332, "learning_rate": 7.164093623004858e-06, "loss": 2.269460439682007, "step": 1603 }, { "epoch": 1.2669826224328595, "grad_norm": 15.634640920458306, "learning_rate": 7.159949810094283e-06, "loss": 1.2490503787994385, "step": 1604 }, { "epoch": 1.2677725118483412, "grad_norm": 9.491676824429073, "learning_rate": 7.155804172450999e-06, "loss": 1.3973164558410645, "step": 1605 }, { "epoch": 1.268562401263823, "grad_norm": 8.966724738021117, "learning_rate": 7.151656713577248e-06, "loss": 1.924464225769043, "step": 1606 }, { "epoch": 1.2693522906793049, "grad_norm": 10.340849066157938, "learning_rate": 7.147507436976823e-06, "loss": 0.9530832171440125, "step": 1607 }, { "epoch": 1.2701421800947867, "grad_norm": 8.707473817662407, "learning_rate": 7.143356346155042e-06, "loss": 1.555940866470337, "step": 1608 }, { "epoch": 1.2709320695102686, "grad_norm": 10.219855234886426, "learning_rate": 7.139203444618762e-06, "loss": 1.6086714267730713, "step": 1609 }, { "epoch": 1.2717219589257505, "grad_norm": 8.944536325339534, "learning_rate": 7.135048735876368e-06, "loss": 1.2452785968780518, "step": 1610 }, { "epoch": 1.272511848341232, "grad_norm": 12.328677473799477, "learning_rate": 7.130892223437771e-06, "loss": 2.04856014251709, "step": 1611 }, { "epoch": 1.273301737756714, "grad_norm": 12.463645792972384, "learning_rate": 7.126733910814406e-06, "loss": 1.6133408546447754, "step": 1612 }, { "epoch": 1.2740916271721958, "grad_norm": 19.208477820593387, "learning_rate": 7.122573801519229e-06, "loss": 1.7257981300354004, "step": 1613 }, { "epoch": 1.2748815165876777, "grad_norm": 10.648285597019955, "learning_rate": 7.118411899066717e-06, "loss": 1.6325335502624512, "step": 1614 }, { "epoch": 1.2756714060031595, "grad_norm": 8.899110923088042, "learning_rate": 7.114248206972856e-06, "loss": 2.037642478942871, "step": 1615 }, { "epoch": 1.2764612954186414, "grad_norm": 11.20674724929018, "learning_rate": 7.110082728755147e-06, "loss": 1.7042773962020874, "step": 1616 }, { "epoch": 1.2772511848341233, "grad_norm": 18.487835882110616, "learning_rate": 7.105915467932601e-06, "loss": 1.3538520336151123, "step": 1617 }, { "epoch": 1.2780410742496051, "grad_norm": 7.198026203991742, "learning_rate": 7.101746428025732e-06, "loss": 1.396047592163086, "step": 1618 }, { "epoch": 1.278830963665087, "grad_norm": 13.95821454839482, "learning_rate": 7.097575612556561e-06, "loss": 1.2027523517608643, "step": 1619 }, { "epoch": 1.2796208530805688, "grad_norm": 11.242118092533596, "learning_rate": 7.0934030250486044e-06, "loss": 2.4239554405212402, "step": 1620 }, { "epoch": 1.2804107424960507, "grad_norm": 11.487912451476449, "learning_rate": 7.089228669026878e-06, "loss": 1.6502265930175781, "step": 1621 }, { "epoch": 1.2812006319115323, "grad_norm": 13.921903791621322, "learning_rate": 7.085052548017893e-06, "loss": 2.114908456802368, "step": 1622 }, { "epoch": 1.2819905213270142, "grad_norm": 7.371765331620391, "learning_rate": 7.08087466554965e-06, "loss": 1.7077139616012573, "step": 1623 }, { "epoch": 1.282780410742496, "grad_norm": 16.302760837877532, "learning_rate": 7.076695025151636e-06, "loss": 1.2382946014404297, "step": 1624 }, { "epoch": 1.283570300157978, "grad_norm": 13.124566165615319, "learning_rate": 7.072513630354827e-06, "loss": 2.201374053955078, "step": 1625 }, { "epoch": 1.2843601895734598, "grad_norm": 7.528109529798553, "learning_rate": 7.068330484691679e-06, "loss": 1.3300297260284424, "step": 1626 }, { "epoch": 1.2851500789889416, "grad_norm": 12.041483320882802, "learning_rate": 7.0641455916961244e-06, "loss": 0.9577473402023315, "step": 1627 }, { "epoch": 1.2859399684044233, "grad_norm": 8.602253519605528, "learning_rate": 7.0599589549035785e-06, "loss": 1.0266809463500977, "step": 1628 }, { "epoch": 1.2867298578199051, "grad_norm": 11.151691436114625, "learning_rate": 7.055770577850925e-06, "loss": 2.103332757949829, "step": 1629 }, { "epoch": 1.287519747235387, "grad_norm": 12.866943913296561, "learning_rate": 7.051580464076515e-06, "loss": 1.1621947288513184, "step": 1630 }, { "epoch": 1.2883096366508688, "grad_norm": 13.134566302958063, "learning_rate": 7.047388617120175e-06, "loss": 2.0336053371429443, "step": 1631 }, { "epoch": 1.2890995260663507, "grad_norm": 18.298472533368262, "learning_rate": 7.043195040523187e-06, "loss": 1.6731476783752441, "step": 1632 }, { "epoch": 1.2898894154818326, "grad_norm": 11.064624121392312, "learning_rate": 7.0389997378283034e-06, "loss": 1.4558773040771484, "step": 1633 }, { "epoch": 1.2906793048973144, "grad_norm": 12.035862755722269, "learning_rate": 7.034802712579725e-06, "loss": 1.4987773895263672, "step": 1634 }, { "epoch": 1.2914691943127963, "grad_norm": 11.000347165868366, "learning_rate": 7.030603968323115e-06, "loss": 1.652524471282959, "step": 1635 }, { "epoch": 1.2922590837282781, "grad_norm": 11.920424614729273, "learning_rate": 7.026403508605586e-06, "loss": 1.199878215789795, "step": 1636 }, { "epoch": 1.29304897314376, "grad_norm": 12.250643358349953, "learning_rate": 7.022201336975701e-06, "loss": 1.563902735710144, "step": 1637 }, { "epoch": 1.2938388625592416, "grad_norm": 10.264821730924035, "learning_rate": 7.017997456983465e-06, "loss": 1.4691765308380127, "step": 1638 }, { "epoch": 1.2946287519747235, "grad_norm": 13.243547862779414, "learning_rate": 7.013791872180333e-06, "loss": 2.1046786308288574, "step": 1639 }, { "epoch": 1.2954186413902053, "grad_norm": 7.356657896392653, "learning_rate": 7.009584586119198e-06, "loss": 1.6833205223083496, "step": 1640 }, { "epoch": 1.2962085308056872, "grad_norm": 7.547893143985833, "learning_rate": 7.005375602354385e-06, "loss": 1.406240463256836, "step": 1641 }, { "epoch": 1.296998420221169, "grad_norm": 10.735447099759265, "learning_rate": 7.00116492444166e-06, "loss": 1.8073036670684814, "step": 1642 }, { "epoch": 1.297788309636651, "grad_norm": 10.170543223857521, "learning_rate": 6.996952555938217e-06, "loss": 1.3641012907028198, "step": 1643 }, { "epoch": 1.2985781990521326, "grad_norm": 6.261340458786887, "learning_rate": 6.992738500402679e-06, "loss": 1.5656462907791138, "step": 1644 }, { "epoch": 1.2993680884676144, "grad_norm": 12.61218163311875, "learning_rate": 6.988522761395093e-06, "loss": 1.4655276536941528, "step": 1645 }, { "epoch": 1.3001579778830963, "grad_norm": 9.829462165844339, "learning_rate": 6.984305342476931e-06, "loss": 1.841139554977417, "step": 1646 }, { "epoch": 1.3009478672985781, "grad_norm": 10.428718423453438, "learning_rate": 6.980086247211082e-06, "loss": 1.9564460515975952, "step": 1647 }, { "epoch": 1.30173775671406, "grad_norm": 7.749615781587074, "learning_rate": 6.975865479161849e-06, "loss": 1.387686848640442, "step": 1648 }, { "epoch": 1.3025276461295419, "grad_norm": 9.844486088428916, "learning_rate": 6.971643041894953e-06, "loss": 0.7119489312171936, "step": 1649 }, { "epoch": 1.3033175355450237, "grad_norm": 8.565082994108224, "learning_rate": 6.967418938977524e-06, "loss": 1.407379150390625, "step": 1650 }, { "epoch": 1.3041074249605056, "grad_norm": 9.682368107994689, "learning_rate": 6.963193173978095e-06, "loss": 1.277189016342163, "step": 1651 }, { "epoch": 1.3048973143759874, "grad_norm": 13.513336697960028, "learning_rate": 6.9589657504666095e-06, "loss": 1.2872400283813477, "step": 1652 }, { "epoch": 1.3056872037914693, "grad_norm": 20.735938043035457, "learning_rate": 6.954736672014406e-06, "loss": 2.1411285400390625, "step": 1653 }, { "epoch": 1.3064770932069512, "grad_norm": 13.154183276627684, "learning_rate": 6.950505942194226e-06, "loss": 1.1476898193359375, "step": 1654 }, { "epoch": 1.3072669826224328, "grad_norm": 22.670571761450503, "learning_rate": 6.946273564580202e-06, "loss": 1.3200886249542236, "step": 1655 }, { "epoch": 1.3080568720379147, "grad_norm": 13.376955794870272, "learning_rate": 6.942039542747863e-06, "loss": 1.7086883783340454, "step": 1656 }, { "epoch": 1.3088467614533965, "grad_norm": 6.135115256463472, "learning_rate": 6.937803880274122e-06, "loss": 1.4040107727050781, "step": 1657 }, { "epoch": 1.3096366508688784, "grad_norm": 10.847847813759419, "learning_rate": 6.933566580737282e-06, "loss": 1.342395544052124, "step": 1658 }, { "epoch": 1.3104265402843602, "grad_norm": 9.846488420083572, "learning_rate": 6.929327647717028e-06, "loss": 1.7640855312347412, "step": 1659 }, { "epoch": 1.311216429699842, "grad_norm": 17.579714964873293, "learning_rate": 6.925087084794422e-06, "loss": 1.837497353553772, "step": 1660 }, { "epoch": 1.3120063191153237, "grad_norm": 13.308781650389735, "learning_rate": 6.92084489555191e-06, "loss": 1.5832990407943726, "step": 1661 }, { "epoch": 1.3127962085308056, "grad_norm": 15.704664883204273, "learning_rate": 6.916601083573302e-06, "loss": 2.004098653793335, "step": 1662 }, { "epoch": 1.3135860979462874, "grad_norm": 8.685136767072184, "learning_rate": 6.912355652443791e-06, "loss": 1.037060260772705, "step": 1663 }, { "epoch": 1.3143759873617693, "grad_norm": 10.120809843785937, "learning_rate": 6.908108605749925e-06, "loss": 1.7781083583831787, "step": 1664 }, { "epoch": 1.3151658767772512, "grad_norm": 8.220963621405001, "learning_rate": 6.903859947079625e-06, "loss": 1.7696709632873535, "step": 1665 }, { "epoch": 1.315955766192733, "grad_norm": 10.606659026468149, "learning_rate": 6.899609680022175e-06, "loss": 1.7073678970336914, "step": 1666 }, { "epoch": 1.3167456556082149, "grad_norm": 10.40542278789583, "learning_rate": 6.895357808168209e-06, "loss": 1.32874596118927, "step": 1667 }, { "epoch": 1.3175355450236967, "grad_norm": 14.900627896368178, "learning_rate": 6.8911043351097265e-06, "loss": 1.4731537103652954, "step": 1668 }, { "epoch": 1.3183254344391786, "grad_norm": 14.289865839749087, "learning_rate": 6.886849264440074e-06, "loss": 2.3647401332855225, "step": 1669 }, { "epoch": 1.3191153238546605, "grad_norm": 10.096520767718063, "learning_rate": 6.88259259975395e-06, "loss": 1.7499630451202393, "step": 1670 }, { "epoch": 1.319905213270142, "grad_norm": 12.58275455787123, "learning_rate": 6.878334344647399e-06, "loss": 1.4793438911437988, "step": 1671 }, { "epoch": 1.320695102685624, "grad_norm": 9.213580765946672, "learning_rate": 6.874074502717807e-06, "loss": 1.533569574356079, "step": 1672 }, { "epoch": 1.3214849921011058, "grad_norm": 11.233518853535507, "learning_rate": 6.869813077563905e-06, "loss": 1.5673787593841553, "step": 1673 }, { "epoch": 1.3222748815165877, "grad_norm": 11.5105493592369, "learning_rate": 6.865550072785757e-06, "loss": 1.8369773626327515, "step": 1674 }, { "epoch": 1.3230647709320695, "grad_norm": 14.469541616554176, "learning_rate": 6.861285491984764e-06, "loss": 1.5419373512268066, "step": 1675 }, { "epoch": 1.3238546603475514, "grad_norm": 10.063880181802128, "learning_rate": 6.857019338763657e-06, "loss": 1.5308325290679932, "step": 1676 }, { "epoch": 1.3246445497630333, "grad_norm": 10.589111964405951, "learning_rate": 6.852751616726501e-06, "loss": 1.3474477529525757, "step": 1677 }, { "epoch": 1.325434439178515, "grad_norm": 16.81222045065148, "learning_rate": 6.848482329478675e-06, "loss": 2.032487630844116, "step": 1678 }, { "epoch": 1.3262243285939967, "grad_norm": 11.866053591597979, "learning_rate": 6.844211480626892e-06, "loss": 1.1282556056976318, "step": 1679 }, { "epoch": 1.3270142180094786, "grad_norm": 9.019099813424168, "learning_rate": 6.839939073779177e-06, "loss": 1.6074413061141968, "step": 1680 }, { "epoch": 1.3278041074249605, "grad_norm": 8.783590478924218, "learning_rate": 6.835665112544871e-06, "loss": 1.224888801574707, "step": 1681 }, { "epoch": 1.3285939968404423, "grad_norm": 9.151154533135232, "learning_rate": 6.831389600534637e-06, "loss": 1.2797878980636597, "step": 1682 }, { "epoch": 1.3293838862559242, "grad_norm": 15.249803779803319, "learning_rate": 6.8271125413604344e-06, "loss": 1.8004652261734009, "step": 1683 }, { "epoch": 1.330173775671406, "grad_norm": 22.765578093916663, "learning_rate": 6.822833938635543e-06, "loss": 1.466485619544983, "step": 1684 }, { "epoch": 1.330963665086888, "grad_norm": 9.461043264779885, "learning_rate": 6.818553795974536e-06, "loss": 2.0474472045898438, "step": 1685 }, { "epoch": 1.3317535545023698, "grad_norm": 12.50340732157009, "learning_rate": 6.814272116993294e-06, "loss": 2.1818690299987793, "step": 1686 }, { "epoch": 1.3325434439178516, "grad_norm": 19.829927714642757, "learning_rate": 6.809988905308993e-06, "loss": 2.1175107955932617, "step": 1687 }, { "epoch": 1.3333333333333333, "grad_norm": 10.675607140843853, "learning_rate": 6.805704164540105e-06, "loss": 1.130906343460083, "step": 1688 }, { "epoch": 1.3341232227488151, "grad_norm": 10.39793547659673, "learning_rate": 6.8014178983063914e-06, "loss": 1.098733901977539, "step": 1689 }, { "epoch": 1.334913112164297, "grad_norm": 10.503557720518131, "learning_rate": 6.7971301102289054e-06, "loss": 1.2792344093322754, "step": 1690 }, { "epoch": 1.3357030015797788, "grad_norm": 10.340351227260813, "learning_rate": 6.792840803929987e-06, "loss": 1.5470017194747925, "step": 1691 }, { "epoch": 1.3364928909952607, "grad_norm": 9.882170112299617, "learning_rate": 6.78854998303325e-06, "loss": 1.1156797409057617, "step": 1692 }, { "epoch": 1.3372827804107426, "grad_norm": 10.827400927376479, "learning_rate": 6.7842576511636e-06, "loss": 1.2364952564239502, "step": 1693 }, { "epoch": 1.3380726698262242, "grad_norm": 12.60283058617565, "learning_rate": 6.779963811947211e-06, "loss": 1.4180421829223633, "step": 1694 }, { "epoch": 1.338862559241706, "grad_norm": 12.037561798280398, "learning_rate": 6.775668469011531e-06, "loss": 1.811230182647705, "step": 1695 }, { "epoch": 1.339652448657188, "grad_norm": 14.317321439652712, "learning_rate": 6.771371625985282e-06, "loss": 1.5977898836135864, "step": 1696 }, { "epoch": 1.3404423380726698, "grad_norm": 12.550990602775505, "learning_rate": 6.767073286498449e-06, "loss": 1.4557725191116333, "step": 1697 }, { "epoch": 1.3412322274881516, "grad_norm": 9.595105877395659, "learning_rate": 6.762773454182285e-06, "loss": 1.620849609375, "step": 1698 }, { "epoch": 1.3420221169036335, "grad_norm": 26.931211154992397, "learning_rate": 6.7584721326693024e-06, "loss": 1.621090292930603, "step": 1699 }, { "epoch": 1.3428120063191153, "grad_norm": 18.2341302921318, "learning_rate": 6.754169325593273e-06, "loss": 1.1882051229476929, "step": 1700 }, { "epoch": 1.3436018957345972, "grad_norm": 13.079885370924936, "learning_rate": 6.749865036589219e-06, "loss": 1.9173457622528076, "step": 1701 }, { "epoch": 1.344391785150079, "grad_norm": 10.73628142485319, "learning_rate": 6.7455592692934205e-06, "loss": 1.4644713401794434, "step": 1702 }, { "epoch": 1.345181674565561, "grad_norm": 13.507997760455746, "learning_rate": 6.741252027343403e-06, "loss": 1.3514494895935059, "step": 1703 }, { "epoch": 1.3459715639810428, "grad_norm": 9.434097289624326, "learning_rate": 6.736943314377939e-06, "loss": 1.4577107429504395, "step": 1704 }, { "epoch": 1.3467614533965244, "grad_norm": 12.60917651464132, "learning_rate": 6.732633134037043e-06, "loss": 1.3438491821289062, "step": 1705 }, { "epoch": 1.3475513428120063, "grad_norm": 34.45628221491453, "learning_rate": 6.7283214899619685e-06, "loss": 1.6202247142791748, "step": 1706 }, { "epoch": 1.3483412322274881, "grad_norm": 11.815572511405222, "learning_rate": 6.724008385795211e-06, "loss": 1.8743906021118164, "step": 1707 }, { "epoch": 1.34913112164297, "grad_norm": 14.955379827242334, "learning_rate": 6.719693825180491e-06, "loss": 1.5417712926864624, "step": 1708 }, { "epoch": 1.3499210110584519, "grad_norm": 13.982695999884012, "learning_rate": 6.7153778117627635e-06, "loss": 1.5238138437271118, "step": 1709 }, { "epoch": 1.3507109004739337, "grad_norm": 13.558773627084195, "learning_rate": 6.711060349188213e-06, "loss": 1.0729316473007202, "step": 1710 }, { "epoch": 1.3515007898894154, "grad_norm": 15.460928981412394, "learning_rate": 6.706741441104243e-06, "loss": 1.6075103282928467, "step": 1711 }, { "epoch": 1.3522906793048972, "grad_norm": 27.070598507663696, "learning_rate": 6.702421091159487e-06, "loss": 1.9741368293762207, "step": 1712 }, { "epoch": 1.353080568720379, "grad_norm": 9.716774266414612, "learning_rate": 6.698099303003784e-06, "loss": 1.7203664779663086, "step": 1713 }, { "epoch": 1.353870458135861, "grad_norm": 10.642254002327258, "learning_rate": 6.693776080288198e-06, "loss": 1.163581132888794, "step": 1714 }, { "epoch": 1.3546603475513428, "grad_norm": 8.496905620664727, "learning_rate": 6.689451426665004e-06, "loss": 2.0292768478393555, "step": 1715 }, { "epoch": 1.3554502369668247, "grad_norm": 10.126564260567976, "learning_rate": 6.685125345787679e-06, "loss": 2.220201015472412, "step": 1716 }, { "epoch": 1.3562401263823065, "grad_norm": 13.061476880554993, "learning_rate": 6.680797841310914e-06, "loss": 2.056443214416504, "step": 1717 }, { "epoch": 1.3570300157977884, "grad_norm": 13.47212904208046, "learning_rate": 6.676468916890597e-06, "loss": 1.468867540359497, "step": 1718 }, { "epoch": 1.3578199052132702, "grad_norm": 13.538453828063716, "learning_rate": 6.67213857618382e-06, "loss": 0.8965187072753906, "step": 1719 }, { "epoch": 1.358609794628752, "grad_norm": 7.238990017595394, "learning_rate": 6.667806822848865e-06, "loss": 1.4493942260742188, "step": 1720 }, { "epoch": 1.3593996840442337, "grad_norm": 13.06712117871877, "learning_rate": 6.663473660545217e-06, "loss": 1.303008794784546, "step": 1721 }, { "epoch": 1.3601895734597156, "grad_norm": 18.893117290210554, "learning_rate": 6.659139092933542e-06, "loss": 2.37463641166687, "step": 1722 }, { "epoch": 1.3609794628751974, "grad_norm": 10.071203220715486, "learning_rate": 6.6548031236756975e-06, "loss": 2.426710605621338, "step": 1723 }, { "epoch": 1.3617693522906793, "grad_norm": 7.544794430629949, "learning_rate": 6.650465756434724e-06, "loss": 0.8276800513267517, "step": 1724 }, { "epoch": 1.3625592417061612, "grad_norm": 9.200917908287058, "learning_rate": 6.6461269948748445e-06, "loss": 2.1654391288757324, "step": 1725 }, { "epoch": 1.363349131121643, "grad_norm": 25.451210894852924, "learning_rate": 6.64178684266146e-06, "loss": 1.5984547138214111, "step": 1726 }, { "epoch": 1.3641390205371249, "grad_norm": 12.38315288359827, "learning_rate": 6.637445303461143e-06, "loss": 0.9575186967849731, "step": 1727 }, { "epoch": 1.3649289099526065, "grad_norm": 13.455605811383814, "learning_rate": 6.633102380941643e-06, "loss": 1.4046566486358643, "step": 1728 }, { "epoch": 1.3657187993680884, "grad_norm": 11.193113951721786, "learning_rate": 6.628758078771873e-06, "loss": 2.0002331733703613, "step": 1729 }, { "epoch": 1.3665086887835702, "grad_norm": 10.231357721297691, "learning_rate": 6.624412400621916e-06, "loss": 1.3115997314453125, "step": 1730 }, { "epoch": 1.367298578199052, "grad_norm": 15.410181553876129, "learning_rate": 6.620065350163012e-06, "loss": 1.2699280977249146, "step": 1731 }, { "epoch": 1.368088467614534, "grad_norm": 10.808531808691974, "learning_rate": 6.615716931067566e-06, "loss": 2.0870327949523926, "step": 1732 }, { "epoch": 1.3688783570300158, "grad_norm": 9.638709809302068, "learning_rate": 6.6113671470091355e-06, "loss": 2.032163619995117, "step": 1733 }, { "epoch": 1.3696682464454977, "grad_norm": 9.573439798053808, "learning_rate": 6.607016001662434e-06, "loss": 1.5165016651153564, "step": 1734 }, { "epoch": 1.3704581358609795, "grad_norm": 16.637270126365415, "learning_rate": 6.602663498703323e-06, "loss": 1.579211711883545, "step": 1735 }, { "epoch": 1.3712480252764614, "grad_norm": 12.245545065101416, "learning_rate": 6.598309641808809e-06, "loss": 1.451501727104187, "step": 1736 }, { "epoch": 1.3720379146919433, "grad_norm": 10.871654496984037, "learning_rate": 6.593954434657047e-06, "loss": 1.3775560855865479, "step": 1737 }, { "epoch": 1.3728278041074249, "grad_norm": 14.826876502837509, "learning_rate": 6.5895978809273295e-06, "loss": 1.1092920303344727, "step": 1738 }, { "epoch": 1.3736176935229067, "grad_norm": 9.937827702763318, "learning_rate": 6.585239984300088e-06, "loss": 1.1688158512115479, "step": 1739 }, { "epoch": 1.3744075829383886, "grad_norm": 10.154954545278624, "learning_rate": 6.580880748456888e-06, "loss": 1.9065393209457397, "step": 1740 }, { "epoch": 1.3751974723538705, "grad_norm": 20.375541326941555, "learning_rate": 6.576520177080425e-06, "loss": 3.503018379211426, "step": 1741 }, { "epoch": 1.3759873617693523, "grad_norm": 15.460278049176381, "learning_rate": 6.572158273854527e-06, "loss": 1.5270636081695557, "step": 1742 }, { "epoch": 1.3767772511848342, "grad_norm": 21.983057924875485, "learning_rate": 6.567795042464139e-06, "loss": 1.8938589096069336, "step": 1743 }, { "epoch": 1.3775671406003158, "grad_norm": 16.471995959910757, "learning_rate": 6.563430486595339e-06, "loss": 1.7760772705078125, "step": 1744 }, { "epoch": 1.3783570300157977, "grad_norm": 15.032482903239565, "learning_rate": 6.5590646099353135e-06, "loss": 1.6687235832214355, "step": 1745 }, { "epoch": 1.3791469194312795, "grad_norm": 12.844038712723703, "learning_rate": 6.554697416172373e-06, "loss": 1.5714036226272583, "step": 1746 }, { "epoch": 1.3799368088467614, "grad_norm": 17.21043619059917, "learning_rate": 6.5503289089959354e-06, "loss": 3.404308795928955, "step": 1747 }, { "epoch": 1.3807266982622433, "grad_norm": 11.175765500846673, "learning_rate": 6.5459590920965295e-06, "loss": 1.9153468608856201, "step": 1748 }, { "epoch": 1.3815165876777251, "grad_norm": 12.6196975598501, "learning_rate": 6.541587969165793e-06, "loss": 2.095952033996582, "step": 1749 }, { "epoch": 1.382306477093207, "grad_norm": 10.31639931220762, "learning_rate": 6.537215543896463e-06, "loss": 1.1625829935073853, "step": 1750 }, { "epoch": 1.3830963665086888, "grad_norm": 29.7649936622185, "learning_rate": 6.532841819982381e-06, "loss": 3.0494065284729004, "step": 1751 }, { "epoch": 1.3838862559241707, "grad_norm": 10.666741440827831, "learning_rate": 6.52846680111848e-06, "loss": 1.2341630458831787, "step": 1752 }, { "epoch": 1.3846761453396526, "grad_norm": 12.22687954037762, "learning_rate": 6.524090491000793e-06, "loss": 1.308075189590454, "step": 1753 }, { "epoch": 1.3854660347551344, "grad_norm": 32.49773976246368, "learning_rate": 6.519712893326439e-06, "loss": 1.6739758253097534, "step": 1754 }, { "epoch": 1.386255924170616, "grad_norm": 10.896992529162494, "learning_rate": 6.515334011793629e-06, "loss": 2.6935648918151855, "step": 1755 }, { "epoch": 1.387045813586098, "grad_norm": 14.152067149422212, "learning_rate": 6.510953850101658e-06, "loss": 1.4659454822540283, "step": 1756 }, { "epoch": 1.3878357030015798, "grad_norm": 9.490627971683756, "learning_rate": 6.506572411950896e-06, "loss": 1.4744107723236084, "step": 1757 }, { "epoch": 1.3886255924170616, "grad_norm": 9.178009642515413, "learning_rate": 6.5021897010428006e-06, "loss": 1.4771769046783447, "step": 1758 }, { "epoch": 1.3894154818325435, "grad_norm": 7.901450497913, "learning_rate": 6.4978057210798995e-06, "loss": 1.5277764797210693, "step": 1759 }, { "epoch": 1.3902053712480253, "grad_norm": 16.24677122520427, "learning_rate": 6.4934204757657925e-06, "loss": 1.8329733610153198, "step": 1760 }, { "epoch": 1.390995260663507, "grad_norm": 17.706742868384215, "learning_rate": 6.489033968805149e-06, "loss": 1.9825087785720825, "step": 1761 }, { "epoch": 1.3917851500789888, "grad_norm": 15.685673090253694, "learning_rate": 6.484646203903706e-06, "loss": 1.2657302618026733, "step": 1762 }, { "epoch": 1.3925750394944707, "grad_norm": 10.790748538927113, "learning_rate": 6.4802571847682616e-06, "loss": 3.029433250427246, "step": 1763 }, { "epoch": 1.3933649289099526, "grad_norm": 9.163423388663407, "learning_rate": 6.4758669151066724e-06, "loss": 1.4058465957641602, "step": 1764 }, { "epoch": 1.3941548183254344, "grad_norm": 10.067549014086303, "learning_rate": 6.4714753986278535e-06, "loss": 1.397748589515686, "step": 1765 }, { "epoch": 1.3949447077409163, "grad_norm": 13.05855432795011, "learning_rate": 6.467082639041772e-06, "loss": 1.4465043544769287, "step": 1766 }, { "epoch": 1.3957345971563981, "grad_norm": 14.849808480124686, "learning_rate": 6.462688640059446e-06, "loss": 1.5454909801483154, "step": 1767 }, { "epoch": 1.39652448657188, "grad_norm": 11.702650248241305, "learning_rate": 6.4582934053929405e-06, "loss": 1.1027021408081055, "step": 1768 }, { "epoch": 1.3973143759873619, "grad_norm": 7.374612887912086, "learning_rate": 6.453896938755362e-06, "loss": 1.7206413745880127, "step": 1769 }, { "epoch": 1.3981042654028437, "grad_norm": 13.105128531972968, "learning_rate": 6.449499243860865e-06, "loss": 2.0842576026916504, "step": 1770 }, { "epoch": 1.3988941548183254, "grad_norm": 16.342442684958176, "learning_rate": 6.445100324424631e-06, "loss": 1.9844783544540405, "step": 1771 }, { "epoch": 1.3996840442338072, "grad_norm": 7.941412914747672, "learning_rate": 6.440700184162887e-06, "loss": 1.3604265451431274, "step": 1772 }, { "epoch": 1.400473933649289, "grad_norm": 7.665007430651305, "learning_rate": 6.436298826792882e-06, "loss": 0.7555409669876099, "step": 1773 }, { "epoch": 1.401263823064771, "grad_norm": 9.961027255210812, "learning_rate": 6.431896256032896e-06, "loss": 1.5438669919967651, "step": 1774 }, { "epoch": 1.4020537124802528, "grad_norm": 12.80133317623476, "learning_rate": 6.427492475602242e-06, "loss": 1.6424999237060547, "step": 1775 }, { "epoch": 1.4028436018957346, "grad_norm": 11.021716530624692, "learning_rate": 6.423087489221241e-06, "loss": 1.6980810165405273, "step": 1776 }, { "epoch": 1.4036334913112165, "grad_norm": 20.704996350287434, "learning_rate": 6.418681300611244e-06, "loss": 1.92954421043396, "step": 1777 }, { "epoch": 1.4044233807266981, "grad_norm": 12.682678877921038, "learning_rate": 6.414273913494612e-06, "loss": 0.9070745706558228, "step": 1778 }, { "epoch": 1.40521327014218, "grad_norm": 12.513930307508547, "learning_rate": 6.409865331594721e-06, "loss": 1.1177334785461426, "step": 1779 }, { "epoch": 1.4060031595576619, "grad_norm": 19.948578372309566, "learning_rate": 6.4054555586359556e-06, "loss": 1.5609198808670044, "step": 1780 }, { "epoch": 1.4067930489731437, "grad_norm": 10.304880695036104, "learning_rate": 6.401044598343705e-06, "loss": 1.9408183097839355, "step": 1781 }, { "epoch": 1.4075829383886256, "grad_norm": 11.485897348203851, "learning_rate": 6.3966324544443646e-06, "loss": 2.1067936420440674, "step": 1782 }, { "epoch": 1.4083728278041074, "grad_norm": 12.36441904247693, "learning_rate": 6.392219130665328e-06, "loss": 1.3297131061553955, "step": 1783 }, { "epoch": 1.4091627172195893, "grad_norm": 11.454598254103093, "learning_rate": 6.387804630734985e-06, "loss": 1.3817702531814575, "step": 1784 }, { "epoch": 1.4099526066350712, "grad_norm": 11.226000917241246, "learning_rate": 6.383388958382719e-06, "loss": 2.0304312705993652, "step": 1785 }, { "epoch": 1.410742496050553, "grad_norm": 15.283704119726693, "learning_rate": 6.378972117338908e-06, "loss": 1.2152456045150757, "step": 1786 }, { "epoch": 1.4115323854660349, "grad_norm": 14.282288242101943, "learning_rate": 6.374554111334908e-06, "loss": 2.027944564819336, "step": 1787 }, { "epoch": 1.4123222748815165, "grad_norm": 19.074788092743933, "learning_rate": 6.37013494410307e-06, "loss": 1.8768101930618286, "step": 1788 }, { "epoch": 1.4131121642969984, "grad_norm": 27.405083978695842, "learning_rate": 6.365714619376722e-06, "loss": 1.921675205230713, "step": 1789 }, { "epoch": 1.4139020537124802, "grad_norm": 12.762740990846117, "learning_rate": 6.361293140890161e-06, "loss": 1.2969154119491577, "step": 1790 }, { "epoch": 1.414691943127962, "grad_norm": 13.968926857294704, "learning_rate": 6.356870512378675e-06, "loss": 1.6537883281707764, "step": 1791 }, { "epoch": 1.415481832543444, "grad_norm": 10.371222981346492, "learning_rate": 6.3524467375785125e-06, "loss": 2.2494006156921387, "step": 1792 }, { "epoch": 1.4162717219589258, "grad_norm": 7.840468212923819, "learning_rate": 6.348021820226891e-06, "loss": 1.4510530233383179, "step": 1793 }, { "epoch": 1.4170616113744074, "grad_norm": 16.008361511520068, "learning_rate": 6.343595764061999e-06, "loss": 2.526327133178711, "step": 1794 }, { "epoch": 1.4178515007898893, "grad_norm": 7.9497180354538415, "learning_rate": 6.339168572822978e-06, "loss": 1.4724477529525757, "step": 1795 }, { "epoch": 1.4186413902053712, "grad_norm": 6.971397200044914, "learning_rate": 6.334740250249938e-06, "loss": 1.9278626441955566, "step": 1796 }, { "epoch": 1.419431279620853, "grad_norm": 14.711227502395163, "learning_rate": 6.33031080008394e-06, "loss": 1.0732061862945557, "step": 1797 }, { "epoch": 1.4202211690363349, "grad_norm": 8.10829033173858, "learning_rate": 6.325880226066997e-06, "loss": 1.8444054126739502, "step": 1798 }, { "epoch": 1.4210110584518167, "grad_norm": 8.220785031600306, "learning_rate": 6.321448531942072e-06, "loss": 1.180464267730713, "step": 1799 }, { "epoch": 1.4218009478672986, "grad_norm": 7.13121507626065, "learning_rate": 6.317015721453077e-06, "loss": 1.8239995241165161, "step": 1800 }, { "epoch": 1.4225908372827805, "grad_norm": 10.075780879909827, "learning_rate": 6.3125817983448624e-06, "loss": 1.9934985637664795, "step": 1801 }, { "epoch": 1.4233807266982623, "grad_norm": 11.805782298666543, "learning_rate": 6.308146766363225e-06, "loss": 1.1996102333068848, "step": 1802 }, { "epoch": 1.4241706161137442, "grad_norm": 14.758482012061515, "learning_rate": 6.303710629254889e-06, "loss": 1.6951093673706055, "step": 1803 }, { "epoch": 1.424960505529226, "grad_norm": 6.415227648148693, "learning_rate": 6.29927339076752e-06, "loss": 1.2805756330490112, "step": 1804 }, { "epoch": 1.4257503949447077, "grad_norm": 16.190811963050376, "learning_rate": 6.294835054649714e-06, "loss": 1.6054199934005737, "step": 1805 }, { "epoch": 1.4265402843601895, "grad_norm": 19.35675670483708, "learning_rate": 6.290395624650988e-06, "loss": 2.000746726989746, "step": 1806 }, { "epoch": 1.4273301737756714, "grad_norm": 10.953221955867015, "learning_rate": 6.28595510452179e-06, "loss": 1.7414836883544922, "step": 1807 }, { "epoch": 1.4281200631911533, "grad_norm": 14.62887358716854, "learning_rate": 6.281513498013485e-06, "loss": 1.2090952396392822, "step": 1808 }, { "epoch": 1.4289099526066351, "grad_norm": 8.841041489194227, "learning_rate": 6.277070808878356e-06, "loss": 1.1835849285125732, "step": 1809 }, { "epoch": 1.429699842022117, "grad_norm": 9.037929728341096, "learning_rate": 6.2726270408696035e-06, "loss": 1.8852903842926025, "step": 1810 }, { "epoch": 1.4304897314375986, "grad_norm": 14.839212546666879, "learning_rate": 6.268182197741336e-06, "loss": 2.071831703186035, "step": 1811 }, { "epoch": 1.4312796208530805, "grad_norm": 12.37353569458125, "learning_rate": 6.263736283248571e-06, "loss": 1.7889072895050049, "step": 1812 }, { "epoch": 1.4320695102685623, "grad_norm": 14.308437007860489, "learning_rate": 6.259289301147233e-06, "loss": 2.109577178955078, "step": 1813 }, { "epoch": 1.4328593996840442, "grad_norm": 10.588829861541612, "learning_rate": 6.254841255194148e-06, "loss": 0.9634921550750732, "step": 1814 }, { "epoch": 1.433649289099526, "grad_norm": 8.269928453685496, "learning_rate": 6.250392149147035e-06, "loss": 1.1673877239227295, "step": 1815 }, { "epoch": 1.434439178515008, "grad_norm": 12.167224980723917, "learning_rate": 6.2459419867645195e-06, "loss": 1.7496429681777954, "step": 1816 }, { "epoch": 1.4352290679304898, "grad_norm": 11.74353184566792, "learning_rate": 6.241490771806111e-06, "loss": 1.6411113739013672, "step": 1817 }, { "epoch": 1.4360189573459716, "grad_norm": 10.038663586636575, "learning_rate": 6.2370385080322085e-06, "loss": 1.6785610914230347, "step": 1818 }, { "epoch": 1.4368088467614535, "grad_norm": 12.705482849240397, "learning_rate": 6.232585199204102e-06, "loss": 1.4933853149414062, "step": 1819 }, { "epoch": 1.4375987361769353, "grad_norm": 8.797279147148013, "learning_rate": 6.22813084908396e-06, "loss": 1.2085703611373901, "step": 1820 }, { "epoch": 1.438388625592417, "grad_norm": 9.13523458275769, "learning_rate": 6.223675461434831e-06, "loss": 1.3428914546966553, "step": 1821 }, { "epoch": 1.4391785150078988, "grad_norm": 7.848994958997547, "learning_rate": 6.219219040020646e-06, "loss": 1.0880684852600098, "step": 1822 }, { "epoch": 1.4399684044233807, "grad_norm": 13.565272501706762, "learning_rate": 6.214761588606199e-06, "loss": 2.37025785446167, "step": 1823 }, { "epoch": 1.4407582938388626, "grad_norm": 11.496245700277813, "learning_rate": 6.210303110957161e-06, "loss": 1.0500859022140503, "step": 1824 }, { "epoch": 1.4415481832543444, "grad_norm": 11.699195594543191, "learning_rate": 6.205843610840071e-06, "loss": 1.364439606666565, "step": 1825 }, { "epoch": 1.4423380726698263, "grad_norm": 12.92363516840581, "learning_rate": 6.201383092022326e-06, "loss": 1.575169324874878, "step": 1826 }, { "epoch": 1.4431279620853081, "grad_norm": 9.636126549348427, "learning_rate": 6.196921558272191e-06, "loss": 1.2875540256500244, "step": 1827 }, { "epoch": 1.4439178515007898, "grad_norm": 25.404058624847497, "learning_rate": 6.19245901335878e-06, "loss": 1.948940634727478, "step": 1828 }, { "epoch": 1.4447077409162716, "grad_norm": 16.28182362055586, "learning_rate": 6.187995461052067e-06, "loss": 1.686116337776184, "step": 1829 }, { "epoch": 1.4454976303317535, "grad_norm": 12.142680163856589, "learning_rate": 6.183530905122881e-06, "loss": 1.513108253479004, "step": 1830 }, { "epoch": 1.4462875197472354, "grad_norm": 9.796255845103145, "learning_rate": 6.179065349342885e-06, "loss": 1.6807351112365723, "step": 1831 }, { "epoch": 1.4470774091627172, "grad_norm": 14.771373088976882, "learning_rate": 6.1745987974846e-06, "loss": 1.763586163520813, "step": 1832 }, { "epoch": 1.447867298578199, "grad_norm": 11.746125463949197, "learning_rate": 6.170131253321384e-06, "loss": 1.5122478008270264, "step": 1833 }, { "epoch": 1.448657187993681, "grad_norm": 10.693788951485445, "learning_rate": 6.16566272062743e-06, "loss": 2.1962921619415283, "step": 1834 }, { "epoch": 1.4494470774091628, "grad_norm": 12.44749625950401, "learning_rate": 6.161193203177773e-06, "loss": 1.9236458539962769, "step": 1835 }, { "epoch": 1.4502369668246446, "grad_norm": 17.61123152678093, "learning_rate": 6.156722704748273e-06, "loss": 1.6482089757919312, "step": 1836 }, { "epoch": 1.4510268562401265, "grad_norm": 7.824657050413297, "learning_rate": 6.152251229115625e-06, "loss": 1.2396411895751953, "step": 1837 }, { "epoch": 1.4518167456556081, "grad_norm": 12.100352057209935, "learning_rate": 6.147778780057342e-06, "loss": 1.445483922958374, "step": 1838 }, { "epoch": 1.45260663507109, "grad_norm": 10.176922246717954, "learning_rate": 6.143305361351766e-06, "loss": 2.227597713470459, "step": 1839 }, { "epoch": 1.4533965244865719, "grad_norm": 23.08434863955186, "learning_rate": 6.1388309767780575e-06, "loss": 2.184255361557007, "step": 1840 }, { "epoch": 1.4541864139020537, "grad_norm": 15.247266132776515, "learning_rate": 6.134355630116189e-06, "loss": 1.357899785041809, "step": 1841 }, { "epoch": 1.4549763033175356, "grad_norm": 7.690736401984131, "learning_rate": 6.129879325146948e-06, "loss": 1.3187198638916016, "step": 1842 }, { "epoch": 1.4557661927330174, "grad_norm": 17.084504147813014, "learning_rate": 6.125402065651933e-06, "loss": 1.1403226852416992, "step": 1843 }, { "epoch": 1.456556082148499, "grad_norm": 10.647716875554703, "learning_rate": 6.120923855413546e-06, "loss": 1.5669901371002197, "step": 1844 }, { "epoch": 1.457345971563981, "grad_norm": 17.169042345137168, "learning_rate": 6.116444698214996e-06, "loss": 1.8641374111175537, "step": 1845 }, { "epoch": 1.4581358609794628, "grad_norm": 10.230215295961285, "learning_rate": 6.111964597840288e-06, "loss": 1.8520389795303345, "step": 1846 }, { "epoch": 1.4589257503949447, "grad_norm": 14.086738280821706, "learning_rate": 6.1074835580742274e-06, "loss": 1.135934829711914, "step": 1847 }, { "epoch": 1.4597156398104265, "grad_norm": 20.40220132997962, "learning_rate": 6.103001582702408e-06, "loss": 1.4812136888504028, "step": 1848 }, { "epoch": 1.4605055292259084, "grad_norm": 31.44316031849772, "learning_rate": 6.098518675511221e-06, "loss": 2.650513172149658, "step": 1849 }, { "epoch": 1.4612954186413902, "grad_norm": 8.456469054005813, "learning_rate": 6.094034840287838e-06, "loss": 1.8254547119140625, "step": 1850 }, { "epoch": 1.462085308056872, "grad_norm": 8.984175379105094, "learning_rate": 6.08955008082022e-06, "loss": 1.96278715133667, "step": 1851 }, { "epoch": 1.462875197472354, "grad_norm": 17.024546561160296, "learning_rate": 6.085064400897106e-06, "loss": 1.2698783874511719, "step": 1852 }, { "epoch": 1.4636650868878358, "grad_norm": 9.736224700781001, "learning_rate": 6.080577804308012e-06, "loss": 1.6380083560943604, "step": 1853 }, { "epoch": 1.4644549763033177, "grad_norm": 20.83200309530118, "learning_rate": 6.076090294843233e-06, "loss": 1.6041996479034424, "step": 1854 }, { "epoch": 1.4652448657187993, "grad_norm": 10.731218316042554, "learning_rate": 6.0716018762938265e-06, "loss": 1.3004403114318848, "step": 1855 }, { "epoch": 1.4660347551342812, "grad_norm": 12.047943285394913, "learning_rate": 6.067112552451628e-06, "loss": 1.7745938301086426, "step": 1856 }, { "epoch": 1.466824644549763, "grad_norm": 11.575538627172165, "learning_rate": 6.062622327109231e-06, "loss": 1.8333407640457153, "step": 1857 }, { "epoch": 1.4676145339652449, "grad_norm": 8.54961227395821, "learning_rate": 6.0581312040599926e-06, "loss": 1.6345336437225342, "step": 1858 }, { "epoch": 1.4684044233807267, "grad_norm": 15.409163973089639, "learning_rate": 6.053639187098028e-06, "loss": 0.9887954592704773, "step": 1859 }, { "epoch": 1.4691943127962086, "grad_norm": 12.434754832094152, "learning_rate": 6.04914628001821e-06, "loss": 1.7964859008789062, "step": 1860 }, { "epoch": 1.4699842022116902, "grad_norm": 30.51669618244978, "learning_rate": 6.044652486616159e-06, "loss": 2.461520195007324, "step": 1861 }, { "epoch": 1.470774091627172, "grad_norm": 10.994064510333782, "learning_rate": 6.040157810688245e-06, "loss": 1.86288583278656, "step": 1862 }, { "epoch": 1.471563981042654, "grad_norm": 9.30779783354174, "learning_rate": 6.035662256031592e-06, "loss": 1.46977698802948, "step": 1863 }, { "epoch": 1.4723538704581358, "grad_norm": 7.934486944796402, "learning_rate": 6.03116582644405e-06, "loss": 1.5796260833740234, "step": 1864 }, { "epoch": 1.4731437598736177, "grad_norm": 14.423496800164589, "learning_rate": 6.026668525724226e-06, "loss": 1.8589414358139038, "step": 1865 }, { "epoch": 1.4739336492890995, "grad_norm": 10.939581596140304, "learning_rate": 6.022170357671448e-06, "loss": 2.25348162651062, "step": 1866 }, { "epoch": 1.4747235387045814, "grad_norm": 18.444767247036655, "learning_rate": 6.017671326085787e-06, "loss": 2.1810455322265625, "step": 1867 }, { "epoch": 1.4755134281200633, "grad_norm": 6.294704216554538, "learning_rate": 6.013171434768039e-06, "loss": 1.2740647792816162, "step": 1868 }, { "epoch": 1.4763033175355451, "grad_norm": 11.258105727454375, "learning_rate": 6.008670687519726e-06, "loss": 1.537172555923462, "step": 1869 }, { "epoch": 1.477093206951027, "grad_norm": 8.495483927169012, "learning_rate": 6.004169088143093e-06, "loss": 1.5573079586029053, "step": 1870 }, { "epoch": 1.4778830963665086, "grad_norm": 15.784635016949512, "learning_rate": 5.99966664044111e-06, "loss": 1.1430606842041016, "step": 1871 }, { "epoch": 1.4786729857819905, "grad_norm": 10.067351907649972, "learning_rate": 5.9951633482174565e-06, "loss": 1.8512628078460693, "step": 1872 }, { "epoch": 1.4794628751974723, "grad_norm": 12.439277630875722, "learning_rate": 5.99065921527653e-06, "loss": 1.0742204189300537, "step": 1873 }, { "epoch": 1.4802527646129542, "grad_norm": 11.790578305048301, "learning_rate": 5.986154245423435e-06, "loss": 2.0604054927825928, "step": 1874 }, { "epoch": 1.481042654028436, "grad_norm": 9.886128974159105, "learning_rate": 5.981648442463987e-06, "loss": 1.2165788412094116, "step": 1875 }, { "epoch": 1.481832543443918, "grad_norm": 11.693448495370616, "learning_rate": 5.977141810204702e-06, "loss": 1.2570345401763916, "step": 1876 }, { "epoch": 1.4826224328593998, "grad_norm": 11.392265499751487, "learning_rate": 5.972634352452797e-06, "loss": 1.3734052181243896, "step": 1877 }, { "epoch": 1.4834123222748814, "grad_norm": 9.935897727227923, "learning_rate": 5.968126073016188e-06, "loss": 1.536318302154541, "step": 1878 }, { "epoch": 1.4842022116903633, "grad_norm": 13.37009015419332, "learning_rate": 5.963616975703488e-06, "loss": 1.6027817726135254, "step": 1879 }, { "epoch": 1.4849921011058451, "grad_norm": 13.54732672385344, "learning_rate": 5.95910706432399e-06, "loss": 1.4695227146148682, "step": 1880 }, { "epoch": 1.485781990521327, "grad_norm": 10.426563375284957, "learning_rate": 5.954596342687686e-06, "loss": 1.2945826053619385, "step": 1881 }, { "epoch": 1.4865718799368088, "grad_norm": 12.085174713834514, "learning_rate": 5.950084814605252e-06, "loss": 1.690997838973999, "step": 1882 }, { "epoch": 1.4873617693522907, "grad_norm": 14.941420232595867, "learning_rate": 5.945572483888033e-06, "loss": 1.3554736375808716, "step": 1883 }, { "epoch": 1.4881516587677726, "grad_norm": 12.228142172469568, "learning_rate": 5.94105935434807e-06, "loss": 1.512892723083496, "step": 1884 }, { "epoch": 1.4889415481832544, "grad_norm": 11.913640083855395, "learning_rate": 5.936545429798062e-06, "loss": 0.7298011779785156, "step": 1885 }, { "epoch": 1.4897314375987363, "grad_norm": 15.962737514375329, "learning_rate": 5.932030714051392e-06, "loss": 1.493302583694458, "step": 1886 }, { "epoch": 1.4905213270142181, "grad_norm": 9.775040361342926, "learning_rate": 5.927515210922107e-06, "loss": 2.2897167205810547, "step": 1887 }, { "epoch": 1.4913112164296998, "grad_norm": 16.24056817956605, "learning_rate": 5.922998924224917e-06, "loss": 1.8842390775680542, "step": 1888 }, { "epoch": 1.4921011058451816, "grad_norm": 10.431504207313186, "learning_rate": 5.918481857775196e-06, "loss": 1.5306816101074219, "step": 1889 }, { "epoch": 1.4928909952606635, "grad_norm": 10.932057272276792, "learning_rate": 5.913964015388976e-06, "loss": 1.5974483489990234, "step": 1890 }, { "epoch": 1.4936808846761453, "grad_norm": 16.338433560086507, "learning_rate": 5.909445400882948e-06, "loss": 0.9032529592514038, "step": 1891 }, { "epoch": 1.4944707740916272, "grad_norm": 16.083461588553835, "learning_rate": 5.904926018074448e-06, "loss": 2.5330991744995117, "step": 1892 }, { "epoch": 1.495260663507109, "grad_norm": 7.719604767773579, "learning_rate": 5.9004058707814715e-06, "loss": 1.676531434059143, "step": 1893 }, { "epoch": 1.4960505529225907, "grad_norm": 7.901640170457522, "learning_rate": 5.895884962822648e-06, "loss": 1.7386832237243652, "step": 1894 }, { "epoch": 1.4968404423380726, "grad_norm": 9.168608646448185, "learning_rate": 5.891363298017259e-06, "loss": 1.2910975217819214, "step": 1895 }, { "epoch": 1.4976303317535544, "grad_norm": 23.252379383869613, "learning_rate": 5.886840880185221e-06, "loss": 1.953572154045105, "step": 1896 }, { "epoch": 1.4984202211690363, "grad_norm": 14.861072270306032, "learning_rate": 5.8823177131470845e-06, "loss": 1.128541350364685, "step": 1897 }, { "epoch": 1.4992101105845181, "grad_norm": 11.269382836461835, "learning_rate": 5.877793800724041e-06, "loss": 1.7088985443115234, "step": 1898 }, { "epoch": 1.5, "grad_norm": 10.533224054579982, "learning_rate": 5.873269146737901e-06, "loss": 2.03849458694458, "step": 1899 }, { "epoch": 1.5007898894154819, "grad_norm": 10.821862530515162, "learning_rate": 5.868743755011113e-06, "loss": 0.8391838669776917, "step": 1900 }, { "epoch": 1.5015797788309637, "grad_norm": 12.815338614755259, "learning_rate": 5.86421762936674e-06, "loss": 2.290050745010376, "step": 1901 }, { "epoch": 1.5023696682464456, "grad_norm": 6.75820438876223, "learning_rate": 5.859690773628466e-06, "loss": 1.656872034072876, "step": 1902 }, { "epoch": 1.5031595576619274, "grad_norm": 13.057735640244541, "learning_rate": 5.855163191620597e-06, "loss": 1.4379336833953857, "step": 1903 }, { "epoch": 1.5039494470774093, "grad_norm": 8.816578457255313, "learning_rate": 5.8506348871680475e-06, "loss": 1.9750895500183105, "step": 1904 }, { "epoch": 1.5047393364928912, "grad_norm": 10.27544331263895, "learning_rate": 5.846105864096343e-06, "loss": 1.3693504333496094, "step": 1905 }, { "epoch": 1.5055292259083728, "grad_norm": 19.407143316882802, "learning_rate": 5.84157612623162e-06, "loss": 1.635138750076294, "step": 1906 }, { "epoch": 1.5063191153238547, "grad_norm": 12.029439868679129, "learning_rate": 5.837045677400613e-06, "loss": 0.5531861782073975, "step": 1907 }, { "epoch": 1.5071090047393365, "grad_norm": 14.005078207323043, "learning_rate": 5.832514521430661e-06, "loss": 1.1493902206420898, "step": 1908 }, { "epoch": 1.5078988941548184, "grad_norm": 6.882826860067384, "learning_rate": 5.827982662149703e-06, "loss": 1.7186492681503296, "step": 1909 }, { "epoch": 1.5086887835703, "grad_norm": 9.057187685271941, "learning_rate": 5.8234501033862624e-06, "loss": 2.1788861751556396, "step": 1910 }, { "epoch": 1.5094786729857819, "grad_norm": 11.664865065310822, "learning_rate": 5.818916848969463e-06, "loss": 2.0605411529541016, "step": 1911 }, { "epoch": 1.5102685624012637, "grad_norm": 8.847275772967832, "learning_rate": 5.814382902729015e-06, "loss": 1.4167741537094116, "step": 1912 }, { "epoch": 1.5110584518167456, "grad_norm": 9.964601394953007, "learning_rate": 5.809848268495206e-06, "loss": 0.9648761749267578, "step": 1913 }, { "epoch": 1.5118483412322274, "grad_norm": 18.140123517520358, "learning_rate": 5.8053129500989156e-06, "loss": 1.2075505256652832, "step": 1914 }, { "epoch": 1.5126382306477093, "grad_norm": 14.956061178193, "learning_rate": 5.80077695137159e-06, "loss": 1.8038408756256104, "step": 1915 }, { "epoch": 1.5134281200631912, "grad_norm": 8.549470794553304, "learning_rate": 5.7962402761452616e-06, "loss": 1.2158410549163818, "step": 1916 }, { "epoch": 1.514218009478673, "grad_norm": 10.833339245641687, "learning_rate": 5.791702928252525e-06, "loss": 0.7378091812133789, "step": 1917 }, { "epoch": 1.5150078988941549, "grad_norm": 9.732584946999157, "learning_rate": 5.7871649115265484e-06, "loss": 1.1355817317962646, "step": 1918 }, { "epoch": 1.5157977883096367, "grad_norm": 15.80570500150481, "learning_rate": 5.782626229801062e-06, "loss": 1.5603950023651123, "step": 1919 }, { "epoch": 1.5165876777251186, "grad_norm": 12.04096957012893, "learning_rate": 5.778086886910359e-06, "loss": 1.6280852556228638, "step": 1920 }, { "epoch": 1.5173775671406005, "grad_norm": 17.757331266774536, "learning_rate": 5.773546886689292e-06, "loss": 1.6459561586380005, "step": 1921 }, { "epoch": 1.518167456556082, "grad_norm": 10.980216863583156, "learning_rate": 5.769006232973266e-06, "loss": 1.7304844856262207, "step": 1922 }, { "epoch": 1.518957345971564, "grad_norm": 8.64298769734182, "learning_rate": 5.764464929598246e-06, "loss": 1.9379894733428955, "step": 1923 }, { "epoch": 1.5197472353870458, "grad_norm": 8.969841480124796, "learning_rate": 5.759922980400734e-06, "loss": 1.1220753192901611, "step": 1924 }, { "epoch": 1.5205371248025277, "grad_norm": 14.709009423534145, "learning_rate": 5.755380389217785e-06, "loss": 1.1699135303497314, "step": 1925 }, { "epoch": 1.5213270142180095, "grad_norm": 9.204182201289338, "learning_rate": 5.750837159886996e-06, "loss": 1.9453136920928955, "step": 1926 }, { "epoch": 1.5221169036334912, "grad_norm": 20.29037732104967, "learning_rate": 5.746293296246502e-06, "loss": 1.1104214191436768, "step": 1927 }, { "epoch": 1.522906793048973, "grad_norm": 9.963584395362918, "learning_rate": 5.741748802134976e-06, "loss": 1.0753260850906372, "step": 1928 }, { "epoch": 1.5236966824644549, "grad_norm": 10.194943039916806, "learning_rate": 5.7372036813916155e-06, "loss": 1.6703574657440186, "step": 1929 }, { "epoch": 1.5244865718799367, "grad_norm": 12.899448039370167, "learning_rate": 5.732657937856158e-06, "loss": 1.3306403160095215, "step": 1930 }, { "epoch": 1.5252764612954186, "grad_norm": 9.761711644822508, "learning_rate": 5.728111575368865e-06, "loss": 1.5857300758361816, "step": 1931 }, { "epoch": 1.5260663507109005, "grad_norm": 10.629942354032634, "learning_rate": 5.723564597770514e-06, "loss": 1.4648703336715698, "step": 1932 }, { "epoch": 1.5268562401263823, "grad_norm": 13.432843847452732, "learning_rate": 5.719017008902407e-06, "loss": 1.4944384098052979, "step": 1933 }, { "epoch": 1.5276461295418642, "grad_norm": 11.813071490114952, "learning_rate": 5.714468812606364e-06, "loss": 1.323237657546997, "step": 1934 }, { "epoch": 1.528436018957346, "grad_norm": 8.562112811115774, "learning_rate": 5.709920012724716e-06, "loss": 2.0364575386047363, "step": 1935 }, { "epoch": 1.529225908372828, "grad_norm": 13.067657788966802, "learning_rate": 5.705370613100303e-06, "loss": 1.4062690734863281, "step": 1936 }, { "epoch": 1.5300157977883098, "grad_norm": 13.707958453722402, "learning_rate": 5.700820617576472e-06, "loss": 2.091384172439575, "step": 1937 }, { "epoch": 1.5308056872037916, "grad_norm": 10.413369263252646, "learning_rate": 5.696270029997078e-06, "loss": 1.5702612400054932, "step": 1938 }, { "epoch": 1.5315955766192733, "grad_norm": 7.882076727278544, "learning_rate": 5.691718854206469e-06, "loss": 1.8636072874069214, "step": 1939 }, { "epoch": 1.5323854660347551, "grad_norm": 9.17078361317583, "learning_rate": 5.687167094049493e-06, "loss": 2.772977828979492, "step": 1940 }, { "epoch": 1.533175355450237, "grad_norm": 15.116676322563023, "learning_rate": 5.682614753371493e-06, "loss": 1.5175914764404297, "step": 1941 }, { "epoch": 1.5339652448657188, "grad_norm": 7.844046810826043, "learning_rate": 5.678061836018303e-06, "loss": 2.061984062194824, "step": 1942 }, { "epoch": 1.5347551342812005, "grad_norm": 10.701078465755538, "learning_rate": 5.673508345836239e-06, "loss": 1.6619548797607422, "step": 1943 }, { "epoch": 1.5355450236966823, "grad_norm": 7.891225505862987, "learning_rate": 5.6689542866721095e-06, "loss": 1.1752052307128906, "step": 1944 }, { "epoch": 1.5363349131121642, "grad_norm": 6.304249509659231, "learning_rate": 5.664399662373192e-06, "loss": 0.8437387943267822, "step": 1945 }, { "epoch": 1.537124802527646, "grad_norm": 17.603728317953017, "learning_rate": 5.659844476787255e-06, "loss": 1.1868000030517578, "step": 1946 }, { "epoch": 1.537914691943128, "grad_norm": 66.47257484493905, "learning_rate": 5.655288733762531e-06, "loss": 2.8787412643432617, "step": 1947 }, { "epoch": 1.5387045813586098, "grad_norm": 8.360497076634983, "learning_rate": 5.650732437147725e-06, "loss": 1.4284359216690063, "step": 1948 }, { "epoch": 1.5394944707740916, "grad_norm": 16.849166837845633, "learning_rate": 5.646175590792015e-06, "loss": 1.8208255767822266, "step": 1949 }, { "epoch": 1.5402843601895735, "grad_norm": 15.853584711578053, "learning_rate": 5.6416181985450365e-06, "loss": 1.4639555215835571, "step": 1950 }, { "epoch": 1.5410742496050553, "grad_norm": 11.418310736533611, "learning_rate": 5.637060264256893e-06, "loss": 1.445953369140625, "step": 1951 }, { "epoch": 1.5418641390205372, "grad_norm": 10.03242971159572, "learning_rate": 5.632501791778139e-06, "loss": 1.7128900289535522, "step": 1952 }, { "epoch": 1.542654028436019, "grad_norm": 11.985633656861989, "learning_rate": 5.6279427849597876e-06, "loss": 1.3158780336380005, "step": 1953 }, { "epoch": 1.543443917851501, "grad_norm": 13.823142105370444, "learning_rate": 5.623383247653306e-06, "loss": 1.426164150238037, "step": 1954 }, { "epoch": 1.5442338072669828, "grad_norm": 12.41483138077164, "learning_rate": 5.6188231837106024e-06, "loss": 1.4300283193588257, "step": 1955 }, { "epoch": 1.5450236966824644, "grad_norm": 12.100433098038376, "learning_rate": 5.6142625969840355e-06, "loss": 1.334028720855713, "step": 1956 }, { "epoch": 1.5458135860979463, "grad_norm": 7.826204795219399, "learning_rate": 5.6097014913264036e-06, "loss": 1.2844315767288208, "step": 1957 }, { "epoch": 1.5466034755134281, "grad_norm": 10.029697199051965, "learning_rate": 5.605139870590945e-06, "loss": 1.7111456394195557, "step": 1958 }, { "epoch": 1.54739336492891, "grad_norm": 23.257849538913792, "learning_rate": 5.600577738631331e-06, "loss": 1.3320598602294922, "step": 1959 }, { "epoch": 1.5481832543443916, "grad_norm": 28.133569384076022, "learning_rate": 5.596015099301665e-06, "loss": 1.7531509399414062, "step": 1960 }, { "epoch": 1.5489731437598735, "grad_norm": 7.595217526983019, "learning_rate": 5.591451956456482e-06, "loss": 1.5534119606018066, "step": 1961 }, { "epoch": 1.5497630331753554, "grad_norm": 14.700635546212805, "learning_rate": 5.586888313950737e-06, "loss": 0.8609148263931274, "step": 1962 }, { "epoch": 1.5505529225908372, "grad_norm": 11.364286626899796, "learning_rate": 5.5823241756398115e-06, "loss": 1.637607216835022, "step": 1963 }, { "epoch": 1.551342812006319, "grad_norm": 9.425435127940757, "learning_rate": 5.577759545379507e-06, "loss": 1.3392387628555298, "step": 1964 }, { "epoch": 1.552132701421801, "grad_norm": 13.850940959168309, "learning_rate": 5.573194427026034e-06, "loss": 1.3945591449737549, "step": 1965 }, { "epoch": 1.5529225908372828, "grad_norm": 11.948388622145545, "learning_rate": 5.568628824436022e-06, "loss": 1.3258531093597412, "step": 1966 }, { "epoch": 1.5537124802527646, "grad_norm": 12.964726484077811, "learning_rate": 5.564062741466506e-06, "loss": 1.6788570880889893, "step": 1967 }, { "epoch": 1.5545023696682465, "grad_norm": 14.308086462658784, "learning_rate": 5.559496181974929e-06, "loss": 1.5159149169921875, "step": 1968 }, { "epoch": 1.5552922590837284, "grad_norm": 12.57208338798326, "learning_rate": 5.554929149819136e-06, "loss": 1.6231142282485962, "step": 1969 }, { "epoch": 1.5560821484992102, "grad_norm": 24.3591417534666, "learning_rate": 5.550361648857369e-06, "loss": 2.435429573059082, "step": 1970 }, { "epoch": 1.556872037914692, "grad_norm": 9.389696915132824, "learning_rate": 5.545793682948269e-06, "loss": 1.346461296081543, "step": 1971 }, { "epoch": 1.5576619273301737, "grad_norm": 15.375687685358574, "learning_rate": 5.541225255950868e-06, "loss": 1.1790099143981934, "step": 1972 }, { "epoch": 1.5584518167456556, "grad_norm": 11.802119366322316, "learning_rate": 5.536656371724588e-06, "loss": 1.6181936264038086, "step": 1973 }, { "epoch": 1.5592417061611374, "grad_norm": 9.110006585026039, "learning_rate": 5.5320870341292396e-06, "loss": 1.6034982204437256, "step": 1974 }, { "epoch": 1.5600315955766193, "grad_norm": 11.636051169463032, "learning_rate": 5.527517247025012e-06, "loss": 1.4720101356506348, "step": 1975 }, { "epoch": 1.5608214849921012, "grad_norm": 11.163295280212573, "learning_rate": 5.522947014272476e-06, "loss": 1.140345573425293, "step": 1976 }, { "epoch": 1.5616113744075828, "grad_norm": 13.38890976856887, "learning_rate": 5.518376339732582e-06, "loss": 1.2083477973937988, "step": 1977 }, { "epoch": 1.5624012638230647, "grad_norm": 11.809538383883128, "learning_rate": 5.513805227266648e-06, "loss": 2.1081316471099854, "step": 1978 }, { "epoch": 1.5631911532385465, "grad_norm": 7.495208922125521, "learning_rate": 5.5092336807363655e-06, "loss": 2.039696455001831, "step": 1979 }, { "epoch": 1.5639810426540284, "grad_norm": 13.493304714233394, "learning_rate": 5.504661704003793e-06, "loss": 1.486254096031189, "step": 1980 }, { "epoch": 1.5647709320695102, "grad_norm": 7.784789213569366, "learning_rate": 5.5000893009313515e-06, "loss": 1.3860276937484741, "step": 1981 }, { "epoch": 1.565560821484992, "grad_norm": 18.85650733652106, "learning_rate": 5.495516475381822e-06, "loss": 1.6732574701309204, "step": 1982 }, { "epoch": 1.566350710900474, "grad_norm": 8.123420469888398, "learning_rate": 5.490943231218343e-06, "loss": 1.847348928451538, "step": 1983 }, { "epoch": 1.5671406003159558, "grad_norm": 16.49563314393432, "learning_rate": 5.486369572304404e-06, "loss": 1.5314483642578125, "step": 1984 }, { "epoch": 1.5679304897314377, "grad_norm": 8.487512867858714, "learning_rate": 5.48179550250385e-06, "loss": 1.5116339921951294, "step": 1985 }, { "epoch": 1.5687203791469195, "grad_norm": 6.915693492968808, "learning_rate": 5.477221025680868e-06, "loss": 1.482391119003296, "step": 1986 }, { "epoch": 1.5695102685624014, "grad_norm": 19.644776640804693, "learning_rate": 5.472646145699991e-06, "loss": 1.6486904621124268, "step": 1987 }, { "epoch": 1.5703001579778832, "grad_norm": 7.603103639854315, "learning_rate": 5.468070866426098e-06, "loss": 1.633828043937683, "step": 1988 }, { "epoch": 1.5710900473933649, "grad_norm": 13.602654260017356, "learning_rate": 5.4634951917243905e-06, "loss": 1.9716848134994507, "step": 1989 }, { "epoch": 1.5718799368088467, "grad_norm": 9.757410424603487, "learning_rate": 5.458919125460421e-06, "loss": 2.7240705490112305, "step": 1990 }, { "epoch": 1.5726698262243286, "grad_norm": 19.360862166490207, "learning_rate": 5.4543426715000644e-06, "loss": 1.697304368019104, "step": 1991 }, { "epoch": 1.5734597156398105, "grad_norm": 10.343188536941312, "learning_rate": 5.4497658337095205e-06, "loss": 1.9906163215637207, "step": 1992 }, { "epoch": 1.574249605055292, "grad_norm": 11.847212271229933, "learning_rate": 5.44518861595532e-06, "loss": 2.298971176147461, "step": 1993 }, { "epoch": 1.575039494470774, "grad_norm": 9.679704863397104, "learning_rate": 5.440611022104312e-06, "loss": 1.621870994567871, "step": 1994 }, { "epoch": 1.5758293838862558, "grad_norm": 8.36320218724247, "learning_rate": 5.43603305602366e-06, "loss": 0.9557559490203857, "step": 1995 }, { "epoch": 1.5766192733017377, "grad_norm": 9.018756709124666, "learning_rate": 5.431454721580847e-06, "loss": 1.3342235088348389, "step": 1996 }, { "epoch": 1.5774091627172195, "grad_norm": 10.023772272993021, "learning_rate": 5.426876022643665e-06, "loss": 2.0808849334716797, "step": 1997 }, { "epoch": 1.5781990521327014, "grad_norm": 11.455856990097669, "learning_rate": 5.422296963080212e-06, "loss": 1.4423177242279053, "step": 1998 }, { "epoch": 1.5789889415481833, "grad_norm": 23.049740134401613, "learning_rate": 5.417717546758895e-06, "loss": 1.558653712272644, "step": 1999 }, { "epoch": 1.5797788309636651, "grad_norm": 19.165624885196646, "learning_rate": 5.413137777548418e-06, "loss": 2.2184576988220215, "step": 2000 }, { "epoch": 1.580568720379147, "grad_norm": 11.595793426567166, "learning_rate": 5.4085576593177865e-06, "loss": 1.0131672620773315, "step": 2001 }, { "epoch": 1.5813586097946288, "grad_norm": 8.213720732018274, "learning_rate": 5.403977195936301e-06, "loss": 1.3775444030761719, "step": 2002 }, { "epoch": 1.5821484992101107, "grad_norm": 12.113284402572612, "learning_rate": 5.399396391273547e-06, "loss": 1.8444898128509521, "step": 2003 }, { "epoch": 1.5829383886255926, "grad_norm": 14.168275573138237, "learning_rate": 5.394815249199408e-06, "loss": 1.2480335235595703, "step": 2004 }, { "epoch": 1.5837282780410744, "grad_norm": 5.247577025302916, "learning_rate": 5.390233773584047e-06, "loss": 0.7935315370559692, "step": 2005 }, { "epoch": 1.584518167456556, "grad_norm": 10.201870401815132, "learning_rate": 5.385651968297907e-06, "loss": 1.4372203350067139, "step": 2006 }, { "epoch": 1.585308056872038, "grad_norm": 9.572141631234302, "learning_rate": 5.3810698372117165e-06, "loss": 1.6270627975463867, "step": 2007 }, { "epoch": 1.5860979462875198, "grad_norm": 11.97927351834067, "learning_rate": 5.37648738419647e-06, "loss": 1.5614657402038574, "step": 2008 }, { "epoch": 1.5868878357030016, "grad_norm": 8.772950923899149, "learning_rate": 5.371904613123444e-06, "loss": 1.8875480890274048, "step": 2009 }, { "epoch": 1.5876777251184833, "grad_norm": 8.245381714413037, "learning_rate": 5.367321527864175e-06, "loss": 1.6283080577850342, "step": 2010 }, { "epoch": 1.5884676145339651, "grad_norm": 7.982415790960697, "learning_rate": 5.362738132290471e-06, "loss": 1.4374988079071045, "step": 2011 }, { "epoch": 1.589257503949447, "grad_norm": 11.43633322051133, "learning_rate": 5.358154430274397e-06, "loss": 1.4911184310913086, "step": 2012 }, { "epoch": 1.5900473933649288, "grad_norm": 10.388360353070853, "learning_rate": 5.353570425688282e-06, "loss": 0.8646364212036133, "step": 2013 }, { "epoch": 1.5908372827804107, "grad_norm": 8.234236363122712, "learning_rate": 5.348986122404706e-06, "loss": 1.3800685405731201, "step": 2014 }, { "epoch": 1.5916271721958926, "grad_norm": 9.432342143567002, "learning_rate": 5.344401524296506e-06, "loss": 1.8379184007644653, "step": 2015 }, { "epoch": 1.5924170616113744, "grad_norm": 10.131723893874554, "learning_rate": 5.339816635236762e-06, "loss": 1.7298725843429565, "step": 2016 }, { "epoch": 1.5932069510268563, "grad_norm": 9.394398464778371, "learning_rate": 5.335231459098806e-06, "loss": 1.9646117687225342, "step": 2017 }, { "epoch": 1.5939968404423381, "grad_norm": 15.074944044842479, "learning_rate": 5.330645999756211e-06, "loss": 1.759244680404663, "step": 2018 }, { "epoch": 1.59478672985782, "grad_norm": 30.106091146105907, "learning_rate": 5.326060261082786e-06, "loss": 1.3692538738250732, "step": 2019 }, { "epoch": 1.5955766192733019, "grad_norm": 10.236894381216507, "learning_rate": 5.321474246952577e-06, "loss": 1.1828837394714355, "step": 2020 }, { "epoch": 1.5963665086887837, "grad_norm": 14.4756810516669, "learning_rate": 5.3168879612398684e-06, "loss": 2.2654309272766113, "step": 2021 }, { "epoch": 1.5971563981042654, "grad_norm": 12.643909473952599, "learning_rate": 5.3123014078191635e-06, "loss": 1.6730940341949463, "step": 2022 }, { "epoch": 1.5979462875197472, "grad_norm": 11.041702917394197, "learning_rate": 5.307714590565203e-06, "loss": 1.547790288925171, "step": 2023 }, { "epoch": 1.598736176935229, "grad_norm": 11.4813876902655, "learning_rate": 5.303127513352943e-06, "loss": 1.3282029628753662, "step": 2024 }, { "epoch": 1.599526066350711, "grad_norm": 17.03475421856299, "learning_rate": 5.298540180057561e-06, "loss": 1.527526617050171, "step": 2025 }, { "epoch": 1.6003159557661928, "grad_norm": 12.426584174439972, "learning_rate": 5.293952594554452e-06, "loss": 1.0609666109085083, "step": 2026 }, { "epoch": 1.6011058451816744, "grad_norm": 14.614321755378219, "learning_rate": 5.289364760719223e-06, "loss": 1.6314609050750732, "step": 2027 }, { "epoch": 1.6018957345971563, "grad_norm": 17.90827490801936, "learning_rate": 5.284776682427691e-06, "loss": 2.031951904296875, "step": 2028 }, { "epoch": 1.6026856240126381, "grad_norm": 9.299749173159547, "learning_rate": 5.280188363555881e-06, "loss": 1.3200483322143555, "step": 2029 }, { "epoch": 1.60347551342812, "grad_norm": 13.120649438444463, "learning_rate": 5.275599807980019e-06, "loss": 1.2993329763412476, "step": 2030 }, { "epoch": 1.6042654028436019, "grad_norm": 22.075534962390574, "learning_rate": 5.271011019576528e-06, "loss": 1.9799494743347168, "step": 2031 }, { "epoch": 1.6050552922590837, "grad_norm": 7.468671537450528, "learning_rate": 5.2664220022220404e-06, "loss": 1.303866982460022, "step": 2032 }, { "epoch": 1.6058451816745656, "grad_norm": 9.57368198328469, "learning_rate": 5.261832759793365e-06, "loss": 1.6507763862609863, "step": 2033 }, { "epoch": 1.6066350710900474, "grad_norm": 9.44370501166652, "learning_rate": 5.2572432961675115e-06, "loss": 1.9541301727294922, "step": 2034 }, { "epoch": 1.6074249605055293, "grad_norm": 14.242349861981912, "learning_rate": 5.252653615221677e-06, "loss": 1.2269582748413086, "step": 2035 }, { "epoch": 1.6082148499210112, "grad_norm": 8.26115422114046, "learning_rate": 5.248063720833233e-06, "loss": 1.1905943155288696, "step": 2036 }, { "epoch": 1.609004739336493, "grad_norm": 10.698374761244876, "learning_rate": 5.243473616879744e-06, "loss": 1.0884801149368286, "step": 2037 }, { "epoch": 1.6097946287519749, "grad_norm": 11.821184713135086, "learning_rate": 5.238883307238939e-06, "loss": 1.7929291725158691, "step": 2038 }, { "epoch": 1.6105845181674565, "grad_norm": 16.8095121665838, "learning_rate": 5.234292795788731e-06, "loss": 1.8585515022277832, "step": 2039 }, { "epoch": 1.6113744075829384, "grad_norm": 14.477383097248046, "learning_rate": 5.229702086407197e-06, "loss": 2.150439977645874, "step": 2040 }, { "epoch": 1.6121642969984202, "grad_norm": 11.989923601556997, "learning_rate": 5.225111182972584e-06, "loss": 1.4646919965744019, "step": 2041 }, { "epoch": 1.612954186413902, "grad_norm": 21.745170716359002, "learning_rate": 5.220520089363302e-06, "loss": 1.7785918712615967, "step": 2042 }, { "epoch": 1.6137440758293837, "grad_norm": 11.270529683690256, "learning_rate": 5.215928809457924e-06, "loss": 2.263561964035034, "step": 2043 }, { "epoch": 1.6145339652448656, "grad_norm": 15.910293271142669, "learning_rate": 5.211337347135176e-06, "loss": 1.4089993238449097, "step": 2044 }, { "epoch": 1.6153238546603474, "grad_norm": 16.722563692314175, "learning_rate": 5.20674570627394e-06, "loss": 1.6263060569763184, "step": 2045 }, { "epoch": 1.6161137440758293, "grad_norm": 11.666163831636595, "learning_rate": 5.202153890753252e-06, "loss": 1.2603790760040283, "step": 2046 }, { "epoch": 1.6169036334913112, "grad_norm": 12.621495227684445, "learning_rate": 5.197561904452291e-06, "loss": 2.883836507797241, "step": 2047 }, { "epoch": 1.617693522906793, "grad_norm": 10.161552116575363, "learning_rate": 5.192969751250382e-06, "loss": 1.666745662689209, "step": 2048 }, { "epoch": 1.6184834123222749, "grad_norm": 8.91334171222036, "learning_rate": 5.188377435026991e-06, "loss": 1.3152096271514893, "step": 2049 }, { "epoch": 1.6192733017377567, "grad_norm": 12.703171570917682, "learning_rate": 5.183784959661723e-06, "loss": 1.2735559940338135, "step": 2050 }, { "epoch": 1.6200631911532386, "grad_norm": 13.160459144969359, "learning_rate": 5.1791923290343175e-06, "loss": 0.9299610257148743, "step": 2051 }, { "epoch": 1.6208530805687205, "grad_norm": 8.443761976415868, "learning_rate": 5.17459954702464e-06, "loss": 1.560915231704712, "step": 2052 }, { "epoch": 1.6216429699842023, "grad_norm": 14.155078887312051, "learning_rate": 5.1700066175126915e-06, "loss": 2.029481887817383, "step": 2053 }, { "epoch": 1.6224328593996842, "grad_norm": 7.4669992465133825, "learning_rate": 5.165413544378594e-06, "loss": 1.332027792930603, "step": 2054 }, { "epoch": 1.623222748815166, "grad_norm": 11.339506770988185, "learning_rate": 5.160820331502587e-06, "loss": 1.6719763278961182, "step": 2055 }, { "epoch": 1.6240126382306477, "grad_norm": 9.957423283849112, "learning_rate": 5.1562269827650365e-06, "loss": 1.8021857738494873, "step": 2056 }, { "epoch": 1.6248025276461295, "grad_norm": 13.931093811264075, "learning_rate": 5.1516335020464146e-06, "loss": 1.2689666748046875, "step": 2057 }, { "epoch": 1.6255924170616114, "grad_norm": 10.873902590482956, "learning_rate": 5.147039893227312e-06, "loss": 1.6544064283370972, "step": 2058 }, { "epoch": 1.6263823064770933, "grad_norm": 14.998070753763457, "learning_rate": 5.142446160188423e-06, "loss": 0.7487756013870239, "step": 2059 }, { "epoch": 1.627172195892575, "grad_norm": 11.06717608145743, "learning_rate": 5.137852306810549e-06, "loss": 1.2134767770767212, "step": 2060 }, { "epoch": 1.6279620853080567, "grad_norm": 9.422607327758378, "learning_rate": 5.133258336974593e-06, "loss": 1.7278623580932617, "step": 2061 }, { "epoch": 1.6287519747235386, "grad_norm": 11.032813566120764, "learning_rate": 5.128664254561554e-06, "loss": 1.4675060510635376, "step": 2062 }, { "epoch": 1.6295418641390205, "grad_norm": 19.42554207831122, "learning_rate": 5.12407006345253e-06, "loss": 1.6174191236495972, "step": 2063 }, { "epoch": 1.6303317535545023, "grad_norm": 8.820495621535443, "learning_rate": 5.119475767528706e-06, "loss": 1.6649625301361084, "step": 2064 }, { "epoch": 1.6311216429699842, "grad_norm": 8.576125799967375, "learning_rate": 5.114881370671363e-06, "loss": 1.447519063949585, "step": 2065 }, { "epoch": 1.631911532385466, "grad_norm": 12.22755594166127, "learning_rate": 5.1102868767618564e-06, "loss": 2.094078540802002, "step": 2066 }, { "epoch": 1.632701421800948, "grad_norm": 17.90068380574316, "learning_rate": 5.105692289681637e-06, "loss": 2.2962210178375244, "step": 2067 }, { "epoch": 1.6334913112164298, "grad_norm": 10.844973400668414, "learning_rate": 5.10109761331222e-06, "loss": 1.753501534461975, "step": 2068 }, { "epoch": 1.6342812006319116, "grad_norm": 9.036279470960077, "learning_rate": 5.096502851535207e-06, "loss": 1.4237632751464844, "step": 2069 }, { "epoch": 1.6350710900473935, "grad_norm": 19.783228854851586, "learning_rate": 5.091908008232269e-06, "loss": 1.0084950923919678, "step": 2070 }, { "epoch": 1.6358609794628753, "grad_norm": 8.265709299215786, "learning_rate": 5.08731308728514e-06, "loss": 2.1787667274475098, "step": 2071 }, { "epoch": 1.636650868878357, "grad_norm": 14.174582481095438, "learning_rate": 5.082718092575629e-06, "loss": 2.0619583129882812, "step": 2072 }, { "epoch": 1.6374407582938388, "grad_norm": 11.108397416157564, "learning_rate": 5.078123027985602e-06, "loss": 1.3192667961120605, "step": 2073 }, { "epoch": 1.6382306477093207, "grad_norm": 8.363886228263077, "learning_rate": 5.073527897396983e-06, "loss": 1.50796639919281, "step": 2074 }, { "epoch": 1.6390205371248026, "grad_norm": 15.251141269075907, "learning_rate": 5.068932704691754e-06, "loss": 2.005817174911499, "step": 2075 }, { "epoch": 1.6398104265402842, "grad_norm": 13.455564265575436, "learning_rate": 5.064337453751949e-06, "loss": 0.9753101468086243, "step": 2076 }, { "epoch": 1.640600315955766, "grad_norm": 10.151593209532505, "learning_rate": 5.059742148459651e-06, "loss": 1.3239325284957886, "step": 2077 }, { "epoch": 1.641390205371248, "grad_norm": 9.012341783855266, "learning_rate": 5.055146792696989e-06, "loss": 1.3081142902374268, "step": 2078 }, { "epoch": 1.6421800947867298, "grad_norm": 14.881682947088617, "learning_rate": 5.050551390346135e-06, "loss": 1.9254162311553955, "step": 2079 }, { "epoch": 1.6429699842022116, "grad_norm": 9.10456564272968, "learning_rate": 5.0459559452893e-06, "loss": 1.5548919439315796, "step": 2080 }, { "epoch": 1.6437598736176935, "grad_norm": 19.6788851757542, "learning_rate": 5.041360461408733e-06, "loss": 1.1454696655273438, "step": 2081 }, { "epoch": 1.6445497630331753, "grad_norm": 14.710569283676744, "learning_rate": 5.036764942586709e-06, "loss": 1.5640335083007812, "step": 2082 }, { "epoch": 1.6453396524486572, "grad_norm": 12.439497693452665, "learning_rate": 5.032169392705542e-06, "loss": 1.5709795951843262, "step": 2083 }, { "epoch": 1.646129541864139, "grad_norm": 16.197420446815777, "learning_rate": 5.027573815647567e-06, "loss": 2.125795602798462, "step": 2084 }, { "epoch": 1.646919431279621, "grad_norm": 12.282692453985112, "learning_rate": 5.0229782152951405e-06, "loss": 1.4845194816589355, "step": 2085 }, { "epoch": 1.6477093206951028, "grad_norm": 9.520283321188863, "learning_rate": 5.018382595530643e-06, "loss": 1.538682460784912, "step": 2086 }, { "epoch": 1.6484992101105846, "grad_norm": 9.675670573110745, "learning_rate": 5.0137869602364665e-06, "loss": 1.280341625213623, "step": 2087 }, { "epoch": 1.6492890995260665, "grad_norm": 18.974744400190374, "learning_rate": 5.009191313295021e-06, "loss": 1.772722601890564, "step": 2088 }, { "epoch": 1.6500789889415481, "grad_norm": 9.452933859527397, "learning_rate": 5.004595658588725e-06, "loss": 1.2798036336898804, "step": 2089 }, { "epoch": 1.65086887835703, "grad_norm": 11.716983488622807, "learning_rate": 5e-06, "loss": 2.5696773529052734, "step": 2090 }, { "epoch": 1.6516587677725119, "grad_norm": 17.30778123295629, "learning_rate": 4.995404341411277e-06, "loss": 2.143465518951416, "step": 2091 }, { "epoch": 1.6524486571879937, "grad_norm": 6.676595853028537, "learning_rate": 4.990808686704979e-06, "loss": 1.9209420680999756, "step": 2092 }, { "epoch": 1.6532385466034754, "grad_norm": 10.700026843162101, "learning_rate": 4.986213039763537e-06, "loss": 1.3106441497802734, "step": 2093 }, { "epoch": 1.6540284360189572, "grad_norm": 11.845311555733167, "learning_rate": 4.98161740446936e-06, "loss": 1.331827998161316, "step": 2094 }, { "epoch": 1.654818325434439, "grad_norm": 14.482146662888397, "learning_rate": 4.977021784704862e-06, "loss": 1.2673121690750122, "step": 2095 }, { "epoch": 1.655608214849921, "grad_norm": 9.96068930193819, "learning_rate": 4.9724261843524345e-06, "loss": 1.4080572128295898, "step": 2096 }, { "epoch": 1.6563981042654028, "grad_norm": 6.81552208619803, "learning_rate": 4.967830607294459e-06, "loss": 1.8892409801483154, "step": 2097 }, { "epoch": 1.6571879936808847, "grad_norm": 9.139351921483907, "learning_rate": 4.963235057413292e-06, "loss": 1.865785002708435, "step": 2098 }, { "epoch": 1.6579778830963665, "grad_norm": 18.25394807894221, "learning_rate": 4.95863953859127e-06, "loss": 3.839024782180786, "step": 2099 }, { "epoch": 1.6587677725118484, "grad_norm": 12.684731410566474, "learning_rate": 4.9540440547107016e-06, "loss": 1.6854069232940674, "step": 2100 }, { "epoch": 1.6595576619273302, "grad_norm": 11.466195743942457, "learning_rate": 4.9494486096538654e-06, "loss": 0.6167169809341431, "step": 2101 }, { "epoch": 1.660347551342812, "grad_norm": 11.989810253242599, "learning_rate": 4.9448532073030125e-06, "loss": 1.5397396087646484, "step": 2102 }, { "epoch": 1.661137440758294, "grad_norm": 13.091229369852948, "learning_rate": 4.940257851540351e-06, "loss": 1.1000051498413086, "step": 2103 }, { "epoch": 1.6619273301737758, "grad_norm": 15.278599967251001, "learning_rate": 4.935662546248054e-06, "loss": 1.370941162109375, "step": 2104 }, { "epoch": 1.6627172195892577, "grad_norm": 8.92736303739886, "learning_rate": 4.9310672953082486e-06, "loss": 1.4179476499557495, "step": 2105 }, { "epoch": 1.6635071090047393, "grad_norm": 7.439214281898773, "learning_rate": 4.92647210260302e-06, "loss": 1.1264572143554688, "step": 2106 }, { "epoch": 1.6642969984202212, "grad_norm": 8.391749061486982, "learning_rate": 4.9218769720144e-06, "loss": 1.7352138757705688, "step": 2107 }, { "epoch": 1.665086887835703, "grad_norm": 8.41116001387029, "learning_rate": 4.917281907424371e-06, "loss": 1.3535940647125244, "step": 2108 }, { "epoch": 1.6658767772511849, "grad_norm": 9.156612347669475, "learning_rate": 4.912686912714861e-06, "loss": 1.4920666217803955, "step": 2109 }, { "epoch": 1.6666666666666665, "grad_norm": 16.712403107809568, "learning_rate": 4.908091991767734e-06, "loss": 1.6276068687438965, "step": 2110 }, { "epoch": 1.6674565560821484, "grad_norm": 18.944886780238278, "learning_rate": 4.903497148464795e-06, "loss": 1.2643494606018066, "step": 2111 }, { "epoch": 1.6682464454976302, "grad_norm": 8.854420836492178, "learning_rate": 4.898902386687782e-06, "loss": 1.728925108909607, "step": 2112 }, { "epoch": 1.669036334913112, "grad_norm": 7.998931904416969, "learning_rate": 4.894307710318365e-06, "loss": 1.2208718061447144, "step": 2113 }, { "epoch": 1.669826224328594, "grad_norm": 11.667293171545051, "learning_rate": 4.8897131232381435e-06, "loss": 1.281367540359497, "step": 2114 }, { "epoch": 1.6706161137440758, "grad_norm": 8.360501164751504, "learning_rate": 4.88511862932864e-06, "loss": 1.7427953481674194, "step": 2115 }, { "epoch": 1.6714060031595577, "grad_norm": 12.84273569482612, "learning_rate": 4.880524232471295e-06, "loss": 1.1219735145568848, "step": 2116 }, { "epoch": 1.6721958925750395, "grad_norm": 8.81213908324747, "learning_rate": 4.875929936547472e-06, "loss": 1.642223596572876, "step": 2117 }, { "epoch": 1.6729857819905214, "grad_norm": 17.800338724408167, "learning_rate": 4.871335745438448e-06, "loss": 2.3190040588378906, "step": 2118 }, { "epoch": 1.6737756714060033, "grad_norm": 17.22836983227111, "learning_rate": 4.866741663025409e-06, "loss": 1.689987063407898, "step": 2119 }, { "epoch": 1.674565560821485, "grad_norm": 10.031772239297299, "learning_rate": 4.8621476931894505e-06, "loss": 1.2501479387283325, "step": 2120 }, { "epoch": 1.675355450236967, "grad_norm": 9.091404836352739, "learning_rate": 4.857553839811579e-06, "loss": 1.4689991474151611, "step": 2121 }, { "epoch": 1.6761453396524486, "grad_norm": 11.720644404349697, "learning_rate": 4.85296010677269e-06, "loss": 1.3092423677444458, "step": 2122 }, { "epoch": 1.6769352290679305, "grad_norm": 9.826261602656874, "learning_rate": 4.848366497953586e-06, "loss": 1.6839494705200195, "step": 2123 }, { "epoch": 1.6777251184834123, "grad_norm": 10.367714457357824, "learning_rate": 4.843773017234964e-06, "loss": 1.245840072631836, "step": 2124 }, { "epoch": 1.6785150078988942, "grad_norm": 15.019641392439464, "learning_rate": 4.839179668497413e-06, "loss": 1.2178664207458496, "step": 2125 }, { "epoch": 1.6793048973143758, "grad_norm": 13.354170916911665, "learning_rate": 4.834586455621409e-06, "loss": 1.5003374814987183, "step": 2126 }, { "epoch": 1.6800947867298577, "grad_norm": 9.162723435056346, "learning_rate": 4.829993382487309e-06, "loss": 1.3886042833328247, "step": 2127 }, { "epoch": 1.6808846761453395, "grad_norm": 9.551211296648892, "learning_rate": 4.825400452975361e-06, "loss": 1.7338354587554932, "step": 2128 }, { "epoch": 1.6816745655608214, "grad_norm": 15.926343890819645, "learning_rate": 4.820807670965683e-06, "loss": 1.217260718345642, "step": 2129 }, { "epoch": 1.6824644549763033, "grad_norm": 9.271922963812838, "learning_rate": 4.816215040338277e-06, "loss": 1.4944868087768555, "step": 2130 }, { "epoch": 1.6832543443917851, "grad_norm": 10.041906359891817, "learning_rate": 4.811622564973011e-06, "loss": 1.8609442710876465, "step": 2131 }, { "epoch": 1.684044233807267, "grad_norm": 10.228103619124214, "learning_rate": 4.807030248749621e-06, "loss": 1.6223942041397095, "step": 2132 }, { "epoch": 1.6848341232227488, "grad_norm": 9.327061347477445, "learning_rate": 4.802438095547712e-06, "loss": 1.100557804107666, "step": 2133 }, { "epoch": 1.6856240126382307, "grad_norm": 30.522951746249742, "learning_rate": 4.7978461092467495e-06, "loss": 1.0848801136016846, "step": 2134 }, { "epoch": 1.6864139020537126, "grad_norm": 13.164129620057786, "learning_rate": 4.793254293726061e-06, "loss": 1.1626616716384888, "step": 2135 }, { "epoch": 1.6872037914691944, "grad_norm": 21.254188334183024, "learning_rate": 4.788662652864825e-06, "loss": 1.7419378757476807, "step": 2136 }, { "epoch": 1.6879936808846763, "grad_norm": 15.489897028057435, "learning_rate": 4.784071190542079e-06, "loss": 1.6035929918289185, "step": 2137 }, { "epoch": 1.6887835703001581, "grad_norm": 9.477674481683636, "learning_rate": 4.7794799106366985e-06, "loss": 0.9086638689041138, "step": 2138 }, { "epoch": 1.6895734597156398, "grad_norm": 14.744530447825799, "learning_rate": 4.774888817027417e-06, "loss": 2.459580183029175, "step": 2139 }, { "epoch": 1.6903633491311216, "grad_norm": 10.143359754031751, "learning_rate": 4.770297913592805e-06, "loss": 1.5249871015548706, "step": 2140 }, { "epoch": 1.6911532385466035, "grad_norm": 8.92591707625083, "learning_rate": 4.76570720421127e-06, "loss": 1.3436775207519531, "step": 2141 }, { "epoch": 1.6919431279620853, "grad_norm": 30.798541971194087, "learning_rate": 4.7611166927610625e-06, "loss": 1.0415196418762207, "step": 2142 }, { "epoch": 1.692733017377567, "grad_norm": 10.766454601215523, "learning_rate": 4.756526383120258e-06, "loss": 1.6581356525421143, "step": 2143 }, { "epoch": 1.6935229067930488, "grad_norm": 10.521084763687215, "learning_rate": 4.751936279166767e-06, "loss": 1.5447998046875, "step": 2144 }, { "epoch": 1.6943127962085307, "grad_norm": 9.436881293717475, "learning_rate": 4.747346384778325e-06, "loss": 2.1874170303344727, "step": 2145 }, { "epoch": 1.6951026856240126, "grad_norm": 9.945491755562474, "learning_rate": 4.7427567038324884e-06, "loss": 1.6179015636444092, "step": 2146 }, { "epoch": 1.6958925750394944, "grad_norm": 20.026946174142626, "learning_rate": 4.738167240206637e-06, "loss": 1.4960978031158447, "step": 2147 }, { "epoch": 1.6966824644549763, "grad_norm": 9.92767129752773, "learning_rate": 4.733577997777963e-06, "loss": 1.0159810781478882, "step": 2148 }, { "epoch": 1.6974723538704581, "grad_norm": 8.419076351215592, "learning_rate": 4.728988980423473e-06, "loss": 1.4090895652770996, "step": 2149 }, { "epoch": 1.69826224328594, "grad_norm": 8.367910062718474, "learning_rate": 4.724400192019983e-06, "loss": 1.2694896459579468, "step": 2150 }, { "epoch": 1.6990521327014219, "grad_norm": 15.475743745207783, "learning_rate": 4.71981163644412e-06, "loss": 1.5766160488128662, "step": 2151 }, { "epoch": 1.6998420221169037, "grad_norm": 8.610091370022774, "learning_rate": 4.715223317572309e-06, "loss": 1.2474552392959595, "step": 2152 }, { "epoch": 1.7006319115323856, "grad_norm": 8.616644887482238, "learning_rate": 4.7106352392807794e-06, "loss": 1.4365007877349854, "step": 2153 }, { "epoch": 1.7014218009478674, "grad_norm": 12.289373525187166, "learning_rate": 4.70604740544555e-06, "loss": 1.2263505458831787, "step": 2154 }, { "epoch": 1.7022116903633493, "grad_norm": 14.493463927125523, "learning_rate": 4.701459819942441e-06, "loss": 1.1191456317901611, "step": 2155 }, { "epoch": 1.703001579778831, "grad_norm": 11.091605026430587, "learning_rate": 4.696872486647059e-06, "loss": 1.8349318504333496, "step": 2156 }, { "epoch": 1.7037914691943128, "grad_norm": 13.242011895533519, "learning_rate": 4.692285409434797e-06, "loss": 1.2665749788284302, "step": 2157 }, { "epoch": 1.7045813586097947, "grad_norm": 12.97363928423666, "learning_rate": 4.6876985921808365e-06, "loss": 1.3644407987594604, "step": 2158 }, { "epoch": 1.7053712480252765, "grad_norm": 9.838108998315606, "learning_rate": 4.683112038760135e-06, "loss": 1.3256113529205322, "step": 2159 }, { "epoch": 1.7061611374407581, "grad_norm": 11.584707197962354, "learning_rate": 4.6785257530474244e-06, "loss": 1.4737862348556519, "step": 2160 }, { "epoch": 1.70695102685624, "grad_norm": 13.735300921932108, "learning_rate": 4.673939738917216e-06, "loss": 1.6939847469329834, "step": 2161 }, { "epoch": 1.7077409162717219, "grad_norm": 9.701613005896544, "learning_rate": 4.669354000243791e-06, "loss": 1.218980312347412, "step": 2162 }, { "epoch": 1.7085308056872037, "grad_norm": 16.55205261891123, "learning_rate": 4.664768540901194e-06, "loss": 1.3491718769073486, "step": 2163 }, { "epoch": 1.7093206951026856, "grad_norm": 10.899568751624019, "learning_rate": 4.66018336476324e-06, "loss": 1.0617191791534424, "step": 2164 }, { "epoch": 1.7101105845181674, "grad_norm": 31.659285393659797, "learning_rate": 4.655598475703498e-06, "loss": 1.6565725803375244, "step": 2165 }, { "epoch": 1.7109004739336493, "grad_norm": 14.401725841266487, "learning_rate": 4.651013877595296e-06, "loss": 1.4930999279022217, "step": 2166 }, { "epoch": 1.7116903633491312, "grad_norm": 16.299266500455296, "learning_rate": 4.64642957431172e-06, "loss": 0.774669885635376, "step": 2167 }, { "epoch": 1.712480252764613, "grad_norm": 19.156771040259898, "learning_rate": 4.641845569725605e-06, "loss": 2.0019631385803223, "step": 2168 }, { "epoch": 1.7132701421800949, "grad_norm": 17.22778044854949, "learning_rate": 4.63726186770953e-06, "loss": 1.3190504312515259, "step": 2169 }, { "epoch": 1.7140600315955767, "grad_norm": 9.853391429883748, "learning_rate": 4.6326784721358255e-06, "loss": 1.7607496976852417, "step": 2170 }, { "epoch": 1.7148499210110586, "grad_norm": 11.600650539403235, "learning_rate": 4.628095386876557e-06, "loss": 1.8006988763809204, "step": 2171 }, { "epoch": 1.7156398104265402, "grad_norm": 9.816615664918947, "learning_rate": 4.623512615803531e-06, "loss": 1.423611044883728, "step": 2172 }, { "epoch": 1.716429699842022, "grad_norm": 11.105593098350882, "learning_rate": 4.618930162788284e-06, "loss": 1.4016926288604736, "step": 2173 }, { "epoch": 1.717219589257504, "grad_norm": 9.31423831663281, "learning_rate": 4.614348031702093e-06, "loss": 1.490910291671753, "step": 2174 }, { "epoch": 1.7180094786729858, "grad_norm": 9.65485402130286, "learning_rate": 4.609766226415955e-06, "loss": 1.4671694040298462, "step": 2175 }, { "epoch": 1.7187993680884674, "grad_norm": 8.831389046526823, "learning_rate": 4.605184750800594e-06, "loss": 1.1502845287322998, "step": 2176 }, { "epoch": 1.7195892575039493, "grad_norm": 11.97525173977403, "learning_rate": 4.6006036087264544e-06, "loss": 1.1983712911605835, "step": 2177 }, { "epoch": 1.7203791469194312, "grad_norm": 7.344974809829897, "learning_rate": 4.596022804063701e-06, "loss": 1.7621641159057617, "step": 2178 }, { "epoch": 1.721169036334913, "grad_norm": 8.826471148662039, "learning_rate": 4.591442340682214e-06, "loss": 1.350406289100647, "step": 2179 }, { "epoch": 1.7219589257503949, "grad_norm": 14.464898797136621, "learning_rate": 4.586862222451582e-06, "loss": 1.644295334815979, "step": 2180 }, { "epoch": 1.7227488151658767, "grad_norm": 14.126992669178419, "learning_rate": 4.582282453241108e-06, "loss": 1.1674833297729492, "step": 2181 }, { "epoch": 1.7235387045813586, "grad_norm": 15.220420323290032, "learning_rate": 4.5777030369197895e-06, "loss": 1.2685773372650146, "step": 2182 }, { "epoch": 1.7243285939968405, "grad_norm": 9.673306531164522, "learning_rate": 4.573123977356337e-06, "loss": 1.3816874027252197, "step": 2183 }, { "epoch": 1.7251184834123223, "grad_norm": 10.109225928810197, "learning_rate": 4.568545278419154e-06, "loss": 0.9319192171096802, "step": 2184 }, { "epoch": 1.7259083728278042, "grad_norm": 9.14802501607995, "learning_rate": 4.56396694397634e-06, "loss": 1.8452692031860352, "step": 2185 }, { "epoch": 1.726698262243286, "grad_norm": 12.908130900260579, "learning_rate": 4.55938897789569e-06, "loss": 1.1169474124908447, "step": 2186 }, { "epoch": 1.727488151658768, "grad_norm": 13.515379103637612, "learning_rate": 4.554811384044681e-06, "loss": 2.0045344829559326, "step": 2187 }, { "epoch": 1.7282780410742498, "grad_norm": 14.12974423042282, "learning_rate": 4.550234166290481e-06, "loss": 1.453951358795166, "step": 2188 }, { "epoch": 1.7290679304897314, "grad_norm": 11.23753254826144, "learning_rate": 4.545657328499937e-06, "loss": 1.3621933460235596, "step": 2189 }, { "epoch": 1.7298578199052133, "grad_norm": 13.330762458315919, "learning_rate": 4.541080874539579e-06, "loss": 2.1850600242614746, "step": 2190 }, { "epoch": 1.7306477093206951, "grad_norm": 15.273321036263173, "learning_rate": 4.5365048082756095e-06, "loss": 2.321899890899658, "step": 2191 }, { "epoch": 1.731437598736177, "grad_norm": 10.112662361275033, "learning_rate": 4.531929133573906e-06, "loss": 1.4877285957336426, "step": 2192 }, { "epoch": 1.7322274881516586, "grad_norm": 12.097152921819895, "learning_rate": 4.5273538543000095e-06, "loss": 1.2780163288116455, "step": 2193 }, { "epoch": 1.7330173775671405, "grad_norm": 10.432317615119704, "learning_rate": 4.522778974319133e-06, "loss": 1.5664427280426025, "step": 2194 }, { "epoch": 1.7338072669826223, "grad_norm": 7.912507954338899, "learning_rate": 4.518204497496151e-06, "loss": 1.6642968654632568, "step": 2195 }, { "epoch": 1.7345971563981042, "grad_norm": 11.522163423849092, "learning_rate": 4.513630427695597e-06, "loss": 2.0236799716949463, "step": 2196 }, { "epoch": 1.735387045813586, "grad_norm": 9.953485109546435, "learning_rate": 4.50905676878166e-06, "loss": 2.1658871173858643, "step": 2197 }, { "epoch": 1.736176935229068, "grad_norm": 13.055500452513753, "learning_rate": 4.504483524618179e-06, "loss": 0.8655682802200317, "step": 2198 }, { "epoch": 1.7369668246445498, "grad_norm": 10.348280690590556, "learning_rate": 4.499910699068649e-06, "loss": 1.5977658033370972, "step": 2199 }, { "epoch": 1.7377567140600316, "grad_norm": 9.252649833434882, "learning_rate": 4.495338295996208e-06, "loss": 1.5957226753234863, "step": 2200 }, { "epoch": 1.7385466034755135, "grad_norm": 11.994815495201612, "learning_rate": 4.4907663192636345e-06, "loss": 0.9464290738105774, "step": 2201 }, { "epoch": 1.7393364928909953, "grad_norm": 8.819889689255204, "learning_rate": 4.486194772733356e-06, "loss": 1.736267328262329, "step": 2202 }, { "epoch": 1.7401263823064772, "grad_norm": 10.89375250297425, "learning_rate": 4.4816236602674204e-06, "loss": 1.5301947593688965, "step": 2203 }, { "epoch": 1.740916271721959, "grad_norm": 28.361460541379106, "learning_rate": 4.477052985727525e-06, "loss": 2.966822385787964, "step": 2204 }, { "epoch": 1.741706161137441, "grad_norm": 16.527934269673235, "learning_rate": 4.47248275297499e-06, "loss": 1.2912685871124268, "step": 2205 }, { "epoch": 1.7424960505529226, "grad_norm": 7.535472217731121, "learning_rate": 4.467912965870761e-06, "loss": 1.1335291862487793, "step": 2206 }, { "epoch": 1.7432859399684044, "grad_norm": 8.2905986876192, "learning_rate": 4.463343628275412e-06, "loss": 1.4929556846618652, "step": 2207 }, { "epoch": 1.7440758293838863, "grad_norm": 12.59336480489856, "learning_rate": 4.458774744049134e-06, "loss": 1.0215003490447998, "step": 2208 }, { "epoch": 1.7448657187993681, "grad_norm": 10.41128966672044, "learning_rate": 4.454206317051734e-06, "loss": 1.46480393409729, "step": 2209 }, { "epoch": 1.7456556082148498, "grad_norm": 10.615522798499182, "learning_rate": 4.449638351142632e-06, "loss": 1.465099811553955, "step": 2210 }, { "epoch": 1.7464454976303316, "grad_norm": 9.897944735916337, "learning_rate": 4.445070850180865e-06, "loss": 1.993574857711792, "step": 2211 }, { "epoch": 1.7472353870458135, "grad_norm": 11.844266382618313, "learning_rate": 4.4405038180250715e-06, "loss": 1.3619449138641357, "step": 2212 }, { "epoch": 1.7480252764612954, "grad_norm": 8.226740624728182, "learning_rate": 4.435937258533496e-06, "loss": 1.619173288345337, "step": 2213 }, { "epoch": 1.7488151658767772, "grad_norm": 11.343296287634017, "learning_rate": 4.43137117556398e-06, "loss": 1.3998527526855469, "step": 2214 }, { "epoch": 1.749605055292259, "grad_norm": 13.493219686349496, "learning_rate": 4.426805572973968e-06, "loss": 1.625508427619934, "step": 2215 }, { "epoch": 1.750394944707741, "grad_norm": 12.38010034938405, "learning_rate": 4.422240454620496e-06, "loss": 2.122622013092041, "step": 2216 }, { "epoch": 1.7511848341232228, "grad_norm": 14.980457953037133, "learning_rate": 4.4176758243601885e-06, "loss": 1.5803240537643433, "step": 2217 }, { "epoch": 1.7519747235387046, "grad_norm": 9.389237583408196, "learning_rate": 4.413111686049264e-06, "loss": 1.1918928623199463, "step": 2218 }, { "epoch": 1.7527646129541865, "grad_norm": 10.030002557757358, "learning_rate": 4.40854804354352e-06, "loss": 0.9838066101074219, "step": 2219 }, { "epoch": 1.7535545023696684, "grad_norm": 9.03857007330401, "learning_rate": 4.403984900698336e-06, "loss": 1.4722139835357666, "step": 2220 }, { "epoch": 1.7543443917851502, "grad_norm": 13.96770687161923, "learning_rate": 4.399422261368671e-06, "loss": 1.4021885395050049, "step": 2221 }, { "epoch": 1.7551342812006319, "grad_norm": 8.562842515589852, "learning_rate": 4.394860129409056e-06, "loss": 1.6055982112884521, "step": 2222 }, { "epoch": 1.7559241706161137, "grad_norm": 16.556021076354195, "learning_rate": 4.390298508673596e-06, "loss": 1.7816779613494873, "step": 2223 }, { "epoch": 1.7567140600315956, "grad_norm": 12.43470769219723, "learning_rate": 4.385737403015967e-06, "loss": 1.5340075492858887, "step": 2224 }, { "epoch": 1.7575039494470774, "grad_norm": 24.066858353519663, "learning_rate": 4.3811768162894e-06, "loss": 1.3754091262817383, "step": 2225 }, { "epoch": 1.758293838862559, "grad_norm": 11.278622345644811, "learning_rate": 4.376616752346696e-06, "loss": 1.6019980907440186, "step": 2226 }, { "epoch": 1.759083728278041, "grad_norm": 9.564683161114333, "learning_rate": 4.372057215040213e-06, "loss": 1.9771608114242554, "step": 2227 }, { "epoch": 1.7598736176935228, "grad_norm": 12.195360323247021, "learning_rate": 4.367498208221863e-06, "loss": 1.3690104484558105, "step": 2228 }, { "epoch": 1.7606635071090047, "grad_norm": 13.65143464575961, "learning_rate": 4.362939735743108e-06, "loss": 1.7435321807861328, "step": 2229 }, { "epoch": 1.7614533965244865, "grad_norm": 14.339262082940179, "learning_rate": 4.358381801454966e-06, "loss": 0.8703070878982544, "step": 2230 }, { "epoch": 1.7622432859399684, "grad_norm": 9.781836635217395, "learning_rate": 4.353824409207988e-06, "loss": 2.1449623107910156, "step": 2231 }, { "epoch": 1.7630331753554502, "grad_norm": 9.901685682067114, "learning_rate": 4.349267562852276e-06, "loss": 1.5492500066757202, "step": 2232 }, { "epoch": 1.763823064770932, "grad_norm": 11.408200068592867, "learning_rate": 4.34471126623747e-06, "loss": 1.4743053913116455, "step": 2233 }, { "epoch": 1.764612954186414, "grad_norm": 11.620081956175499, "learning_rate": 4.340155523212746e-06, "loss": 1.495714545249939, "step": 2234 }, { "epoch": 1.7654028436018958, "grad_norm": 12.144719137720987, "learning_rate": 4.335600337626809e-06, "loss": 1.1870977878570557, "step": 2235 }, { "epoch": 1.7661927330173777, "grad_norm": 10.073805141373832, "learning_rate": 4.331045713327894e-06, "loss": 1.2681677341461182, "step": 2236 }, { "epoch": 1.7669826224328595, "grad_norm": 13.71543165458782, "learning_rate": 4.326491654163762e-06, "loss": 1.1395235061645508, "step": 2237 }, { "epoch": 1.7677725118483414, "grad_norm": 13.755051603926155, "learning_rate": 4.321938163981699e-06, "loss": 1.4418907165527344, "step": 2238 }, { "epoch": 1.768562401263823, "grad_norm": 12.213862776654832, "learning_rate": 4.317385246628508e-06, "loss": 1.602990746498108, "step": 2239 }, { "epoch": 1.7693522906793049, "grad_norm": 13.831956811214068, "learning_rate": 4.312832905950509e-06, "loss": 1.280178427696228, "step": 2240 }, { "epoch": 1.7701421800947867, "grad_norm": 12.777246523961239, "learning_rate": 4.308281145793535e-06, "loss": 1.7366316318511963, "step": 2241 }, { "epoch": 1.7709320695102686, "grad_norm": 7.280612489287066, "learning_rate": 4.303729970002924e-06, "loss": 1.6962597370147705, "step": 2242 }, { "epoch": 1.7717219589257502, "grad_norm": 13.907639824752652, "learning_rate": 4.2991793824235286e-06, "loss": 1.6384978294372559, "step": 2243 }, { "epoch": 1.772511848341232, "grad_norm": 11.962864332754167, "learning_rate": 4.294629386899699e-06, "loss": 1.5645751953125, "step": 2244 }, { "epoch": 1.773301737756714, "grad_norm": 10.40002219488319, "learning_rate": 4.290079987275285e-06, "loss": 1.8200668096542358, "step": 2245 }, { "epoch": 1.7740916271721958, "grad_norm": 7.796350477300916, "learning_rate": 4.285531187393639e-06, "loss": 1.8340072631835938, "step": 2246 }, { "epoch": 1.7748815165876777, "grad_norm": 14.305874479858872, "learning_rate": 4.280982991097594e-06, "loss": 1.3229985237121582, "step": 2247 }, { "epoch": 1.7756714060031595, "grad_norm": 17.06836253359979, "learning_rate": 4.276435402229488e-06, "loss": 1.6926765441894531, "step": 2248 }, { "epoch": 1.7764612954186414, "grad_norm": 14.834597218782198, "learning_rate": 4.271888424631137e-06, "loss": 1.4247950315475464, "step": 2249 }, { "epoch": 1.7772511848341233, "grad_norm": 12.619750807648803, "learning_rate": 4.267342062143841e-06, "loss": 1.1141537427902222, "step": 2250 }, { "epoch": 1.7780410742496051, "grad_norm": 12.049893792135833, "learning_rate": 4.2627963186083844e-06, "loss": 1.3787682056427002, "step": 2251 }, { "epoch": 1.778830963665087, "grad_norm": 17.958477722896514, "learning_rate": 4.258251197865028e-06, "loss": 1.4096425771713257, "step": 2252 }, { "epoch": 1.7796208530805688, "grad_norm": 10.777731590166498, "learning_rate": 4.253706703753499e-06, "loss": 1.2030799388885498, "step": 2253 }, { "epoch": 1.7804107424960507, "grad_norm": 17.628412607375658, "learning_rate": 4.249162840113005e-06, "loss": 2.639885425567627, "step": 2254 }, { "epoch": 1.7812006319115326, "grad_norm": 14.704071301126223, "learning_rate": 4.244619610782216e-06, "loss": 1.2214397192001343, "step": 2255 }, { "epoch": 1.7819905213270142, "grad_norm": 12.269537431737808, "learning_rate": 4.240077019599268e-06, "loss": 0.9519909620285034, "step": 2256 }, { "epoch": 1.782780410742496, "grad_norm": 7.752341256018781, "learning_rate": 4.235535070401757e-06, "loss": 1.37101149559021, "step": 2257 }, { "epoch": 1.783570300157978, "grad_norm": 16.293151882011077, "learning_rate": 4.2309937670267355e-06, "loss": 1.9549283981323242, "step": 2258 }, { "epoch": 1.7843601895734598, "grad_norm": 12.578113105487619, "learning_rate": 4.22645311331071e-06, "loss": 1.6407248973846436, "step": 2259 }, { "epoch": 1.7851500789889414, "grad_norm": 11.529806537339917, "learning_rate": 4.221913113089643e-06, "loss": 1.6672661304473877, "step": 2260 }, { "epoch": 1.7859399684044233, "grad_norm": 10.540110804631544, "learning_rate": 4.217373770198939e-06, "loss": 1.268946647644043, "step": 2261 }, { "epoch": 1.7867298578199051, "grad_norm": 12.276750271201358, "learning_rate": 4.212835088473455e-06, "loss": 1.6153327226638794, "step": 2262 }, { "epoch": 1.787519747235387, "grad_norm": 8.08704746323851, "learning_rate": 4.208297071747476e-06, "loss": 2.285081386566162, "step": 2263 }, { "epoch": 1.7883096366508688, "grad_norm": 18.248267446477442, "learning_rate": 4.20375972385474e-06, "loss": 1.9024407863616943, "step": 2264 }, { "epoch": 1.7890995260663507, "grad_norm": 9.775614166254378, "learning_rate": 4.1992230486284105e-06, "loss": 1.5444855690002441, "step": 2265 }, { "epoch": 1.7898894154818326, "grad_norm": 13.515175274258306, "learning_rate": 4.194687049901086e-06, "loss": 1.2774243354797363, "step": 2266 }, { "epoch": 1.7906793048973144, "grad_norm": 12.940831969941822, "learning_rate": 4.190151731504795e-06, "loss": 1.5125453472137451, "step": 2267 }, { "epoch": 1.7914691943127963, "grad_norm": 9.973015494639506, "learning_rate": 4.185617097270989e-06, "loss": 1.5182291269302368, "step": 2268 }, { "epoch": 1.7922590837282781, "grad_norm": 10.794888121500216, "learning_rate": 4.181083151030539e-06, "loss": 0.991271436214447, "step": 2269 }, { "epoch": 1.79304897314376, "grad_norm": 23.58963526924935, "learning_rate": 4.176549896613739e-06, "loss": 1.3529629707336426, "step": 2270 }, { "epoch": 1.7938388625592419, "grad_norm": 15.70406604388244, "learning_rate": 4.1720173378503e-06, "loss": 1.1336208581924438, "step": 2271 }, { "epoch": 1.7946287519747235, "grad_norm": 8.10592167127099, "learning_rate": 4.1674854785693395e-06, "loss": 1.4463564157485962, "step": 2272 }, { "epoch": 1.7954186413902053, "grad_norm": 16.247088268768564, "learning_rate": 4.162954322599389e-06, "loss": 1.5160022974014282, "step": 2273 }, { "epoch": 1.7962085308056872, "grad_norm": 14.65045540543928, "learning_rate": 4.158423873768382e-06, "loss": 1.3699758052825928, "step": 2274 }, { "epoch": 1.796998420221169, "grad_norm": 8.615488903760822, "learning_rate": 4.153894135903658e-06, "loss": 1.9635517597198486, "step": 2275 }, { "epoch": 1.7977883096366507, "grad_norm": 7.489719358192559, "learning_rate": 4.149365112831954e-06, "loss": 1.8137181997299194, "step": 2276 }, { "epoch": 1.7985781990521326, "grad_norm": 9.694581073663853, "learning_rate": 4.144836808379404e-06, "loss": 1.3416333198547363, "step": 2277 }, { "epoch": 1.7993680884676144, "grad_norm": 12.289624728131038, "learning_rate": 4.140309226371534e-06, "loss": 1.53472900390625, "step": 2278 }, { "epoch": 1.8001579778830963, "grad_norm": 11.175578971485294, "learning_rate": 4.135782370633263e-06, "loss": 1.6156749725341797, "step": 2279 }, { "epoch": 1.8009478672985781, "grad_norm": 9.238967112564058, "learning_rate": 4.131256244988888e-06, "loss": 0.7965636253356934, "step": 2280 }, { "epoch": 1.80173775671406, "grad_norm": 12.814521660266232, "learning_rate": 4.1267308532621e-06, "loss": 1.129547357559204, "step": 2281 }, { "epoch": 1.8025276461295419, "grad_norm": 15.163564996840357, "learning_rate": 4.122206199275961e-06, "loss": 1.5986425876617432, "step": 2282 }, { "epoch": 1.8033175355450237, "grad_norm": 12.370089121691372, "learning_rate": 4.117682286852916e-06, "loss": 1.1130129098892212, "step": 2283 }, { "epoch": 1.8041074249605056, "grad_norm": 7.3337703985283325, "learning_rate": 4.1131591198147825e-06, "loss": 1.4250520467758179, "step": 2284 }, { "epoch": 1.8048973143759874, "grad_norm": 15.555243696941096, "learning_rate": 4.108636701982744e-06, "loss": 1.0984294414520264, "step": 2285 }, { "epoch": 1.8056872037914693, "grad_norm": 11.624250828595164, "learning_rate": 4.104115037177354e-06, "loss": 1.8641960620880127, "step": 2286 }, { "epoch": 1.8064770932069512, "grad_norm": 8.641764907398887, "learning_rate": 4.09959412921853e-06, "loss": 2.202810287475586, "step": 2287 }, { "epoch": 1.807266982622433, "grad_norm": 10.272414502220078, "learning_rate": 4.0950739819255516e-06, "loss": 1.1767101287841797, "step": 2288 }, { "epoch": 1.8080568720379147, "grad_norm": 10.73523808838913, "learning_rate": 4.090554599117053e-06, "loss": 1.3976107835769653, "step": 2289 }, { "epoch": 1.8088467614533965, "grad_norm": 9.029022789020436, "learning_rate": 4.086035984611026e-06, "loss": 1.4328737258911133, "step": 2290 }, { "epoch": 1.8096366508688784, "grad_norm": 12.75831158267409, "learning_rate": 4.081518142224806e-06, "loss": 1.5775648355484009, "step": 2291 }, { "epoch": 1.8104265402843602, "grad_norm": 8.797489371278768, "learning_rate": 4.077001075775085e-06, "loss": 1.8569629192352295, "step": 2292 }, { "epoch": 1.8112164296998419, "grad_norm": 19.2818864408269, "learning_rate": 4.072484789077894e-06, "loss": 1.569921851158142, "step": 2293 }, { "epoch": 1.8120063191153237, "grad_norm": 9.606639381333586, "learning_rate": 4.0679692859486076e-06, "loss": 1.5646262168884277, "step": 2294 }, { "epoch": 1.8127962085308056, "grad_norm": 15.982700710777124, "learning_rate": 4.06345457020194e-06, "loss": 1.6539026498794556, "step": 2295 }, { "epoch": 1.8135860979462874, "grad_norm": 11.758946943159149, "learning_rate": 4.0589406456519335e-06, "loss": 2.3593804836273193, "step": 2296 }, { "epoch": 1.8143759873617693, "grad_norm": 10.648338644534252, "learning_rate": 4.054427516111968e-06, "loss": 1.4271035194396973, "step": 2297 }, { "epoch": 1.8151658767772512, "grad_norm": 13.889260348670108, "learning_rate": 4.049915185394751e-06, "loss": 1.5787549018859863, "step": 2298 }, { "epoch": 1.815955766192733, "grad_norm": 11.265998620973773, "learning_rate": 4.045403657312313e-06, "loss": 1.2817461490631104, "step": 2299 }, { "epoch": 1.8167456556082149, "grad_norm": 15.757768299774062, "learning_rate": 4.040892935676011e-06, "loss": 1.6207523345947266, "step": 2300 }, { "epoch": 1.8175355450236967, "grad_norm": 10.598445710266809, "learning_rate": 4.036383024296515e-06, "loss": 1.390448808670044, "step": 2301 }, { "epoch": 1.8183254344391786, "grad_norm": 10.708487190144936, "learning_rate": 4.031873926983813e-06, "loss": 1.3507099151611328, "step": 2302 }, { "epoch": 1.8191153238546605, "grad_norm": 13.04295029302765, "learning_rate": 4.027365647547204e-06, "loss": 1.3747905492782593, "step": 2303 }, { "epoch": 1.8199052132701423, "grad_norm": 15.06243045733676, "learning_rate": 4.0228581897953e-06, "loss": 1.7040674686431885, "step": 2304 }, { "epoch": 1.8206951026856242, "grad_norm": 8.112658972064233, "learning_rate": 4.018351557536015e-06, "loss": 1.1987684965133667, "step": 2305 }, { "epoch": 1.8214849921011058, "grad_norm": 6.926031154500966, "learning_rate": 4.013845754576567e-06, "loss": 1.538601040840149, "step": 2306 }, { "epoch": 1.8222748815165877, "grad_norm": 7.86014809309979, "learning_rate": 4.0093407847234725e-06, "loss": 1.5583560466766357, "step": 2307 }, { "epoch": 1.8230647709320695, "grad_norm": 14.5747224324332, "learning_rate": 4.004836651782545e-06, "loss": 1.8234143257141113, "step": 2308 }, { "epoch": 1.8238546603475514, "grad_norm": 13.904991074728608, "learning_rate": 4.000333359558891e-06, "loss": 1.4908664226531982, "step": 2309 }, { "epoch": 1.824644549763033, "grad_norm": 10.205474451588882, "learning_rate": 3.995830911856907e-06, "loss": 1.1028133630752563, "step": 2310 }, { "epoch": 1.825434439178515, "grad_norm": 10.206650398885225, "learning_rate": 3.991329312480275e-06, "loss": 0.9024048447608948, "step": 2311 }, { "epoch": 1.8262243285939967, "grad_norm": 20.427132609768222, "learning_rate": 3.986828565231963e-06, "loss": 1.241244912147522, "step": 2312 }, { "epoch": 1.8270142180094786, "grad_norm": 5.947375976678703, "learning_rate": 3.982328673914215e-06, "loss": 1.3205927610397339, "step": 2313 }, { "epoch": 1.8278041074249605, "grad_norm": 11.631133599224034, "learning_rate": 3.977829642328553e-06, "loss": 1.45220947265625, "step": 2314 }, { "epoch": 1.8285939968404423, "grad_norm": 30.658181570348717, "learning_rate": 3.9733314742757755e-06, "loss": 2.151265859603882, "step": 2315 }, { "epoch": 1.8293838862559242, "grad_norm": 14.081842026874503, "learning_rate": 3.96883417355595e-06, "loss": 1.9974944591522217, "step": 2316 }, { "epoch": 1.830173775671406, "grad_norm": 14.732530661028573, "learning_rate": 3.964337743968411e-06, "loss": 1.3346126079559326, "step": 2317 }, { "epoch": 1.830963665086888, "grad_norm": 11.657064075884962, "learning_rate": 3.959842189311756e-06, "loss": 1.1636943817138672, "step": 2318 }, { "epoch": 1.8317535545023698, "grad_norm": 21.842024915662858, "learning_rate": 3.955347513383842e-06, "loss": 2.715574264526367, "step": 2319 }, { "epoch": 1.8325434439178516, "grad_norm": 8.945848850752133, "learning_rate": 3.950853719981792e-06, "loss": 1.8645424842834473, "step": 2320 }, { "epoch": 1.8333333333333335, "grad_norm": 7.363041428428467, "learning_rate": 3.946360812901973e-06, "loss": 2.0045013427734375, "step": 2321 }, { "epoch": 1.8341232227488151, "grad_norm": 14.121661847109614, "learning_rate": 3.941868795940008e-06, "loss": 1.6329420804977417, "step": 2322 }, { "epoch": 1.834913112164297, "grad_norm": 12.076873368108538, "learning_rate": 3.937377672890771e-06, "loss": 1.9567821025848389, "step": 2323 }, { "epoch": 1.8357030015797788, "grad_norm": 9.278188737515212, "learning_rate": 3.932887447548373e-06, "loss": 1.7651054859161377, "step": 2324 }, { "epoch": 1.8364928909952607, "grad_norm": 7.783361269922115, "learning_rate": 3.928398123706174e-06, "loss": 1.2713581323623657, "step": 2325 }, { "epoch": 1.8372827804107423, "grad_norm": 10.329079837290895, "learning_rate": 3.923909705156768e-06, "loss": 1.2148998975753784, "step": 2326 }, { "epoch": 1.8380726698262242, "grad_norm": 8.376184643701944, "learning_rate": 3.919422195691987e-06, "loss": 1.1715630292892456, "step": 2327 }, { "epoch": 1.838862559241706, "grad_norm": 9.805258629191533, "learning_rate": 3.9149355991028955e-06, "loss": 1.2779135704040527, "step": 2328 }, { "epoch": 1.839652448657188, "grad_norm": 14.601491970830052, "learning_rate": 3.910449919179782e-06, "loss": 1.2939941883087158, "step": 2329 }, { "epoch": 1.8404423380726698, "grad_norm": 9.430037846938918, "learning_rate": 3.905965159712164e-06, "loss": 1.3495118618011475, "step": 2330 }, { "epoch": 1.8412322274881516, "grad_norm": 10.818117159129248, "learning_rate": 3.901481324488781e-06, "loss": 1.3297629356384277, "step": 2331 }, { "epoch": 1.8420221169036335, "grad_norm": 11.805518524216797, "learning_rate": 3.896998417297593e-06, "loss": 1.4001942873001099, "step": 2332 }, { "epoch": 1.8428120063191153, "grad_norm": 17.217093882822702, "learning_rate": 3.892516441925776e-06, "loss": 1.0463072061538696, "step": 2333 }, { "epoch": 1.8436018957345972, "grad_norm": 10.365237443982673, "learning_rate": 3.888035402159713e-06, "loss": 0.9372630715370178, "step": 2334 }, { "epoch": 1.844391785150079, "grad_norm": 14.0678966475704, "learning_rate": 3.883555301785005e-06, "loss": 1.43800950050354, "step": 2335 }, { "epoch": 1.845181674565561, "grad_norm": 11.354835493412002, "learning_rate": 3.879076144586455e-06, "loss": 1.584133505821228, "step": 2336 }, { "epoch": 1.8459715639810428, "grad_norm": 9.595921862598702, "learning_rate": 3.874597934348069e-06, "loss": 1.214270830154419, "step": 2337 }, { "epoch": 1.8467614533965246, "grad_norm": 16.11156740518648, "learning_rate": 3.870120674853053e-06, "loss": 1.300584316253662, "step": 2338 }, { "epoch": 1.8475513428120063, "grad_norm": 10.555883298615067, "learning_rate": 3.865644369883814e-06, "loss": 1.2705886363983154, "step": 2339 }, { "epoch": 1.8483412322274881, "grad_norm": 13.361585040619424, "learning_rate": 3.861169023221944e-06, "loss": 1.4234365224838257, "step": 2340 }, { "epoch": 1.84913112164297, "grad_norm": 8.97574341942341, "learning_rate": 3.856694638648235e-06, "loss": 2.0097668170928955, "step": 2341 }, { "epoch": 1.8499210110584519, "grad_norm": 19.363394658374865, "learning_rate": 3.85222121994266e-06, "loss": 2.359208106994629, "step": 2342 }, { "epoch": 1.8507109004739335, "grad_norm": 14.425059595509914, "learning_rate": 3.847748770884376e-06, "loss": 1.24526047706604, "step": 2343 }, { "epoch": 1.8515007898894154, "grad_norm": 16.95844235432838, "learning_rate": 3.8432772952517285e-06, "loss": 1.2295589447021484, "step": 2344 }, { "epoch": 1.8522906793048972, "grad_norm": 17.76856930002662, "learning_rate": 3.8388067968222285e-06, "loss": 1.4502266645431519, "step": 2345 }, { "epoch": 1.853080568720379, "grad_norm": 9.344678461709814, "learning_rate": 3.834337279372571e-06, "loss": 1.0235224962234497, "step": 2346 }, { "epoch": 1.853870458135861, "grad_norm": 7.771543247668636, "learning_rate": 3.829868746678617e-06, "loss": 0.9621250629425049, "step": 2347 }, { "epoch": 1.8546603475513428, "grad_norm": 8.997193063535544, "learning_rate": 3.825401202515401e-06, "loss": 1.0980961322784424, "step": 2348 }, { "epoch": 1.8554502369668247, "grad_norm": 11.396355571537933, "learning_rate": 3.820934650657116e-06, "loss": 1.376495599746704, "step": 2349 }, { "epoch": 1.8562401263823065, "grad_norm": 12.015491646099614, "learning_rate": 3.816469094877122e-06, "loss": 1.1994984149932861, "step": 2350 }, { "epoch": 1.8570300157977884, "grad_norm": 13.018195454364, "learning_rate": 3.812004538947933e-06, "loss": 1.5184617042541504, "step": 2351 }, { "epoch": 1.8578199052132702, "grad_norm": 16.613834476479624, "learning_rate": 3.807540986641221e-06, "loss": 1.3802169561386108, "step": 2352 }, { "epoch": 1.858609794628752, "grad_norm": 8.55272399415255, "learning_rate": 3.803078441727811e-06, "loss": 2.0620877742767334, "step": 2353 }, { "epoch": 1.859399684044234, "grad_norm": 16.517593501003834, "learning_rate": 3.7986169079776738e-06, "loss": 1.021907925605774, "step": 2354 }, { "epoch": 1.8601895734597158, "grad_norm": 12.612088978458775, "learning_rate": 3.794156389159932e-06, "loss": 1.9003463983535767, "step": 2355 }, { "epoch": 1.8609794628751974, "grad_norm": 9.508374505836391, "learning_rate": 3.78969688904284e-06, "loss": 1.6055456399917603, "step": 2356 }, { "epoch": 1.8617693522906793, "grad_norm": 13.25180074627195, "learning_rate": 3.7852384113938033e-06, "loss": 1.5654323101043701, "step": 2357 }, { "epoch": 1.8625592417061612, "grad_norm": 7.960467731299903, "learning_rate": 3.7807809599793564e-06, "loss": 1.5566787719726562, "step": 2358 }, { "epoch": 1.863349131121643, "grad_norm": 10.271738225651552, "learning_rate": 3.7763245385651688e-06, "loss": 0.9510334134101868, "step": 2359 }, { "epoch": 1.8641390205371247, "grad_norm": 10.69224989126236, "learning_rate": 3.771869150916041e-06, "loss": 1.842404842376709, "step": 2360 }, { "epoch": 1.8649289099526065, "grad_norm": 10.785184946501737, "learning_rate": 3.7674148007959e-06, "loss": 1.2416088581085205, "step": 2361 }, { "epoch": 1.8657187993680884, "grad_norm": 8.752263843413806, "learning_rate": 3.762961491967793e-06, "loss": 2.3513264656066895, "step": 2362 }, { "epoch": 1.8665086887835702, "grad_norm": 11.250525502071884, "learning_rate": 3.7585092281938906e-06, "loss": 1.8920762538909912, "step": 2363 }, { "epoch": 1.867298578199052, "grad_norm": 8.990065479551909, "learning_rate": 3.754058013235481e-06, "loss": 1.415196180343628, "step": 2364 }, { "epoch": 1.868088467614534, "grad_norm": 8.516130774368182, "learning_rate": 3.7496078508529655e-06, "loss": 1.542191743850708, "step": 2365 }, { "epoch": 1.8688783570300158, "grad_norm": 9.072473337050232, "learning_rate": 3.7451587448058553e-06, "loss": 1.6856296062469482, "step": 2366 }, { "epoch": 1.8696682464454977, "grad_norm": 12.708061491523079, "learning_rate": 3.7407106988527687e-06, "loss": 1.783271074295044, "step": 2367 }, { "epoch": 1.8704581358609795, "grad_norm": 10.769206669854055, "learning_rate": 3.7362637167514294e-06, "loss": 1.5678787231445312, "step": 2368 }, { "epoch": 1.8712480252764614, "grad_norm": 20.106127018365843, "learning_rate": 3.731817802258665e-06, "loss": 1.4182826280593872, "step": 2369 }, { "epoch": 1.8720379146919433, "grad_norm": 14.613741216644682, "learning_rate": 3.7273729591303977e-06, "loss": 0.9464998245239258, "step": 2370 }, { "epoch": 1.872827804107425, "grad_norm": 14.57353180320642, "learning_rate": 3.7229291911216438e-06, "loss": 1.2734510898590088, "step": 2371 }, { "epoch": 1.8736176935229067, "grad_norm": 18.57604525014746, "learning_rate": 3.718486501986517e-06, "loss": 1.3280824422836304, "step": 2372 }, { "epoch": 1.8744075829383886, "grad_norm": 10.655701944239475, "learning_rate": 3.7140448954782118e-06, "loss": 2.1533942222595215, "step": 2373 }, { "epoch": 1.8751974723538705, "grad_norm": 10.436689103007193, "learning_rate": 3.7096043753490135e-06, "loss": 3.3665125370025635, "step": 2374 }, { "epoch": 1.8759873617693523, "grad_norm": 11.030169703908209, "learning_rate": 3.7051649453502874e-06, "loss": 2.9506380558013916, "step": 2375 }, { "epoch": 1.876777251184834, "grad_norm": 14.013016012791953, "learning_rate": 3.7007266092324807e-06, "loss": 1.3356175422668457, "step": 2376 }, { "epoch": 1.8775671406003158, "grad_norm": 17.981966299258165, "learning_rate": 3.6962893707451136e-06, "loss": 0.6594985723495483, "step": 2377 }, { "epoch": 1.8783570300157977, "grad_norm": 13.564577074220647, "learning_rate": 3.6918532336367784e-06, "loss": 2.0847041606903076, "step": 2378 }, { "epoch": 1.8791469194312795, "grad_norm": 15.468644474324055, "learning_rate": 3.6874182016551384e-06, "loss": 1.3281214237213135, "step": 2379 }, { "epoch": 1.8799368088467614, "grad_norm": 18.198119013305984, "learning_rate": 3.6829842785469237e-06, "loss": 2.035587787628174, "step": 2380 }, { "epoch": 1.8807266982622433, "grad_norm": 7.022992331825169, "learning_rate": 3.678551468057928e-06, "loss": 1.31044602394104, "step": 2381 }, { "epoch": 1.8815165876777251, "grad_norm": 11.785392947710033, "learning_rate": 3.6741197739330036e-06, "loss": 1.1300781965255737, "step": 2382 }, { "epoch": 1.882306477093207, "grad_norm": 13.706145484472717, "learning_rate": 3.6696891999160624e-06, "loss": 2.2334394454956055, "step": 2383 }, { "epoch": 1.8830963665086888, "grad_norm": 9.781655657096698, "learning_rate": 3.6652597497500632e-06, "loss": 1.2571077346801758, "step": 2384 }, { "epoch": 1.8838862559241707, "grad_norm": 16.34961943071829, "learning_rate": 3.660831427177023e-06, "loss": 1.1918299198150635, "step": 2385 }, { "epoch": 1.8846761453396526, "grad_norm": 9.661383436716706, "learning_rate": 3.6564042359380035e-06, "loss": 1.571300983428955, "step": 2386 }, { "epoch": 1.8854660347551344, "grad_norm": 9.08149537153619, "learning_rate": 3.651978179773109e-06, "loss": 1.381611943244934, "step": 2387 }, { "epoch": 1.8862559241706163, "grad_norm": 8.947483706023426, "learning_rate": 3.647553262421489e-06, "loss": 1.569331169128418, "step": 2388 }, { "epoch": 1.887045813586098, "grad_norm": 15.643579814373975, "learning_rate": 3.6431294876213256e-06, "loss": 1.7174663543701172, "step": 2389 }, { "epoch": 1.8878357030015798, "grad_norm": 11.74224852213168, "learning_rate": 3.6387068591098394e-06, "loss": 1.6020495891571045, "step": 2390 }, { "epoch": 1.8886255924170616, "grad_norm": 17.71812535453601, "learning_rate": 3.6342853806232803e-06, "loss": 2.5341310501098633, "step": 2391 }, { "epoch": 1.8894154818325435, "grad_norm": 19.738562508307506, "learning_rate": 3.6298650558969297e-06, "loss": 1.986846685409546, "step": 2392 }, { "epoch": 1.8902053712480251, "grad_norm": 11.658929421377039, "learning_rate": 3.6254458886650916e-06, "loss": 1.847143530845642, "step": 2393 }, { "epoch": 1.890995260663507, "grad_norm": 12.222377083494536, "learning_rate": 3.6210278826610947e-06, "loss": 1.7250714302062988, "step": 2394 }, { "epoch": 1.8917851500789888, "grad_norm": 23.353812919786044, "learning_rate": 3.6166110416172824e-06, "loss": 1.1605632305145264, "step": 2395 }, { "epoch": 1.8925750394944707, "grad_norm": 13.967775786929394, "learning_rate": 3.612195369265016e-06, "loss": 1.477918028831482, "step": 2396 }, { "epoch": 1.8933649289099526, "grad_norm": 7.824641265572897, "learning_rate": 3.607780869334673e-06, "loss": 0.8736203908920288, "step": 2397 }, { "epoch": 1.8941548183254344, "grad_norm": 15.896684803906586, "learning_rate": 3.6033675455556362e-06, "loss": 1.7004368305206299, "step": 2398 }, { "epoch": 1.8949447077409163, "grad_norm": 12.602951334291856, "learning_rate": 3.5989554016562973e-06, "loss": 1.6466461420059204, "step": 2399 }, { "epoch": 1.8957345971563981, "grad_norm": 23.183480711381492, "learning_rate": 3.594544441364046e-06, "loss": 2.251540184020996, "step": 2400 }, { "epoch": 1.89652448657188, "grad_norm": 10.791019706405871, "learning_rate": 3.590134668405281e-06, "loss": 2.160658121109009, "step": 2401 }, { "epoch": 1.8973143759873619, "grad_norm": 20.769430115923996, "learning_rate": 3.5857260865053888e-06, "loss": 2.002129077911377, "step": 2402 }, { "epoch": 1.8981042654028437, "grad_norm": 11.654913849622542, "learning_rate": 3.5813186993887564e-06, "loss": 1.1885507106781006, "step": 2403 }, { "epoch": 1.8988941548183256, "grad_norm": 11.018801992801427, "learning_rate": 3.5769125107787615e-06, "loss": 1.4551243782043457, "step": 2404 }, { "epoch": 1.8996840442338072, "grad_norm": 13.81180119203942, "learning_rate": 3.57250752439776e-06, "loss": 1.8174326419830322, "step": 2405 }, { "epoch": 1.900473933649289, "grad_norm": 8.538596914130633, "learning_rate": 3.568103743967104e-06, "loss": 2.0413808822631836, "step": 2406 }, { "epoch": 1.901263823064771, "grad_norm": 13.208971367362476, "learning_rate": 3.56370117320712e-06, "loss": 1.130143642425537, "step": 2407 }, { "epoch": 1.9020537124802528, "grad_norm": 20.133975799192914, "learning_rate": 3.559299815837114e-06, "loss": 2.2564547061920166, "step": 2408 }, { "epoch": 1.9028436018957346, "grad_norm": 8.040080834857976, "learning_rate": 3.5548996755753686e-06, "loss": 1.9948700666427612, "step": 2409 }, { "epoch": 1.9036334913112163, "grad_norm": 10.107125326201913, "learning_rate": 3.550500756139137e-06, "loss": 1.2073781490325928, "step": 2410 }, { "epoch": 1.9044233807266981, "grad_norm": 12.074035310627842, "learning_rate": 3.546103061244639e-06, "loss": 1.4633095264434814, "step": 2411 }, { "epoch": 1.90521327014218, "grad_norm": 8.748368688532468, "learning_rate": 3.5417065946070616e-06, "loss": 1.4983105659484863, "step": 2412 }, { "epoch": 1.9060031595576619, "grad_norm": 8.119449198425269, "learning_rate": 3.5373113599405558e-06, "loss": 1.4577915668487549, "step": 2413 }, { "epoch": 1.9067930489731437, "grad_norm": 8.602605817692487, "learning_rate": 3.53291736095823e-06, "loss": 1.7091364860534668, "step": 2414 }, { "epoch": 1.9075829383886256, "grad_norm": 9.57318217061906, "learning_rate": 3.52852460137215e-06, "loss": 0.9629848003387451, "step": 2415 }, { "epoch": 1.9083728278041074, "grad_norm": 12.401875854076282, "learning_rate": 3.5241330848933297e-06, "loss": 1.1933588981628418, "step": 2416 }, { "epoch": 1.9091627172195893, "grad_norm": 19.330318737313394, "learning_rate": 3.5197428152317405e-06, "loss": 1.4718396663665771, "step": 2417 }, { "epoch": 1.9099526066350712, "grad_norm": 10.724633948475557, "learning_rate": 3.5153537960962953e-06, "loss": 1.8858482837677002, "step": 2418 }, { "epoch": 1.910742496050553, "grad_norm": 9.259587522429628, "learning_rate": 3.510966031194851e-06, "loss": 1.550106167793274, "step": 2419 }, { "epoch": 1.9115323854660349, "grad_norm": 10.146358708655796, "learning_rate": 3.5065795242342083e-06, "loss": 1.5809369087219238, "step": 2420 }, { "epoch": 1.9123222748815167, "grad_norm": 13.004346105846725, "learning_rate": 3.5021942789201026e-06, "loss": 1.3224411010742188, "step": 2421 }, { "epoch": 1.9131121642969984, "grad_norm": 8.080997608907936, "learning_rate": 3.4978102989572007e-06, "loss": 1.6125473976135254, "step": 2422 }, { "epoch": 1.9139020537124802, "grad_norm": 12.741289791550335, "learning_rate": 3.4934275880491055e-06, "loss": 1.4377524852752686, "step": 2423 }, { "epoch": 1.914691943127962, "grad_norm": 7.712596175637102, "learning_rate": 3.4890461498983436e-06, "loss": 1.7991526126861572, "step": 2424 }, { "epoch": 1.915481832543444, "grad_norm": 9.444274006754885, "learning_rate": 3.4846659882063704e-06, "loss": 1.237257957458496, "step": 2425 }, { "epoch": 1.9162717219589256, "grad_norm": 12.543456884340403, "learning_rate": 3.4802871066735623e-06, "loss": 1.9164607524871826, "step": 2426 }, { "epoch": 1.9170616113744074, "grad_norm": 13.74775394370082, "learning_rate": 3.4759095089992094e-06, "loss": 1.111218810081482, "step": 2427 }, { "epoch": 1.9178515007898893, "grad_norm": 13.934197139537574, "learning_rate": 3.4715331988815217e-06, "loss": 1.1446185111999512, "step": 2428 }, { "epoch": 1.9186413902053712, "grad_norm": 8.58653764163054, "learning_rate": 3.4671581800176208e-06, "loss": 1.4410523176193237, "step": 2429 }, { "epoch": 1.919431279620853, "grad_norm": 13.847657929892328, "learning_rate": 3.462784456103537e-06, "loss": 1.9751472473144531, "step": 2430 }, { "epoch": 1.9202211690363349, "grad_norm": 11.542004822189035, "learning_rate": 3.4584120308342068e-06, "loss": 0.9431929588317871, "step": 2431 }, { "epoch": 1.9210110584518167, "grad_norm": 12.290366977682353, "learning_rate": 3.454040907903472e-06, "loss": 1.09321129322052, "step": 2432 }, { "epoch": 1.9218009478672986, "grad_norm": 10.957843707967365, "learning_rate": 3.4496710910040654e-06, "loss": 2.014118194580078, "step": 2433 }, { "epoch": 1.9225908372827805, "grad_norm": 30.636247581974626, "learning_rate": 3.4453025838276283e-06, "loss": 1.576662302017212, "step": 2434 }, { "epoch": 1.9233807266982623, "grad_norm": 15.05397997539076, "learning_rate": 3.4409353900646873e-06, "loss": 1.2868478298187256, "step": 2435 }, { "epoch": 1.9241706161137442, "grad_norm": 12.897559358848328, "learning_rate": 3.4365695134046616e-06, "loss": 1.1865384578704834, "step": 2436 }, { "epoch": 1.924960505529226, "grad_norm": 15.253959007398343, "learning_rate": 3.432204957535862e-06, "loss": 1.626413106918335, "step": 2437 }, { "epoch": 1.925750394944708, "grad_norm": 8.00956704186196, "learning_rate": 3.4278417261454754e-06, "loss": 1.630741834640503, "step": 2438 }, { "epoch": 1.9265402843601895, "grad_norm": 17.812247659435748, "learning_rate": 3.4234798229195764e-06, "loss": 1.313347578048706, "step": 2439 }, { "epoch": 1.9273301737756714, "grad_norm": 9.723288589416475, "learning_rate": 3.4191192515431125e-06, "loss": 1.5095974206924438, "step": 2440 }, { "epoch": 1.9281200631911533, "grad_norm": 19.0000198211733, "learning_rate": 3.414760015699913e-06, "loss": 1.7037584781646729, "step": 2441 }, { "epoch": 1.9289099526066351, "grad_norm": 7.4858175557613436, "learning_rate": 3.410402119072671e-06, "loss": 2.0781524181365967, "step": 2442 }, { "epoch": 1.9296998420221168, "grad_norm": 9.98301676651302, "learning_rate": 3.406045565342955e-06, "loss": 1.491240382194519, "step": 2443 }, { "epoch": 1.9304897314375986, "grad_norm": 10.15893764761394, "learning_rate": 3.401690358191192e-06, "loss": 1.3460967540740967, "step": 2444 }, { "epoch": 1.9312796208530805, "grad_norm": 12.890492449150006, "learning_rate": 3.397336501296679e-06, "loss": 0.8602248430252075, "step": 2445 }, { "epoch": 1.9320695102685623, "grad_norm": 12.533401638864804, "learning_rate": 3.392983998337567e-06, "loss": 0.8721238374710083, "step": 2446 }, { "epoch": 1.9328593996840442, "grad_norm": 20.30153738152959, "learning_rate": 3.388632852990864e-06, "loss": 1.3083229064941406, "step": 2447 }, { "epoch": 1.933649289099526, "grad_norm": 9.12776969619587, "learning_rate": 3.3842830689324367e-06, "loss": 1.5735386610031128, "step": 2448 }, { "epoch": 1.934439178515008, "grad_norm": 20.700954625914637, "learning_rate": 3.3799346498369895e-06, "loss": 1.5870678424835205, "step": 2449 }, { "epoch": 1.9352290679304898, "grad_norm": 9.831445546192326, "learning_rate": 3.3755875993780862e-06, "loss": 1.48130464553833, "step": 2450 }, { "epoch": 1.9360189573459716, "grad_norm": 24.574710960838644, "learning_rate": 3.3712419212281284e-06, "loss": 1.7219302654266357, "step": 2451 }, { "epoch": 1.9368088467614535, "grad_norm": 14.89660871560069, "learning_rate": 3.366897619058358e-06, "loss": 0.9834906458854675, "step": 2452 }, { "epoch": 1.9375987361769353, "grad_norm": 10.712068685842123, "learning_rate": 3.362554696538857e-06, "loss": 2.1397347450256348, "step": 2453 }, { "epoch": 1.9383886255924172, "grad_norm": 7.7667540070752485, "learning_rate": 3.358213157338542e-06, "loss": 1.1784471273422241, "step": 2454 }, { "epoch": 1.9391785150078988, "grad_norm": 9.841175283941777, "learning_rate": 3.3538730051251576e-06, "loss": 1.503877878189087, "step": 2455 }, { "epoch": 1.9399684044233807, "grad_norm": 12.542202389896184, "learning_rate": 3.3495342435652777e-06, "loss": 1.7189602851867676, "step": 2456 }, { "epoch": 1.9407582938388626, "grad_norm": 11.391348598231419, "learning_rate": 3.3451968763243046e-06, "loss": 1.686078667640686, "step": 2457 }, { "epoch": 1.9415481832543444, "grad_norm": 11.575710499478722, "learning_rate": 3.34086090706646e-06, "loss": 1.031868577003479, "step": 2458 }, { "epoch": 1.9423380726698263, "grad_norm": 8.78322861407791, "learning_rate": 3.3365263394547852e-06, "loss": 1.1780157089233398, "step": 2459 }, { "epoch": 1.943127962085308, "grad_norm": 12.608494943318862, "learning_rate": 3.3321931771511363e-06, "loss": 1.1709704399108887, "step": 2460 }, { "epoch": 1.9439178515007898, "grad_norm": 12.683639129888297, "learning_rate": 3.3278614238161818e-06, "loss": 0.8888792991638184, "step": 2461 }, { "epoch": 1.9447077409162716, "grad_norm": 11.443846674864774, "learning_rate": 3.323531083109404e-06, "loss": 1.3447215557098389, "step": 2462 }, { "epoch": 1.9454976303317535, "grad_norm": 11.117047622365464, "learning_rate": 3.3192021586890866e-06, "loss": 1.329740047454834, "step": 2463 }, { "epoch": 1.9462875197472354, "grad_norm": 14.02605723542626, "learning_rate": 3.314874654212321e-06, "loss": 1.4177271127700806, "step": 2464 }, { "epoch": 1.9470774091627172, "grad_norm": 10.663308506333905, "learning_rate": 3.3105485733349984e-06, "loss": 1.0513715744018555, "step": 2465 }, { "epoch": 1.947867298578199, "grad_norm": 12.042995971816161, "learning_rate": 3.3062239197118027e-06, "loss": 2.0589680671691895, "step": 2466 }, { "epoch": 1.948657187993681, "grad_norm": 13.784435713074545, "learning_rate": 3.301900696996218e-06, "loss": 2.251110553741455, "step": 2467 }, { "epoch": 1.9494470774091628, "grad_norm": 13.710270558589283, "learning_rate": 3.297578908840515e-06, "loss": 1.2197270393371582, "step": 2468 }, { "epoch": 1.9502369668246446, "grad_norm": 10.246163702089847, "learning_rate": 3.2932585588957565e-06, "loss": 1.1383863687515259, "step": 2469 }, { "epoch": 1.9510268562401265, "grad_norm": 12.115693006024177, "learning_rate": 3.288939650811789e-06, "loss": 1.6648939847946167, "step": 2470 }, { "epoch": 1.9518167456556084, "grad_norm": 10.775832953436032, "learning_rate": 3.2846221882372386e-06, "loss": 1.417509913444519, "step": 2471 }, { "epoch": 1.95260663507109, "grad_norm": 13.57822764736623, "learning_rate": 3.280306174819511e-06, "loss": 1.1139479875564575, "step": 2472 }, { "epoch": 1.9533965244865719, "grad_norm": 14.390531948777902, "learning_rate": 3.27599161420479e-06, "loss": 1.3739941120147705, "step": 2473 }, { "epoch": 1.9541864139020537, "grad_norm": 11.875606554603966, "learning_rate": 3.271678510038031e-06, "loss": 1.250982642173767, "step": 2474 }, { "epoch": 1.9549763033175356, "grad_norm": 10.755099863929798, "learning_rate": 3.2673668659629594e-06, "loss": 1.8845748901367188, "step": 2475 }, { "epoch": 1.9557661927330172, "grad_norm": 12.930480165235073, "learning_rate": 3.2630566856220636e-06, "loss": 1.489890456199646, "step": 2476 }, { "epoch": 1.956556082148499, "grad_norm": 14.065755846735259, "learning_rate": 3.2587479726565985e-06, "loss": 1.648732304573059, "step": 2477 }, { "epoch": 1.957345971563981, "grad_norm": 12.648739189286825, "learning_rate": 3.2544407307065808e-06, "loss": 1.7760826349258423, "step": 2478 }, { "epoch": 1.9581358609794628, "grad_norm": 11.427887541540612, "learning_rate": 3.2501349634107825e-06, "loss": 0.8107354640960693, "step": 2479 }, { "epoch": 1.9589257503949447, "grad_norm": 16.28308318755435, "learning_rate": 3.245830674406728e-06, "loss": 1.8128418922424316, "step": 2480 }, { "epoch": 1.9597156398104265, "grad_norm": 13.404249494910019, "learning_rate": 3.2415278673306984e-06, "loss": 1.3533351421356201, "step": 2481 }, { "epoch": 1.9605055292259084, "grad_norm": 26.577557541363184, "learning_rate": 3.237226545817716e-06, "loss": 1.6566579341888428, "step": 2482 }, { "epoch": 1.9612954186413902, "grad_norm": 10.919680561043819, "learning_rate": 3.2329267135015526e-06, "loss": 1.7733817100524902, "step": 2483 }, { "epoch": 1.962085308056872, "grad_norm": 22.667838737436107, "learning_rate": 3.2286283740147194e-06, "loss": 1.8271636962890625, "step": 2484 }, { "epoch": 1.962875197472354, "grad_norm": 10.40163155926731, "learning_rate": 3.2243315309884697e-06, "loss": 1.3933049440383911, "step": 2485 }, { "epoch": 1.9636650868878358, "grad_norm": 9.862448912563854, "learning_rate": 3.2200361880527914e-06, "loss": 2.3680734634399414, "step": 2486 }, { "epoch": 1.9644549763033177, "grad_norm": 11.811513863223011, "learning_rate": 3.2157423488364013e-06, "loss": 1.6310195922851562, "step": 2487 }, { "epoch": 1.9652448657187995, "grad_norm": 8.26150488888275, "learning_rate": 3.2114500169667513e-06, "loss": 1.2277730703353882, "step": 2488 }, { "epoch": 1.9660347551342812, "grad_norm": 17.701718441376492, "learning_rate": 3.207159196070015e-06, "loss": 1.0331852436065674, "step": 2489 }, { "epoch": 1.966824644549763, "grad_norm": 12.041291133088178, "learning_rate": 3.2028698897710945e-06, "loss": 1.8590238094329834, "step": 2490 }, { "epoch": 1.9676145339652449, "grad_norm": 10.507710338817834, "learning_rate": 3.198582101693608e-06, "loss": 1.9933216571807861, "step": 2491 }, { "epoch": 1.9684044233807267, "grad_norm": 8.15881705828358, "learning_rate": 3.194295835459898e-06, "loss": 1.6253411769866943, "step": 2492 }, { "epoch": 1.9691943127962084, "grad_norm": 10.38474355259108, "learning_rate": 3.1900110946910084e-06, "loss": 1.3114337921142578, "step": 2493 }, { "epoch": 1.9699842022116902, "grad_norm": 9.949738129316227, "learning_rate": 3.1857278830067075e-06, "loss": 1.256972074508667, "step": 2494 }, { "epoch": 1.970774091627172, "grad_norm": 16.162021019463328, "learning_rate": 3.1814462040254657e-06, "loss": 1.8827450275421143, "step": 2495 }, { "epoch": 1.971563981042654, "grad_norm": 22.149860191874843, "learning_rate": 3.1771660613644582e-06, "loss": 0.9394640922546387, "step": 2496 }, { "epoch": 1.9723538704581358, "grad_norm": 8.539480886296017, "learning_rate": 3.1728874586395677e-06, "loss": 1.0433759689331055, "step": 2497 }, { "epoch": 1.9731437598736177, "grad_norm": 11.67531560801483, "learning_rate": 3.168610399465365e-06, "loss": 0.7940645813941956, "step": 2498 }, { "epoch": 1.9739336492890995, "grad_norm": 12.4425838328368, "learning_rate": 3.1643348874551294e-06, "loss": 1.4504199028015137, "step": 2499 }, { "epoch": 1.9747235387045814, "grad_norm": 10.347989319044142, "learning_rate": 3.1600609262208244e-06, "loss": 1.3296544551849365, "step": 2500 }, { "epoch": 1.9755134281200633, "grad_norm": 8.989622478769817, "learning_rate": 3.1557885193731086e-06, "loss": 1.6991509199142456, "step": 2501 }, { "epoch": 1.9763033175355451, "grad_norm": 9.631551183343522, "learning_rate": 3.151517670521325e-06, "loss": 1.6035475730895996, "step": 2502 }, { "epoch": 1.977093206951027, "grad_norm": 13.638367050748972, "learning_rate": 3.1472483832735014e-06, "loss": 1.8189468383789062, "step": 2503 }, { "epoch": 1.9778830963665088, "grad_norm": 13.50296256744864, "learning_rate": 3.1429806612363432e-06, "loss": 1.6658248901367188, "step": 2504 }, { "epoch": 1.9786729857819905, "grad_norm": 7.393153154801466, "learning_rate": 3.138714508015237e-06, "loss": 1.200107455253601, "step": 2505 }, { "epoch": 1.9794628751974723, "grad_norm": 11.920956663917268, "learning_rate": 3.1344499272142447e-06, "loss": 0.791041910648346, "step": 2506 }, { "epoch": 1.9802527646129542, "grad_norm": 17.66290191613317, "learning_rate": 3.130186922436097e-06, "loss": 1.5144439935684204, "step": 2507 }, { "epoch": 1.981042654028436, "grad_norm": 9.017263658547026, "learning_rate": 3.125925497282195e-06, "loss": 1.9003779888153076, "step": 2508 }, { "epoch": 1.981832543443918, "grad_norm": 16.75636258557487, "learning_rate": 3.1216656553526026e-06, "loss": 2.5085067749023438, "step": 2509 }, { "epoch": 1.9826224328593995, "grad_norm": 14.890632169656481, "learning_rate": 3.117407400246051e-06, "loss": 1.7289268970489502, "step": 2510 }, { "epoch": 1.9834123222748814, "grad_norm": 9.15166116321032, "learning_rate": 3.1131507355599267e-06, "loss": 1.61943781375885, "step": 2511 }, { "epoch": 1.9842022116903633, "grad_norm": 6.859620699544047, "learning_rate": 3.1088956648902735e-06, "loss": 2.0489935874938965, "step": 2512 }, { "epoch": 1.9849921011058451, "grad_norm": 14.799263181017864, "learning_rate": 3.1046421918317916e-06, "loss": 1.6825857162475586, "step": 2513 }, { "epoch": 1.985781990521327, "grad_norm": 8.947142781131408, "learning_rate": 3.1003903199778273e-06, "loss": 1.6780674457550049, "step": 2514 }, { "epoch": 1.9865718799368088, "grad_norm": 14.278954614357216, "learning_rate": 3.096140052920376e-06, "loss": 1.194378137588501, "step": 2515 }, { "epoch": 1.9873617693522907, "grad_norm": 12.21887451937195, "learning_rate": 3.091891394250077e-06, "loss": 2.4973843097686768, "step": 2516 }, { "epoch": 1.9881516587677726, "grad_norm": 13.216871045904774, "learning_rate": 3.087644347556211e-06, "loss": 1.668590784072876, "step": 2517 }, { "epoch": 1.9889415481832544, "grad_norm": 14.625469207562114, "learning_rate": 3.0833989164266974e-06, "loss": 1.5314085483551025, "step": 2518 }, { "epoch": 1.9897314375987363, "grad_norm": 11.732142027757016, "learning_rate": 3.0791551044480926e-06, "loss": 1.6780108213424683, "step": 2519 }, { "epoch": 1.9905213270142181, "grad_norm": 9.710757091096028, "learning_rate": 3.0749129152055795e-06, "loss": 1.6987766027450562, "step": 2520 }, { "epoch": 1.9913112164297, "grad_norm": 18.032480852526703, "learning_rate": 3.070672352282974e-06, "loss": 1.1783912181854248, "step": 2521 }, { "epoch": 1.9921011058451816, "grad_norm": 11.162322784263365, "learning_rate": 3.0664334192627197e-06, "loss": 1.6210596561431885, "step": 2522 }, { "epoch": 1.9928909952606635, "grad_norm": 9.526530451148624, "learning_rate": 3.06219611972588e-06, "loss": 0.9034061431884766, "step": 2523 }, { "epoch": 1.9936808846761453, "grad_norm": 12.853469107604116, "learning_rate": 3.0579604572521382e-06, "loss": 1.8308205604553223, "step": 2524 }, { "epoch": 1.9944707740916272, "grad_norm": 12.043619204725749, "learning_rate": 3.0537264354198005e-06, "loss": 1.336472749710083, "step": 2525 }, { "epoch": 1.9952606635071088, "grad_norm": 24.68078723401722, "learning_rate": 3.049494057805776e-06, "loss": 1.3730854988098145, "step": 2526 }, { "epoch": 1.9960505529225907, "grad_norm": 8.875683203972285, "learning_rate": 3.045263327985595e-06, "loss": 1.297802209854126, "step": 2527 }, { "epoch": 1.9968404423380726, "grad_norm": 10.388495319632469, "learning_rate": 3.0410342495333913e-06, "loss": 1.585411548614502, "step": 2528 }, { "epoch": 1.9976303317535544, "grad_norm": 9.15038105991676, "learning_rate": 3.0368068260219054e-06, "loss": 1.1837197542190552, "step": 2529 }, { "epoch": 1.9984202211690363, "grad_norm": 10.39999784581774, "learning_rate": 3.0325810610224783e-06, "loss": 1.3715815544128418, "step": 2530 }, { "epoch": 1.9992101105845181, "grad_norm": 20.945648961140414, "learning_rate": 3.0283569581050486e-06, "loss": 1.6476037502288818, "step": 2531 }, { "epoch": 2.0, "grad_norm": 12.576094322325524, "learning_rate": 3.0241345208381533e-06, "loss": 1.4700212478637695, "step": 2532 }, { "epoch": 2.000789889415482, "grad_norm": 10.359676749996417, "learning_rate": 3.01991375278892e-06, "loss": 0.46016550064086914, "step": 2533 }, { "epoch": 2.0015797788309637, "grad_norm": 10.397265684651309, "learning_rate": 3.01569465752307e-06, "loss": 0.43968772888183594, "step": 2534 }, { "epoch": 2.0023696682464456, "grad_norm": 10.588510736324265, "learning_rate": 3.0114772386049087e-06, "loss": 0.40929579734802246, "step": 2535 }, { "epoch": 2.0031595576619274, "grad_norm": 12.058979157630162, "learning_rate": 3.0072614995973236e-06, "loss": 0.6055519580841064, "step": 2536 }, { "epoch": 2.0039494470774093, "grad_norm": 9.905968562829903, "learning_rate": 3.003047444061784e-06, "loss": 0.33923691511154175, "step": 2537 }, { "epoch": 2.004739336492891, "grad_norm": 11.848016978276616, "learning_rate": 2.998835075558342e-06, "loss": 0.5501883029937744, "step": 2538 }, { "epoch": 2.005529225908373, "grad_norm": 7.660245787743295, "learning_rate": 2.994624397645616e-06, "loss": 0.3189247250556946, "step": 2539 }, { "epoch": 2.006319115323855, "grad_norm": 7.718904002575967, "learning_rate": 2.990415413880803e-06, "loss": 0.8301833868026733, "step": 2540 }, { "epoch": 2.0071090047393363, "grad_norm": 7.798220693220483, "learning_rate": 2.9862081278196685e-06, "loss": 0.34704911708831787, "step": 2541 }, { "epoch": 2.007898894154818, "grad_norm": 8.488060658104867, "learning_rate": 2.9820025430165358e-06, "loss": 0.8342065215110779, "step": 2542 }, { "epoch": 2.0086887835703, "grad_norm": 17.260191665775476, "learning_rate": 2.977798663024302e-06, "loss": 0.341113805770874, "step": 2543 }, { "epoch": 2.009478672985782, "grad_norm": 9.539527306419366, "learning_rate": 2.9735964913944153e-06, "loss": 0.7216507792472839, "step": 2544 }, { "epoch": 2.0102685624012637, "grad_norm": 6.040467200683253, "learning_rate": 2.9693960316768856e-06, "loss": 0.5428364276885986, "step": 2545 }, { "epoch": 2.0110584518167456, "grad_norm": 10.079491641898473, "learning_rate": 2.965197287420276e-06, "loss": 0.49245187640190125, "step": 2546 }, { "epoch": 2.0118483412322274, "grad_norm": 8.736640891813208, "learning_rate": 2.9610002621716987e-06, "loss": 0.37076878547668457, "step": 2547 }, { "epoch": 2.0126382306477093, "grad_norm": 14.117126577405678, "learning_rate": 2.956804959476814e-06, "loss": 0.4397502541542053, "step": 2548 }, { "epoch": 2.013428120063191, "grad_norm": 7.821072536076488, "learning_rate": 2.9526113828798266e-06, "loss": 0.3473249673843384, "step": 2549 }, { "epoch": 2.014218009478673, "grad_norm": 12.898146258975322, "learning_rate": 2.9484195359234855e-06, "loss": 0.507472813129425, "step": 2550 }, { "epoch": 2.015007898894155, "grad_norm": 16.615044423238285, "learning_rate": 2.9442294221490773e-06, "loss": 0.32390978932380676, "step": 2551 }, { "epoch": 2.0157977883096367, "grad_norm": 10.491110312547283, "learning_rate": 2.940041045096423e-06, "loss": 0.5616019368171692, "step": 2552 }, { "epoch": 2.0165876777251186, "grad_norm": 17.4266369509056, "learning_rate": 2.9358544083038772e-06, "loss": 0.5512704849243164, "step": 2553 }, { "epoch": 2.0173775671406005, "grad_norm": 9.189915300922015, "learning_rate": 2.931669515308323e-06, "loss": 0.5548714399337769, "step": 2554 }, { "epoch": 2.0181674565560823, "grad_norm": 12.471542857964309, "learning_rate": 2.927486369645174e-06, "loss": 0.5579742789268494, "step": 2555 }, { "epoch": 2.018957345971564, "grad_norm": 10.57651269059222, "learning_rate": 2.9233049748483637e-06, "loss": 0.4655217230319977, "step": 2556 }, { "epoch": 2.0197472353870456, "grad_norm": 16.404149783138585, "learning_rate": 2.9191253344503512e-06, "loss": 0.5133844017982483, "step": 2557 }, { "epoch": 2.0205371248025275, "grad_norm": 8.739917453875979, "learning_rate": 2.9149474519821073e-06, "loss": 0.425764799118042, "step": 2558 }, { "epoch": 2.0213270142180093, "grad_norm": 12.340461413890134, "learning_rate": 2.910771330973123e-06, "loss": 0.31363582611083984, "step": 2559 }, { "epoch": 2.022116903633491, "grad_norm": 17.15887001035681, "learning_rate": 2.9065969749513972e-06, "loss": 0.3946301341056824, "step": 2560 }, { "epoch": 2.022906793048973, "grad_norm": 12.235209650094014, "learning_rate": 2.9024243874434412e-06, "loss": 0.3221214711666107, "step": 2561 }, { "epoch": 2.023696682464455, "grad_norm": 12.122441612475534, "learning_rate": 2.898253571974267e-06, "loss": 0.7128668427467346, "step": 2562 }, { "epoch": 2.0244865718799367, "grad_norm": 11.253478945540717, "learning_rate": 2.8940845320674003e-06, "loss": 0.7156331539154053, "step": 2563 }, { "epoch": 2.0252764612954186, "grad_norm": 11.411246914531567, "learning_rate": 2.889917271244854e-06, "loss": 0.38757872581481934, "step": 2564 }, { "epoch": 2.0260663507109005, "grad_norm": 13.55858423904863, "learning_rate": 2.885751793027146e-06, "loss": 1.2920098304748535, "step": 2565 }, { "epoch": 2.0268562401263823, "grad_norm": 7.218573790221784, "learning_rate": 2.8815881009332847e-06, "loss": 0.305652379989624, "step": 2566 }, { "epoch": 2.027646129541864, "grad_norm": 21.733202425060888, "learning_rate": 2.8774261984807705e-06, "loss": 0.5940957069396973, "step": 2567 }, { "epoch": 2.028436018957346, "grad_norm": 10.902544131148884, "learning_rate": 2.873266089185597e-06, "loss": 0.4019826352596283, "step": 2568 }, { "epoch": 2.029225908372828, "grad_norm": 12.95751723968972, "learning_rate": 2.869107776562232e-06, "loss": 0.9044560790061951, "step": 2569 }, { "epoch": 2.0300157977883098, "grad_norm": 12.066912492836503, "learning_rate": 2.864951264123635e-06, "loss": 0.9884500503540039, "step": 2570 }, { "epoch": 2.0308056872037916, "grad_norm": 9.279060734945645, "learning_rate": 2.860796555381238e-06, "loss": 0.21672052145004272, "step": 2571 }, { "epoch": 2.0315955766192735, "grad_norm": 12.515738686148927, "learning_rate": 2.8566436538449583e-06, "loss": 0.5464432239532471, "step": 2572 }, { "epoch": 2.0323854660347553, "grad_norm": 5.072237401183682, "learning_rate": 2.8524925630231774e-06, "loss": 0.12266440689563751, "step": 2573 }, { "epoch": 2.0331753554502368, "grad_norm": 13.049508774102216, "learning_rate": 2.8483432864227533e-06, "loss": 0.5199903249740601, "step": 2574 }, { "epoch": 2.0339652448657186, "grad_norm": 26.009573976811115, "learning_rate": 2.8441958275490044e-06, "loss": 1.3708068132400513, "step": 2575 }, { "epoch": 2.0347551342812005, "grad_norm": 8.306114337004733, "learning_rate": 2.8400501899057164e-06, "loss": 0.41701173782348633, "step": 2576 }, { "epoch": 2.0355450236966823, "grad_norm": 19.310700207769223, "learning_rate": 2.8359063769951435e-06, "loss": 0.48340296745300293, "step": 2577 }, { "epoch": 2.036334913112164, "grad_norm": 9.685038803587986, "learning_rate": 2.8317643923179894e-06, "loss": 0.2897825539112091, "step": 2578 }, { "epoch": 2.037124802527646, "grad_norm": 7.494546906971376, "learning_rate": 2.827624239373419e-06, "loss": 0.3327743411064148, "step": 2579 }, { "epoch": 2.037914691943128, "grad_norm": 14.776433298746518, "learning_rate": 2.8234859216590406e-06, "loss": 0.30218467116355896, "step": 2580 }, { "epoch": 2.0387045813586098, "grad_norm": 9.947771139586356, "learning_rate": 2.8193494426709245e-06, "loss": 0.2601761519908905, "step": 2581 }, { "epoch": 2.0394944707740916, "grad_norm": 14.37459477886007, "learning_rate": 2.8152148059035804e-06, "loss": 2.110447645187378, "step": 2582 }, { "epoch": 2.0402843601895735, "grad_norm": 10.93265288740834, "learning_rate": 2.811082014849963e-06, "loss": 0.6686077117919922, "step": 2583 }, { "epoch": 2.0410742496050553, "grad_norm": 10.24553214669215, "learning_rate": 2.806951073001467e-06, "loss": 0.33964627981185913, "step": 2584 }, { "epoch": 2.041864139020537, "grad_norm": 10.359426539370531, "learning_rate": 2.8028219838479265e-06, "loss": 0.2946935296058655, "step": 2585 }, { "epoch": 2.042654028436019, "grad_norm": 7.743470398158407, "learning_rate": 2.798694750877609e-06, "loss": 0.5350501537322998, "step": 2586 }, { "epoch": 2.043443917851501, "grad_norm": 11.20913313781443, "learning_rate": 2.7945693775772153e-06, "loss": 0.3125535547733307, "step": 2587 }, { "epoch": 2.044233807266983, "grad_norm": 8.18102147636904, "learning_rate": 2.7904458674318733e-06, "loss": 0.5411368012428284, "step": 2588 }, { "epoch": 2.0450236966824646, "grad_norm": 9.35774408306311, "learning_rate": 2.7863242239251385e-06, "loss": 0.33201736211776733, "step": 2589 }, { "epoch": 2.0458135860979465, "grad_norm": 8.673142738839285, "learning_rate": 2.78220445053899e-06, "loss": 0.4620051085948944, "step": 2590 }, { "epoch": 2.046603475513428, "grad_norm": 9.55716346640722, "learning_rate": 2.7780865507538236e-06, "loss": 0.44175124168395996, "step": 2591 }, { "epoch": 2.0473933649289098, "grad_norm": 9.80538220230876, "learning_rate": 2.7739705280484565e-06, "loss": 0.23179033398628235, "step": 2592 }, { "epoch": 2.0481832543443916, "grad_norm": 7.778339221842325, "learning_rate": 2.769856385900118e-06, "loss": 0.2992667555809021, "step": 2593 }, { "epoch": 2.0489731437598735, "grad_norm": 10.15304744757696, "learning_rate": 2.7657441277844475e-06, "loss": 0.8387447595596313, "step": 2594 }, { "epoch": 2.0497630331753554, "grad_norm": 22.187673641215007, "learning_rate": 2.7616337571754937e-06, "loss": 0.6397115588188171, "step": 2595 }, { "epoch": 2.050552922590837, "grad_norm": 11.077698692839348, "learning_rate": 2.7575252775457175e-06, "loss": 0.4148407280445099, "step": 2596 }, { "epoch": 2.051342812006319, "grad_norm": 11.214710142878744, "learning_rate": 2.753418692365968e-06, "loss": 0.3338342607021332, "step": 2597 }, { "epoch": 2.052132701421801, "grad_norm": 24.053873668111414, "learning_rate": 2.7493140051055055e-06, "loss": 0.19720637798309326, "step": 2598 }, { "epoch": 2.052922590837283, "grad_norm": 7.360243899896904, "learning_rate": 2.7452112192319813e-06, "loss": 0.17611664533615112, "step": 2599 }, { "epoch": 2.0537124802527646, "grad_norm": 7.902969436001227, "learning_rate": 2.741110338211446e-06, "loss": 0.18759757280349731, "step": 2600 }, { "epoch": 2.0545023696682465, "grad_norm": 11.102771052686371, "learning_rate": 2.7370113655083373e-06, "loss": 0.5418599247932434, "step": 2601 }, { "epoch": 2.0552922590837284, "grad_norm": 10.890805356373017, "learning_rate": 2.732914304585478e-06, "loss": 0.4259986877441406, "step": 2602 }, { "epoch": 2.0560821484992102, "grad_norm": 11.586458157042319, "learning_rate": 2.728819158904078e-06, "loss": 0.36362117528915405, "step": 2603 }, { "epoch": 2.056872037914692, "grad_norm": 18.320864664521476, "learning_rate": 2.7247259319237306e-06, "loss": 0.939771294593811, "step": 2604 }, { "epoch": 2.057661927330174, "grad_norm": 11.501402001297102, "learning_rate": 2.7206346271024103e-06, "loss": 0.2737082839012146, "step": 2605 }, { "epoch": 2.058451816745656, "grad_norm": 12.125160137594307, "learning_rate": 2.716545247896465e-06, "loss": 0.7849869728088379, "step": 2606 }, { "epoch": 2.0592417061611377, "grad_norm": 11.093511362641284, "learning_rate": 2.7124577977606114e-06, "loss": 0.47068697214126587, "step": 2607 }, { "epoch": 2.060031595576619, "grad_norm": 6.735093477058937, "learning_rate": 2.7083722801479407e-06, "loss": 0.30919548869132996, "step": 2608 }, { "epoch": 2.060821484992101, "grad_norm": 11.012490104989881, "learning_rate": 2.704288698509917e-06, "loss": 0.5182478427886963, "step": 2609 }, { "epoch": 2.061611374407583, "grad_norm": 19.951181172455126, "learning_rate": 2.70020705629636e-06, "loss": 0.30237090587615967, "step": 2610 }, { "epoch": 2.0624012638230647, "grad_norm": 7.174756319853717, "learning_rate": 2.696127356955455e-06, "loss": 0.2660676836967468, "step": 2611 }, { "epoch": 2.0631911532385465, "grad_norm": 12.385922369030384, "learning_rate": 2.6920496039337474e-06, "loss": 0.3617851734161377, "step": 2612 }, { "epoch": 2.0639810426540284, "grad_norm": 11.015014488490415, "learning_rate": 2.6879738006761303e-06, "loss": 0.46704721450805664, "step": 2613 }, { "epoch": 2.0647709320695102, "grad_norm": 16.919341385806266, "learning_rate": 2.6838999506258623e-06, "loss": 0.6928585767745972, "step": 2614 }, { "epoch": 2.065560821484992, "grad_norm": 11.230526922983666, "learning_rate": 2.6798280572245427e-06, "loss": 0.47172248363494873, "step": 2615 }, { "epoch": 2.066350710900474, "grad_norm": 6.544914274412647, "learning_rate": 2.6757581239121203e-06, "loss": 0.27590444684028625, "step": 2616 }, { "epoch": 2.067140600315956, "grad_norm": 20.085203153128194, "learning_rate": 2.671690154126889e-06, "loss": 1.725602388381958, "step": 2617 }, { "epoch": 2.0679304897314377, "grad_norm": 9.954561309572316, "learning_rate": 2.667624151305482e-06, "loss": 0.34551358222961426, "step": 2618 }, { "epoch": 2.0687203791469195, "grad_norm": 9.55670352614984, "learning_rate": 2.6635601188828736e-06, "loss": 0.2692929804325104, "step": 2619 }, { "epoch": 2.0695102685624014, "grad_norm": 19.36634128513723, "learning_rate": 2.6594980602923702e-06, "loss": 0.4668102264404297, "step": 2620 }, { "epoch": 2.0703001579778832, "grad_norm": 10.139253556415792, "learning_rate": 2.6554379789656124e-06, "loss": 0.4466455578804016, "step": 2621 }, { "epoch": 2.071090047393365, "grad_norm": 10.412158390774838, "learning_rate": 2.6513798783325708e-06, "loss": 0.3185625970363617, "step": 2622 }, { "epoch": 2.071879936808847, "grad_norm": 16.193194717671236, "learning_rate": 2.647323761821542e-06, "loss": 1.1391959190368652, "step": 2623 }, { "epoch": 2.0726698262243284, "grad_norm": 10.665895924665014, "learning_rate": 2.643269632859146e-06, "loss": 0.5236232876777649, "step": 2624 }, { "epoch": 2.0734597156398102, "grad_norm": 9.152693760808571, "learning_rate": 2.6392174948703253e-06, "loss": 0.38468989729881287, "step": 2625 }, { "epoch": 2.074249605055292, "grad_norm": 10.476888206104105, "learning_rate": 2.635167351278339e-06, "loss": 0.21812653541564941, "step": 2626 }, { "epoch": 2.075039494470774, "grad_norm": 8.26594520989118, "learning_rate": 2.63111920550476e-06, "loss": 0.5767203569412231, "step": 2627 }, { "epoch": 2.075829383886256, "grad_norm": 10.011641691561126, "learning_rate": 2.62707306096948e-06, "loss": 0.5115009546279907, "step": 2628 }, { "epoch": 2.0766192733017377, "grad_norm": 9.073068879094404, "learning_rate": 2.6230289210906904e-06, "loss": 0.4996326267719269, "step": 2629 }, { "epoch": 2.0774091627172195, "grad_norm": 11.435973854776599, "learning_rate": 2.6189867892848962e-06, "loss": 1.0042023658752441, "step": 2630 }, { "epoch": 2.0781990521327014, "grad_norm": 16.103230479782546, "learning_rate": 2.614946668966902e-06, "loss": 0.5340977907180786, "step": 2631 }, { "epoch": 2.0789889415481833, "grad_norm": 10.298294605139441, "learning_rate": 2.610908563549815e-06, "loss": 0.23495090007781982, "step": 2632 }, { "epoch": 2.079778830963665, "grad_norm": 12.126262761621527, "learning_rate": 2.606872476445042e-06, "loss": 0.4852793216705322, "step": 2633 }, { "epoch": 2.080568720379147, "grad_norm": 14.89375330624192, "learning_rate": 2.602838411062284e-06, "loss": 0.42270925641059875, "step": 2634 }, { "epoch": 2.081358609794629, "grad_norm": 8.811379648381822, "learning_rate": 2.598806370809528e-06, "loss": 0.3555320203304291, "step": 2635 }, { "epoch": 2.0821484992101107, "grad_norm": 9.627230128604296, "learning_rate": 2.5947763590930542e-06, "loss": 0.7813281416893005, "step": 2636 }, { "epoch": 2.0829383886255926, "grad_norm": 12.811410790915238, "learning_rate": 2.590748379317436e-06, "loss": 0.5171550512313843, "step": 2637 }, { "epoch": 2.0837282780410744, "grad_norm": 6.703660941737119, "learning_rate": 2.586722434885519e-06, "loss": 0.4919162392616272, "step": 2638 }, { "epoch": 2.0845181674565563, "grad_norm": 8.360573561210712, "learning_rate": 2.582698529198439e-06, "loss": 0.28129732608795166, "step": 2639 }, { "epoch": 2.085308056872038, "grad_norm": 10.528148772142709, "learning_rate": 2.5786766656555996e-06, "loss": 0.28879594802856445, "step": 2640 }, { "epoch": 2.0860979462875195, "grad_norm": 9.079673356688698, "learning_rate": 2.574656847654684e-06, "loss": 0.5426896810531616, "step": 2641 }, { "epoch": 2.0868878357030014, "grad_norm": 9.340991792914485, "learning_rate": 2.5706390785916526e-06, "loss": 0.7707650661468506, "step": 2642 }, { "epoch": 2.0876777251184833, "grad_norm": 9.244986743162158, "learning_rate": 2.5666233618607274e-06, "loss": 0.34384623169898987, "step": 2643 }, { "epoch": 2.088467614533965, "grad_norm": 16.744652694879548, "learning_rate": 2.5626097008543995e-06, "loss": 0.5586552619934082, "step": 2644 }, { "epoch": 2.089257503949447, "grad_norm": 9.45648755694814, "learning_rate": 2.5585980989634217e-06, "loss": 0.20669318735599518, "step": 2645 }, { "epoch": 2.090047393364929, "grad_norm": 10.927788414106475, "learning_rate": 2.55458855957681e-06, "loss": 0.3616315424442291, "step": 2646 }, { "epoch": 2.0908372827804107, "grad_norm": 10.887569307564323, "learning_rate": 2.5505810860818356e-06, "loss": 0.4344330132007599, "step": 2647 }, { "epoch": 2.0916271721958926, "grad_norm": 7.716338584985328, "learning_rate": 2.5465756818640258e-06, "loss": 0.7404396533966064, "step": 2648 }, { "epoch": 2.0924170616113744, "grad_norm": 8.682244564927794, "learning_rate": 2.5425723503071586e-06, "loss": 0.7206960320472717, "step": 2649 }, { "epoch": 2.0932069510268563, "grad_norm": 10.426071122202922, "learning_rate": 2.538571094793263e-06, "loss": 0.4914172887802124, "step": 2650 }, { "epoch": 2.093996840442338, "grad_norm": 10.818446184348494, "learning_rate": 2.534571918702611e-06, "loss": 0.22247469425201416, "step": 2651 }, { "epoch": 2.09478672985782, "grad_norm": 8.183773295547795, "learning_rate": 2.530574825413722e-06, "loss": 0.5524343252182007, "step": 2652 }, { "epoch": 2.095576619273302, "grad_norm": 10.561001834128774, "learning_rate": 2.526579818303351e-06, "loss": 0.4442840814590454, "step": 2653 }, { "epoch": 2.0963665086887837, "grad_norm": 12.36354051019539, "learning_rate": 2.5225869007464953e-06, "loss": 0.43416035175323486, "step": 2654 }, { "epoch": 2.0971563981042656, "grad_norm": 10.720335557119547, "learning_rate": 2.5185960761163816e-06, "loss": 0.3992769718170166, "step": 2655 }, { "epoch": 2.0979462875197474, "grad_norm": 12.529038317941579, "learning_rate": 2.514607347784478e-06, "loss": 0.5805951356887817, "step": 2656 }, { "epoch": 2.098736176935229, "grad_norm": 8.702799415503215, "learning_rate": 2.510620719120469e-06, "loss": 0.4916655123233795, "step": 2657 }, { "epoch": 2.0995260663507107, "grad_norm": 14.453659414269685, "learning_rate": 2.5066361934922745e-06, "loss": 0.3664616346359253, "step": 2658 }, { "epoch": 2.1003159557661926, "grad_norm": 9.593268812077245, "learning_rate": 2.502653774266034e-06, "loss": 1.0257253646850586, "step": 2659 }, { "epoch": 2.1011058451816744, "grad_norm": 10.7133353823578, "learning_rate": 2.4986734648061066e-06, "loss": 0.3108058273792267, "step": 2660 }, { "epoch": 2.1018957345971563, "grad_norm": 9.400571761002238, "learning_rate": 2.4946952684750773e-06, "loss": 0.6172835826873779, "step": 2661 }, { "epoch": 2.102685624012638, "grad_norm": 13.17142044952775, "learning_rate": 2.4907191886337334e-06, "loss": 0.7078754305839539, "step": 2662 }, { "epoch": 2.10347551342812, "grad_norm": 9.62188121387961, "learning_rate": 2.4867452286410815e-06, "loss": 0.8794481754302979, "step": 2663 }, { "epoch": 2.104265402843602, "grad_norm": 8.644663424593888, "learning_rate": 2.482773391854335e-06, "loss": 0.3982178866863251, "step": 2664 }, { "epoch": 2.1050552922590837, "grad_norm": 8.326165450957228, "learning_rate": 2.4788036816289177e-06, "loss": 0.2630547881126404, "step": 2665 }, { "epoch": 2.1058451816745656, "grad_norm": 8.952882981363153, "learning_rate": 2.474836101318453e-06, "loss": 0.5207113027572632, "step": 2666 }, { "epoch": 2.1066350710900474, "grad_norm": 7.954230630151849, "learning_rate": 2.470870654274767e-06, "loss": 0.6573061943054199, "step": 2667 }, { "epoch": 2.1074249605055293, "grad_norm": 9.631811512663736, "learning_rate": 2.4669073438478784e-06, "loss": 0.29487311840057373, "step": 2668 }, { "epoch": 2.108214849921011, "grad_norm": 10.558058540717864, "learning_rate": 2.4629461733860055e-06, "loss": 0.24810953438282013, "step": 2669 }, { "epoch": 2.109004739336493, "grad_norm": 7.799037056656612, "learning_rate": 2.458987146235562e-06, "loss": 0.20641303062438965, "step": 2670 }, { "epoch": 2.109794628751975, "grad_norm": 9.478790048879866, "learning_rate": 2.4550302657411447e-06, "loss": 0.3360731303691864, "step": 2671 }, { "epoch": 2.1105845181674567, "grad_norm": 6.220059265575532, "learning_rate": 2.4510755352455413e-06, "loss": 0.5347034931182861, "step": 2672 }, { "epoch": 2.1113744075829386, "grad_norm": 12.272978606568286, "learning_rate": 2.4471229580897155e-06, "loss": 0.33499157428741455, "step": 2673 }, { "epoch": 2.11216429699842, "grad_norm": 11.71613295930798, "learning_rate": 2.443172537612823e-06, "loss": 0.810235321521759, "step": 2674 }, { "epoch": 2.112954186413902, "grad_norm": 10.155734244037252, "learning_rate": 2.43922427715219e-06, "loss": 0.7765663862228394, "step": 2675 }, { "epoch": 2.1137440758293837, "grad_norm": 8.684157709345978, "learning_rate": 2.4352781800433213e-06, "loss": 0.2663138508796692, "step": 2676 }, { "epoch": 2.1145339652448656, "grad_norm": 7.269900003866313, "learning_rate": 2.431334249619891e-06, "loss": 0.8140415549278259, "step": 2677 }, { "epoch": 2.1153238546603474, "grad_norm": 11.872895668793475, "learning_rate": 2.427392489213745e-06, "loss": 0.7136765718460083, "step": 2678 }, { "epoch": 2.1161137440758293, "grad_norm": 15.205862969567574, "learning_rate": 2.4234529021548963e-06, "loss": 0.581038236618042, "step": 2679 }, { "epoch": 2.116903633491311, "grad_norm": 10.329194968162666, "learning_rate": 2.4195154917715202e-06, "loss": 0.313004732131958, "step": 2680 }, { "epoch": 2.117693522906793, "grad_norm": 7.375552046117448, "learning_rate": 2.4155802613899537e-06, "loss": 0.41316336393356323, "step": 2681 }, { "epoch": 2.118483412322275, "grad_norm": 12.51452481692, "learning_rate": 2.4116472143346935e-06, "loss": 1.1459236145019531, "step": 2682 }, { "epoch": 2.1192733017377567, "grad_norm": 8.014729129098054, "learning_rate": 2.40771635392839e-06, "loss": 0.10683616995811462, "step": 2683 }, { "epoch": 2.1200631911532386, "grad_norm": 16.715819051395663, "learning_rate": 2.4037876834918467e-06, "loss": 0.8776874542236328, "step": 2684 }, { "epoch": 2.1208530805687205, "grad_norm": 15.222031604067872, "learning_rate": 2.3998612063440175e-06, "loss": 0.549107551574707, "step": 2685 }, { "epoch": 2.1216429699842023, "grad_norm": 11.067349309197576, "learning_rate": 2.3959369258020036e-06, "loss": 0.3277229368686676, "step": 2686 }, { "epoch": 2.122432859399684, "grad_norm": 10.422204153009583, "learning_rate": 2.3920148451810504e-06, "loss": 0.3309401869773865, "step": 2687 }, { "epoch": 2.123222748815166, "grad_norm": 11.728679388557909, "learning_rate": 2.3880949677945437e-06, "loss": 0.6130886673927307, "step": 2688 }, { "epoch": 2.124012638230648, "grad_norm": 7.010036521611688, "learning_rate": 2.3841772969540088e-06, "loss": 0.18529269099235535, "step": 2689 }, { "epoch": 2.1248025276461293, "grad_norm": 21.13853477472658, "learning_rate": 2.380261835969108e-06, "loss": 1.7113615274429321, "step": 2690 }, { "epoch": 2.125592417061611, "grad_norm": 11.881636165836428, "learning_rate": 2.3763485881476345e-06, "loss": 0.254605233669281, "step": 2691 }, { "epoch": 2.126382306477093, "grad_norm": 8.99366986984141, "learning_rate": 2.3724375567955115e-06, "loss": 0.17952272295951843, "step": 2692 }, { "epoch": 2.127172195892575, "grad_norm": 12.677526063207466, "learning_rate": 2.368528745216795e-06, "loss": 0.47902315855026245, "step": 2693 }, { "epoch": 2.1279620853080567, "grad_norm": 17.055248079539123, "learning_rate": 2.3646221567136618e-06, "loss": 0.36781108379364014, "step": 2694 }, { "epoch": 2.1287519747235386, "grad_norm": 12.63653018866412, "learning_rate": 2.3607177945864073e-06, "loss": 0.3109586834907532, "step": 2695 }, { "epoch": 2.1295418641390205, "grad_norm": 7.61269588995347, "learning_rate": 2.3568156621334508e-06, "loss": 0.5835099220275879, "step": 2696 }, { "epoch": 2.1303317535545023, "grad_norm": 9.309063388608303, "learning_rate": 2.352915762651325e-06, "loss": 0.6755543947219849, "step": 2697 }, { "epoch": 2.131121642969984, "grad_norm": 12.180295054957952, "learning_rate": 2.3490180994346816e-06, "loss": 0.3916603624820709, "step": 2698 }, { "epoch": 2.131911532385466, "grad_norm": 11.375922907331335, "learning_rate": 2.3451226757762794e-06, "loss": 0.3473902940750122, "step": 2699 }, { "epoch": 2.132701421800948, "grad_norm": 11.141196729822012, "learning_rate": 2.341229494966981e-06, "loss": 0.48169010877609253, "step": 2700 }, { "epoch": 2.1334913112164298, "grad_norm": 11.044250444422778, "learning_rate": 2.3373385602957595e-06, "loss": 0.5068016052246094, "step": 2701 }, { "epoch": 2.1342812006319116, "grad_norm": 14.599024263747504, "learning_rate": 2.3334498750496913e-06, "loss": 0.5701960325241089, "step": 2702 }, { "epoch": 2.1350710900473935, "grad_norm": 8.48416563722893, "learning_rate": 2.3295634425139502e-06, "loss": 0.7108640074729919, "step": 2703 }, { "epoch": 2.1358609794628753, "grad_norm": 8.64074974867785, "learning_rate": 2.3256792659718065e-06, "loss": 0.5362042188644409, "step": 2704 }, { "epoch": 2.136650868878357, "grad_norm": 13.030143439241625, "learning_rate": 2.321797348704625e-06, "loss": 0.6571926474571228, "step": 2705 }, { "epoch": 2.137440758293839, "grad_norm": 11.69730983416275, "learning_rate": 2.317917693991863e-06, "loss": 0.4899098575115204, "step": 2706 }, { "epoch": 2.138230647709321, "grad_norm": 14.65236719972843, "learning_rate": 2.314040305111065e-06, "loss": 1.4374269247055054, "step": 2707 }, { "epoch": 2.1390205371248023, "grad_norm": 11.277026389499401, "learning_rate": 2.310165185337862e-06, "loss": 0.40772897005081177, "step": 2708 }, { "epoch": 2.139810426540284, "grad_norm": 9.022193712896843, "learning_rate": 2.3062923379459684e-06, "loss": 0.2784253656864166, "step": 2709 }, { "epoch": 2.140600315955766, "grad_norm": 9.676192450422299, "learning_rate": 2.302421766207177e-06, "loss": 0.34623268246650696, "step": 2710 }, { "epoch": 2.141390205371248, "grad_norm": 8.652801319265459, "learning_rate": 2.2985534733913623e-06, "loss": 0.33302298188209534, "step": 2711 }, { "epoch": 2.1421800947867298, "grad_norm": 8.990062562090557, "learning_rate": 2.2946874627664677e-06, "loss": 0.44455865025520325, "step": 2712 }, { "epoch": 2.1429699842022116, "grad_norm": 21.04889344511468, "learning_rate": 2.2908237375985137e-06, "loss": 0.5782222747802734, "step": 2713 }, { "epoch": 2.1437598736176935, "grad_norm": 9.050405036026557, "learning_rate": 2.2869623011515874e-06, "loss": 0.3401952385902405, "step": 2714 }, { "epoch": 2.1445497630331753, "grad_norm": 15.891303126899643, "learning_rate": 2.283103156687843e-06, "loss": 0.5884231925010681, "step": 2715 }, { "epoch": 2.145339652448657, "grad_norm": 10.482443238163478, "learning_rate": 2.2792463074674987e-06, "loss": 0.6347147226333618, "step": 2716 }, { "epoch": 2.146129541864139, "grad_norm": 8.647383536000094, "learning_rate": 2.275391756748833e-06, "loss": 0.5545493960380554, "step": 2717 }, { "epoch": 2.146919431279621, "grad_norm": 12.912399269663396, "learning_rate": 2.2715395077881837e-06, "loss": 0.639219343662262, "step": 2718 }, { "epoch": 2.147709320695103, "grad_norm": 14.471210640975691, "learning_rate": 2.2676895638399427e-06, "loss": 0.5747156143188477, "step": 2719 }, { "epoch": 2.1484992101105846, "grad_norm": 10.784235307725265, "learning_rate": 2.2638419281565536e-06, "loss": 1.0131480693817139, "step": 2720 }, { "epoch": 2.1492890995260665, "grad_norm": 8.991887236619839, "learning_rate": 2.259996603988518e-06, "loss": 0.1766696721315384, "step": 2721 }, { "epoch": 2.1500789889415484, "grad_norm": 8.434251974603205, "learning_rate": 2.256153594584372e-06, "loss": 0.4502110481262207, "step": 2722 }, { "epoch": 2.1508688783570302, "grad_norm": 9.15347582657087, "learning_rate": 2.2523129031907047e-06, "loss": 0.17326998710632324, "step": 2723 }, { "epoch": 2.1516587677725116, "grad_norm": 10.132042927830476, "learning_rate": 2.248474533052145e-06, "loss": 0.32104817032814026, "step": 2724 }, { "epoch": 2.1524486571879935, "grad_norm": 22.126808261308526, "learning_rate": 2.2446384874113586e-06, "loss": 0.28873202204704285, "step": 2725 }, { "epoch": 2.1532385466034754, "grad_norm": 9.874113810387273, "learning_rate": 2.2408047695090533e-06, "loss": 0.9483802318572998, "step": 2726 }, { "epoch": 2.154028436018957, "grad_norm": 8.399149585433827, "learning_rate": 2.2369733825839663e-06, "loss": 0.4678645133972168, "step": 2727 }, { "epoch": 2.154818325434439, "grad_norm": 8.149774853170221, "learning_rate": 2.233144329872863e-06, "loss": 0.4194965660572052, "step": 2728 }, { "epoch": 2.155608214849921, "grad_norm": 10.007897566804706, "learning_rate": 2.229317614610539e-06, "loss": 0.18562570214271545, "step": 2729 }, { "epoch": 2.156398104265403, "grad_norm": 11.593570889438942, "learning_rate": 2.2254932400298195e-06, "loss": 0.2921253442764282, "step": 2730 }, { "epoch": 2.1571879936808847, "grad_norm": 9.592986733727226, "learning_rate": 2.2216712093615474e-06, "loss": 0.5794805288314819, "step": 2731 }, { "epoch": 2.1579778830963665, "grad_norm": 18.709687315043503, "learning_rate": 2.2178515258345885e-06, "loss": 0.7258075475692749, "step": 2732 }, { "epoch": 2.1587677725118484, "grad_norm": 15.272801270549794, "learning_rate": 2.2140341926758186e-06, "loss": 0.622929036617279, "step": 2733 }, { "epoch": 2.1595576619273302, "grad_norm": 12.017018764788721, "learning_rate": 2.2102192131101386e-06, "loss": 0.656425952911377, "step": 2734 }, { "epoch": 2.160347551342812, "grad_norm": 13.837307556949694, "learning_rate": 2.2064065903604537e-06, "loss": 0.3320290744304657, "step": 2735 }, { "epoch": 2.161137440758294, "grad_norm": 9.400730803116534, "learning_rate": 2.2025963276476814e-06, "loss": 0.3217647075653076, "step": 2736 }, { "epoch": 2.161927330173776, "grad_norm": 13.942726496241551, "learning_rate": 2.1987884281907425e-06, "loss": 0.5152993202209473, "step": 2737 }, { "epoch": 2.1627172195892577, "grad_norm": 9.822554670458867, "learning_rate": 2.1949828952065643e-06, "loss": 0.23020845651626587, "step": 2738 }, { "epoch": 2.1635071090047395, "grad_norm": 12.584615682614533, "learning_rate": 2.191179731910073e-06, "loss": 0.4029275178909302, "step": 2739 }, { "epoch": 2.1642969984202214, "grad_norm": 11.17103259387213, "learning_rate": 2.1873789415141932e-06, "loss": 0.23068757355213165, "step": 2740 }, { "epoch": 2.165086887835703, "grad_norm": 12.389468057157949, "learning_rate": 2.183580527229846e-06, "loss": 0.39349129796028137, "step": 2741 }, { "epoch": 2.1658767772511847, "grad_norm": 8.11758263289566, "learning_rate": 2.1797844922659437e-06, "loss": 0.2747986912727356, "step": 2742 }, { "epoch": 2.1666666666666665, "grad_norm": 12.530738023381977, "learning_rate": 2.1759908398293896e-06, "loss": 1.14316987991333, "step": 2743 }, { "epoch": 2.1674565560821484, "grad_norm": 14.915166154269375, "learning_rate": 2.1721995731250726e-06, "loss": 0.517180323600769, "step": 2744 }, { "epoch": 2.1682464454976302, "grad_norm": 13.701952179268636, "learning_rate": 2.1684106953558693e-06, "loss": 0.25364458560943604, "step": 2745 }, { "epoch": 2.169036334913112, "grad_norm": 9.011375942635011, "learning_rate": 2.1646242097226343e-06, "loss": 0.34542322158813477, "step": 2746 }, { "epoch": 2.169826224328594, "grad_norm": 9.781858990537309, "learning_rate": 2.1608401194242035e-06, "loss": 0.24171094596385956, "step": 2747 }, { "epoch": 2.170616113744076, "grad_norm": 8.982304890802853, "learning_rate": 2.1570584276573896e-06, "loss": 0.609094500541687, "step": 2748 }, { "epoch": 2.1714060031595577, "grad_norm": 9.10557044009763, "learning_rate": 2.1532791376169778e-06, "loss": 0.2869632840156555, "step": 2749 }, { "epoch": 2.1721958925750395, "grad_norm": 9.939923288953379, "learning_rate": 2.1495022524957244e-06, "loss": 0.39264094829559326, "step": 2750 }, { "epoch": 2.1729857819905214, "grad_norm": 9.585598670064135, "learning_rate": 2.1457277754843547e-06, "loss": 0.18833236396312714, "step": 2751 }, { "epoch": 2.1737756714060033, "grad_norm": 11.521955780403779, "learning_rate": 2.14195570977156e-06, "loss": 0.9987523555755615, "step": 2752 }, { "epoch": 2.174565560821485, "grad_norm": 19.37585048155451, "learning_rate": 2.1381860585439906e-06, "loss": 0.46828562021255493, "step": 2753 }, { "epoch": 2.175355450236967, "grad_norm": 11.086157538641421, "learning_rate": 2.134418824986267e-06, "loss": 0.4600408673286438, "step": 2754 }, { "epoch": 2.176145339652449, "grad_norm": 7.742526132754247, "learning_rate": 2.1306540122809543e-06, "loss": 0.27102628350257874, "step": 2755 }, { "epoch": 2.1769352290679307, "grad_norm": 19.82779184404181, "learning_rate": 2.1268916236085814e-06, "loss": 0.5771945714950562, "step": 2756 }, { "epoch": 2.177725118483412, "grad_norm": 14.081906680861074, "learning_rate": 2.1231316621476234e-06, "loss": 0.3828513026237488, "step": 2757 }, { "epoch": 2.178515007898894, "grad_norm": 12.677665354894604, "learning_rate": 2.1193741310745125e-06, "loss": 1.0411120653152466, "step": 2758 }, { "epoch": 2.179304897314376, "grad_norm": 8.072309116801922, "learning_rate": 2.115619033563624e-06, "loss": 0.1945279985666275, "step": 2759 }, { "epoch": 2.1800947867298577, "grad_norm": 13.107958264184125, "learning_rate": 2.111866372787272e-06, "loss": 0.32704418897628784, "step": 2760 }, { "epoch": 2.1808846761453395, "grad_norm": 10.8975500647945, "learning_rate": 2.1081161519157168e-06, "loss": 0.4165365695953369, "step": 2761 }, { "epoch": 2.1816745655608214, "grad_norm": 15.891308461676624, "learning_rate": 2.104368374117161e-06, "loss": 0.7583081722259521, "step": 2762 }, { "epoch": 2.1824644549763033, "grad_norm": 12.34192529563348, "learning_rate": 2.100623042557739e-06, "loss": 0.39596104621887207, "step": 2763 }, { "epoch": 2.183254344391785, "grad_norm": 12.790970766784817, "learning_rate": 2.0968801604015176e-06, "loss": 0.33872219920158386, "step": 2764 }, { "epoch": 2.184044233807267, "grad_norm": 8.883238538058585, "learning_rate": 2.0931397308104986e-06, "loss": 0.48357391357421875, "step": 2765 }, { "epoch": 2.184834123222749, "grad_norm": 10.033364618014366, "learning_rate": 2.0894017569446034e-06, "loss": 0.2329304814338684, "step": 2766 }, { "epoch": 2.1856240126382307, "grad_norm": 10.963546856917345, "learning_rate": 2.0856662419616908e-06, "loss": 0.38120099902153015, "step": 2767 }, { "epoch": 2.1864139020537126, "grad_norm": 8.659173313868209, "learning_rate": 2.081933189017533e-06, "loss": 0.18225795030593872, "step": 2768 }, { "epoch": 2.1872037914691944, "grad_norm": 12.579078453532555, "learning_rate": 2.0782026012658268e-06, "loss": 0.6677108407020569, "step": 2769 }, { "epoch": 2.1879936808846763, "grad_norm": 10.496831262975576, "learning_rate": 2.074474481858185e-06, "loss": 0.7098184823989868, "step": 2770 }, { "epoch": 2.188783570300158, "grad_norm": 15.314620074905251, "learning_rate": 2.0707488339441338e-06, "loss": 0.8517345786094666, "step": 2771 }, { "epoch": 2.18957345971564, "grad_norm": 11.322801114701715, "learning_rate": 2.067025660671114e-06, "loss": 0.15628886222839355, "step": 2772 }, { "epoch": 2.190363349131122, "grad_norm": 10.857872805242897, "learning_rate": 2.0633049651844744e-06, "loss": 0.23775914311408997, "step": 2773 }, { "epoch": 2.1911532385466037, "grad_norm": 17.367672664117908, "learning_rate": 2.0595867506274707e-06, "loss": 1.0561261177062988, "step": 2774 }, { "epoch": 2.191943127962085, "grad_norm": 10.613741381941095, "learning_rate": 2.055871020141263e-06, "loss": 0.7364283204078674, "step": 2775 }, { "epoch": 2.192733017377567, "grad_norm": 11.141296214016752, "learning_rate": 2.052157776864912e-06, "loss": 0.5597008466720581, "step": 2776 }, { "epoch": 2.193522906793049, "grad_norm": 16.153450357978947, "learning_rate": 2.0484470239353786e-06, "loss": 0.48830780386924744, "step": 2777 }, { "epoch": 2.1943127962085307, "grad_norm": 13.351249645259788, "learning_rate": 2.044738764487519e-06, "loss": 1.164239764213562, "step": 2778 }, { "epoch": 2.1951026856240126, "grad_norm": 9.298702632023925, "learning_rate": 2.0410330016540824e-06, "loss": 0.6914669275283813, "step": 2779 }, { "epoch": 2.1958925750394944, "grad_norm": 19.26030750116404, "learning_rate": 2.0373297385657105e-06, "loss": 0.9622442722320557, "step": 2780 }, { "epoch": 2.1966824644549763, "grad_norm": 10.373277943662284, "learning_rate": 2.0336289783509306e-06, "loss": 0.2636718153953552, "step": 2781 }, { "epoch": 2.197472353870458, "grad_norm": 13.914270193422176, "learning_rate": 2.0299307241361587e-06, "loss": 0.5753885507583618, "step": 2782 }, { "epoch": 2.19826224328594, "grad_norm": 11.117981102297946, "learning_rate": 2.0262349790456908e-06, "loss": 0.3335786461830139, "step": 2783 }, { "epoch": 2.199052132701422, "grad_norm": 12.558104305781397, "learning_rate": 2.0225417462017054e-06, "loss": 0.348050594329834, "step": 2784 }, { "epoch": 2.1998420221169037, "grad_norm": 11.355102467965308, "learning_rate": 2.0188510287242564e-06, "loss": 1.1008378267288208, "step": 2785 }, { "epoch": 2.2006319115323856, "grad_norm": 7.680728739198105, "learning_rate": 2.0151628297312765e-06, "loss": 0.573356568813324, "step": 2786 }, { "epoch": 2.2014218009478674, "grad_norm": 7.8089582056318525, "learning_rate": 2.0114771523385682e-06, "loss": 0.12702372670173645, "step": 2787 }, { "epoch": 2.2022116903633493, "grad_norm": 9.904120419329733, "learning_rate": 2.0077939996598023e-06, "loss": 0.33679264783859253, "step": 2788 }, { "epoch": 2.203001579778831, "grad_norm": 9.182229422297102, "learning_rate": 2.004113374806516e-06, "loss": 0.6996742486953735, "step": 2789 }, { "epoch": 2.2037914691943126, "grad_norm": 19.940382025619883, "learning_rate": 2.0004352808881183e-06, "loss": 0.4126805067062378, "step": 2790 }, { "epoch": 2.2045813586097944, "grad_norm": 8.37724044555483, "learning_rate": 1.996759721011873e-06, "loss": 0.43272721767425537, "step": 2791 }, { "epoch": 2.2053712480252763, "grad_norm": 10.772306642802901, "learning_rate": 1.9930866982829067e-06, "loss": 0.31847819685935974, "step": 2792 }, { "epoch": 2.206161137440758, "grad_norm": 8.767557779912506, "learning_rate": 1.9894162158041972e-06, "loss": 0.3612692654132843, "step": 2793 }, { "epoch": 2.20695102685624, "grad_norm": 7.606502171835215, "learning_rate": 1.9857482766765812e-06, "loss": 0.21649795770645142, "step": 2794 }, { "epoch": 2.207740916271722, "grad_norm": 13.255090811561212, "learning_rate": 1.9820828839987483e-06, "loss": 0.3744838535785675, "step": 2795 }, { "epoch": 2.2085308056872037, "grad_norm": 8.919411850780316, "learning_rate": 1.9784200408672332e-06, "loss": 0.26096653938293457, "step": 2796 }, { "epoch": 2.2093206951026856, "grad_norm": 9.147600079448718, "learning_rate": 1.9747597503764177e-06, "loss": 0.47567954659461975, "step": 2797 }, { "epoch": 2.2101105845181674, "grad_norm": 9.819631053417764, "learning_rate": 1.9711020156185266e-06, "loss": 0.4913148880004883, "step": 2798 }, { "epoch": 2.2109004739336493, "grad_norm": 11.328664528684907, "learning_rate": 1.9674468396836273e-06, "loss": 0.6570562720298767, "step": 2799 }, { "epoch": 2.211690363349131, "grad_norm": 8.021273430434308, "learning_rate": 1.963794225659624e-06, "loss": 0.5904330015182495, "step": 2800 }, { "epoch": 2.212480252764613, "grad_norm": 8.57411699896354, "learning_rate": 1.960144176632257e-06, "loss": 0.6471877098083496, "step": 2801 }, { "epoch": 2.213270142180095, "grad_norm": 11.950981978395122, "learning_rate": 1.9564966956850995e-06, "loss": 0.5829299688339233, "step": 2802 }, { "epoch": 2.2140600315955767, "grad_norm": 8.451163149947735, "learning_rate": 1.952851785899556e-06, "loss": 0.35751497745513916, "step": 2803 }, { "epoch": 2.2148499210110586, "grad_norm": 10.138877718616586, "learning_rate": 1.949209450354858e-06, "loss": 0.4311722218990326, "step": 2804 }, { "epoch": 2.2156398104265405, "grad_norm": 12.683217627573496, "learning_rate": 1.9455696921280627e-06, "loss": 0.7441365718841553, "step": 2805 }, { "epoch": 2.2164296998420223, "grad_norm": 11.694728091873205, "learning_rate": 1.941932514294049e-06, "loss": 0.49206316471099854, "step": 2806 }, { "epoch": 2.217219589257504, "grad_norm": 11.855236520924457, "learning_rate": 1.938297919925518e-06, "loss": 0.5097864866256714, "step": 2807 }, { "epoch": 2.2180094786729856, "grad_norm": 12.318836475278742, "learning_rate": 1.9346659120929867e-06, "loss": 0.6787229776382446, "step": 2808 }, { "epoch": 2.2187993680884674, "grad_norm": 11.391591651407479, "learning_rate": 1.9310364938647863e-06, "loss": 0.3987847864627838, "step": 2809 }, { "epoch": 2.2195892575039493, "grad_norm": 10.502147605835487, "learning_rate": 1.9274096683070625e-06, "loss": 0.5808389782905579, "step": 2810 }, { "epoch": 2.220379146919431, "grad_norm": 15.889506259074903, "learning_rate": 1.9237854384837685e-06, "loss": 0.6458317041397095, "step": 2811 }, { "epoch": 2.221169036334913, "grad_norm": 10.72441475075481, "learning_rate": 1.920163807456666e-06, "loss": 0.22764013707637787, "step": 2812 }, { "epoch": 2.221958925750395, "grad_norm": 9.406804023326472, "learning_rate": 1.916544778285319e-06, "loss": 0.3760148286819458, "step": 2813 }, { "epoch": 2.2227488151658767, "grad_norm": 12.999087321799392, "learning_rate": 1.912928354027101e-06, "loss": 0.9230769872665405, "step": 2814 }, { "epoch": 2.2235387045813586, "grad_norm": 5.261146266935435, "learning_rate": 1.9093145377371734e-06, "loss": 0.22567349672317505, "step": 2815 }, { "epoch": 2.2243285939968405, "grad_norm": 9.418952716249674, "learning_rate": 1.9057033324685014e-06, "loss": 0.3293178677558899, "step": 2816 }, { "epoch": 2.2251184834123223, "grad_norm": 7.627242194894803, "learning_rate": 1.9020947412718428e-06, "loss": 0.3232775926589966, "step": 2817 }, { "epoch": 2.225908372827804, "grad_norm": 6.888057264096037, "learning_rate": 1.8984887671957492e-06, "loss": 0.3350941240787506, "step": 2818 }, { "epoch": 2.226698262243286, "grad_norm": 9.593533038856789, "learning_rate": 1.8948854132865601e-06, "loss": 0.4311079978942871, "step": 2819 }, { "epoch": 2.227488151658768, "grad_norm": 13.454745411900346, "learning_rate": 1.8912846825883973e-06, "loss": 1.0597552061080933, "step": 2820 }, { "epoch": 2.2282780410742498, "grad_norm": 11.058976812584511, "learning_rate": 1.8876865781431714e-06, "loss": 0.6466134786605835, "step": 2821 }, { "epoch": 2.2290679304897316, "grad_norm": 9.06033016129208, "learning_rate": 1.8840911029905718e-06, "loss": 0.2965662479400635, "step": 2822 }, { "epoch": 2.229857819905213, "grad_norm": 10.351708737498239, "learning_rate": 1.8804982601680705e-06, "loss": 0.367464542388916, "step": 2823 }, { "epoch": 2.230647709320695, "grad_norm": 9.6500211642715, "learning_rate": 1.8769080527109113e-06, "loss": 0.48031240701675415, "step": 2824 }, { "epoch": 2.2314375987361768, "grad_norm": 10.833034804804822, "learning_rate": 1.8733204836521156e-06, "loss": 0.40878814458847046, "step": 2825 }, { "epoch": 2.2322274881516586, "grad_norm": 7.26374801360143, "learning_rate": 1.8697355560224679e-06, "loss": 0.5350329279899597, "step": 2826 }, { "epoch": 2.2330173775671405, "grad_norm": 7.132708625630138, "learning_rate": 1.8661532728505315e-06, "loss": 0.12886342406272888, "step": 2827 }, { "epoch": 2.2338072669826223, "grad_norm": 17.58232884596969, "learning_rate": 1.8625736371626307e-06, "loss": 0.37391525506973267, "step": 2828 }, { "epoch": 2.234597156398104, "grad_norm": 9.783002261543379, "learning_rate": 1.8589966519828523e-06, "loss": 0.47310611605644226, "step": 2829 }, { "epoch": 2.235387045813586, "grad_norm": 6.751501626334738, "learning_rate": 1.8554223203330473e-06, "loss": 0.15924470126628876, "step": 2830 }, { "epoch": 2.236176935229068, "grad_norm": 11.510963325216071, "learning_rate": 1.8518506452328182e-06, "loss": 0.589087188243866, "step": 2831 }, { "epoch": 2.2369668246445498, "grad_norm": 11.300997192020064, "learning_rate": 1.848281629699532e-06, "loss": 0.29100021719932556, "step": 2832 }, { "epoch": 2.2377567140600316, "grad_norm": 11.201992942679986, "learning_rate": 1.8447152767483039e-06, "loss": 0.4221479296684265, "step": 2833 }, { "epoch": 2.2385466034755135, "grad_norm": 13.129885124330743, "learning_rate": 1.8411515893920002e-06, "loss": 0.5831780433654785, "step": 2834 }, { "epoch": 2.2393364928909953, "grad_norm": 8.6407107464604, "learning_rate": 1.8375905706412362e-06, "loss": 0.1891106814146042, "step": 2835 }, { "epoch": 2.240126382306477, "grad_norm": 11.674843768645822, "learning_rate": 1.834032223504371e-06, "loss": 0.7293417453765869, "step": 2836 }, { "epoch": 2.240916271721959, "grad_norm": 13.500251067265657, "learning_rate": 1.83047655098751e-06, "loss": 0.6112027168273926, "step": 2837 }, { "epoch": 2.241706161137441, "grad_norm": 10.949569000417599, "learning_rate": 1.8269235560944954e-06, "loss": 0.7212045192718506, "step": 2838 }, { "epoch": 2.242496050552923, "grad_norm": 15.306834691971645, "learning_rate": 1.823373241826909e-06, "loss": 0.3620745539665222, "step": 2839 }, { "epoch": 2.2432859399684046, "grad_norm": 11.812488548682289, "learning_rate": 1.819825611184069e-06, "loss": 0.6836791038513184, "step": 2840 }, { "epoch": 2.244075829383886, "grad_norm": 12.12932684339931, "learning_rate": 1.8162806671630252e-06, "loss": 0.6339531540870667, "step": 2841 }, { "epoch": 2.244865718799368, "grad_norm": 17.039607010297708, "learning_rate": 1.8127384127585574e-06, "loss": 0.6890298128128052, "step": 2842 }, { "epoch": 2.2456556082148498, "grad_norm": 9.659644016427855, "learning_rate": 1.8091988509631741e-06, "loss": 0.7656424641609192, "step": 2843 }, { "epoch": 2.2464454976303316, "grad_norm": 11.125303849970079, "learning_rate": 1.8056619847671087e-06, "loss": 0.4176521897315979, "step": 2844 }, { "epoch": 2.2472353870458135, "grad_norm": 10.295484759083562, "learning_rate": 1.8021278171583163e-06, "loss": 0.4512486457824707, "step": 2845 }, { "epoch": 2.2480252764612954, "grad_norm": 14.130813395934341, "learning_rate": 1.7985963511224757e-06, "loss": 0.2794567346572876, "step": 2846 }, { "epoch": 2.248815165876777, "grad_norm": 7.986427853036559, "learning_rate": 1.7950675896429815e-06, "loss": 0.5515426993370056, "step": 2847 }, { "epoch": 2.249605055292259, "grad_norm": 10.914377369413538, "learning_rate": 1.7915415357009398e-06, "loss": 0.5858911275863647, "step": 2848 }, { "epoch": 2.250394944707741, "grad_norm": 9.057658354905428, "learning_rate": 1.7880181922751743e-06, "loss": 0.4116283357143402, "step": 2849 }, { "epoch": 2.251184834123223, "grad_norm": 7.630261787099066, "learning_rate": 1.7844975623422151e-06, "loss": 0.11056342720985413, "step": 2850 }, { "epoch": 2.2519747235387046, "grad_norm": 10.503693991312414, "learning_rate": 1.7809796488763058e-06, "loss": 0.3872639834880829, "step": 2851 }, { "epoch": 2.2527646129541865, "grad_norm": 12.035802077065455, "learning_rate": 1.7774644548493908e-06, "loss": 0.6129223108291626, "step": 2852 }, { "epoch": 2.2535545023696684, "grad_norm": 9.46607219708113, "learning_rate": 1.7739519832311147e-06, "loss": 0.7763924598693848, "step": 2853 }, { "epoch": 2.2543443917851502, "grad_norm": 6.681381059967548, "learning_rate": 1.770442236988824e-06, "loss": 0.14335396885871887, "step": 2854 }, { "epoch": 2.255134281200632, "grad_norm": 18.45511369704355, "learning_rate": 1.7669352190875687e-06, "loss": 0.8193086385726929, "step": 2855 }, { "epoch": 2.2559241706161135, "grad_norm": 13.554836834181051, "learning_rate": 1.7634309324900872e-06, "loss": 0.43514060974121094, "step": 2856 }, { "epoch": 2.2567140600315954, "grad_norm": 7.454792364779305, "learning_rate": 1.7599293801568114e-06, "loss": 0.2814521789550781, "step": 2857 }, { "epoch": 2.257503949447077, "grad_norm": 10.368313203853804, "learning_rate": 1.7564305650458662e-06, "loss": 0.41086602210998535, "step": 2858 }, { "epoch": 2.258293838862559, "grad_norm": 13.204982194940747, "learning_rate": 1.7529344901130579e-06, "loss": 0.8798295259475708, "step": 2859 }, { "epoch": 2.259083728278041, "grad_norm": 7.7989794475969925, "learning_rate": 1.749441158311886e-06, "loss": 0.5868876576423645, "step": 2860 }, { "epoch": 2.259873617693523, "grad_norm": 11.119432491809754, "learning_rate": 1.7459505725935282e-06, "loss": 0.737459123134613, "step": 2861 }, { "epoch": 2.2606635071090047, "grad_norm": 8.550440775085379, "learning_rate": 1.7424627359068418e-06, "loss": 0.5273802280426025, "step": 2862 }, { "epoch": 2.2614533965244865, "grad_norm": 17.591370161982017, "learning_rate": 1.738977651198363e-06, "loss": 0.33475935459136963, "step": 2863 }, { "epoch": 2.2622432859399684, "grad_norm": 18.24717692489189, "learning_rate": 1.7354953214123033e-06, "loss": 0.6660683751106262, "step": 2864 }, { "epoch": 2.2630331753554502, "grad_norm": 18.92375097618639, "learning_rate": 1.732015749490546e-06, "loss": 0.5045152902603149, "step": 2865 }, { "epoch": 2.263823064770932, "grad_norm": 11.503325523793645, "learning_rate": 1.7285389383726448e-06, "loss": 0.4045574367046356, "step": 2866 }, { "epoch": 2.264612954186414, "grad_norm": 6.49265889702697, "learning_rate": 1.7250648909958218e-06, "loss": 0.15654566884040833, "step": 2867 }, { "epoch": 2.265402843601896, "grad_norm": 21.090448949983475, "learning_rate": 1.7215936102949626e-06, "loss": 0.48715391755104065, "step": 2868 }, { "epoch": 2.2661927330173777, "grad_norm": 16.416122390825365, "learning_rate": 1.7181250992026177e-06, "loss": 0.43794485926628113, "step": 2869 }, { "epoch": 2.2669826224328595, "grad_norm": 9.994750391150157, "learning_rate": 1.7146593606489958e-06, "loss": 0.24812400341033936, "step": 2870 }, { "epoch": 2.2677725118483414, "grad_norm": 12.269825935627136, "learning_rate": 1.7111963975619644e-06, "loss": 0.3631921410560608, "step": 2871 }, { "epoch": 2.2685624012638232, "grad_norm": 9.202619856134907, "learning_rate": 1.7077362128670471e-06, "loss": 0.402981162071228, "step": 2872 }, { "epoch": 2.269352290679305, "grad_norm": 15.450749001959284, "learning_rate": 1.7042788094874162e-06, "loss": 0.4597552418708801, "step": 2873 }, { "epoch": 2.270142180094787, "grad_norm": 14.523705053291398, "learning_rate": 1.7008241903439032e-06, "loss": 0.8005387783050537, "step": 2874 }, { "epoch": 2.2709320695102684, "grad_norm": 15.590177412312109, "learning_rate": 1.6973723583549773e-06, "loss": 0.9203214645385742, "step": 2875 }, { "epoch": 2.2717219589257502, "grad_norm": 10.511158290122776, "learning_rate": 1.693923316436758e-06, "loss": 0.4327046573162079, "step": 2876 }, { "epoch": 2.272511848341232, "grad_norm": 8.438221638698291, "learning_rate": 1.690477067503008e-06, "loss": 0.5756062269210815, "step": 2877 }, { "epoch": 2.273301737756714, "grad_norm": 10.785045606484534, "learning_rate": 1.6870336144651279e-06, "loss": 0.4346384108066559, "step": 2878 }, { "epoch": 2.274091627172196, "grad_norm": 23.099471736106224, "learning_rate": 1.6835929602321632e-06, "loss": 1.036636233329773, "step": 2879 }, { "epoch": 2.2748815165876777, "grad_norm": 9.814952316628542, "learning_rate": 1.6801551077107846e-06, "loss": 0.35150665044784546, "step": 2880 }, { "epoch": 2.2756714060031595, "grad_norm": 8.217909844120667, "learning_rate": 1.676720059805304e-06, "loss": 0.39682185649871826, "step": 2881 }, { "epoch": 2.2764612954186414, "grad_norm": 10.897341078417234, "learning_rate": 1.6732878194176583e-06, "loss": 0.7163082361221313, "step": 2882 }, { "epoch": 2.2772511848341233, "grad_norm": 9.957341822543633, "learning_rate": 1.6698583894474191e-06, "loss": 0.30133479833602905, "step": 2883 }, { "epoch": 2.278041074249605, "grad_norm": 13.035107378741438, "learning_rate": 1.6664317727917783e-06, "loss": 0.35737180709838867, "step": 2884 }, { "epoch": 2.278830963665087, "grad_norm": 7.354777374068423, "learning_rate": 1.6630079723455555e-06, "loss": 0.44452938437461853, "step": 2885 }, { "epoch": 2.279620853080569, "grad_norm": 15.178155573408157, "learning_rate": 1.6595869910011847e-06, "loss": 0.4684889614582062, "step": 2886 }, { "epoch": 2.2804107424960507, "grad_norm": 13.101785923414612, "learning_rate": 1.6561688316487218e-06, "loss": 0.5309923887252808, "step": 2887 }, { "epoch": 2.2812006319115326, "grad_norm": 18.864692186332565, "learning_rate": 1.652753497175843e-06, "loss": 0.44382137060165405, "step": 2888 }, { "epoch": 2.2819905213270144, "grad_norm": 11.720692846924454, "learning_rate": 1.649340990467832e-06, "loss": 0.5143547058105469, "step": 2889 }, { "epoch": 2.282780410742496, "grad_norm": 7.264023340638442, "learning_rate": 1.6459313144075879e-06, "loss": 0.5263423323631287, "step": 2890 }, { "epoch": 2.2835703001579777, "grad_norm": 10.926449502075325, "learning_rate": 1.642524471875611e-06, "loss": 0.39519965648651123, "step": 2891 }, { "epoch": 2.2843601895734595, "grad_norm": 6.753518451586659, "learning_rate": 1.6391204657500175e-06, "loss": 0.2852955758571625, "step": 2892 }, { "epoch": 2.2851500789889414, "grad_norm": 17.425208641245916, "learning_rate": 1.6357192989065224e-06, "loss": 0.6628504991531372, "step": 2893 }, { "epoch": 2.2859399684044233, "grad_norm": 8.821318692301833, "learning_rate": 1.632320974218442e-06, "loss": 0.3219633102416992, "step": 2894 }, { "epoch": 2.286729857819905, "grad_norm": 9.0269356351622, "learning_rate": 1.6289254945566924e-06, "loss": 0.4846106171607971, "step": 2895 }, { "epoch": 2.287519747235387, "grad_norm": 13.754083021538255, "learning_rate": 1.625532862789786e-06, "loss": 0.6081640720367432, "step": 2896 }, { "epoch": 2.288309636650869, "grad_norm": 6.98421162588746, "learning_rate": 1.622143081783829e-06, "loss": 0.3637077510356903, "step": 2897 }, { "epoch": 2.2890995260663507, "grad_norm": 8.83511177282514, "learning_rate": 1.6187561544025198e-06, "loss": 0.9372393488883972, "step": 2898 }, { "epoch": 2.2898894154818326, "grad_norm": 14.596564979975735, "learning_rate": 1.6153720835071456e-06, "loss": 0.5858408212661743, "step": 2899 }, { "epoch": 2.2906793048973144, "grad_norm": 8.954576114923759, "learning_rate": 1.6119908719565808e-06, "loss": 0.6942223310470581, "step": 2900 }, { "epoch": 2.2914691943127963, "grad_norm": 4.994809407224409, "learning_rate": 1.6086125226072841e-06, "loss": 0.10130809992551804, "step": 2901 }, { "epoch": 2.292259083728278, "grad_norm": 13.657220031311276, "learning_rate": 1.6052370383132959e-06, "loss": 0.8644411563873291, "step": 2902 }, { "epoch": 2.29304897314376, "grad_norm": 8.017431818886724, "learning_rate": 1.601864421926237e-06, "loss": 0.2293100655078888, "step": 2903 }, { "epoch": 2.293838862559242, "grad_norm": 8.496986168821188, "learning_rate": 1.5984946762953047e-06, "loss": 0.29841262102127075, "step": 2904 }, { "epoch": 2.2946287519747237, "grad_norm": 11.503821635042993, "learning_rate": 1.5951278042672703e-06, "loss": 0.2735556662082672, "step": 2905 }, { "epoch": 2.2954186413902056, "grad_norm": 12.231463222308765, "learning_rate": 1.5917638086864778e-06, "loss": 0.22586968541145325, "step": 2906 }, { "epoch": 2.2962085308056874, "grad_norm": 15.045555109566415, "learning_rate": 1.5884026923948465e-06, "loss": 0.6831101179122925, "step": 2907 }, { "epoch": 2.296998420221169, "grad_norm": 9.987448369916713, "learning_rate": 1.585044458231853e-06, "loss": 0.4950708746910095, "step": 2908 }, { "epoch": 2.2977883096366507, "grad_norm": 12.921655770502397, "learning_rate": 1.5816891090345465e-06, "loss": 0.4142148494720459, "step": 2909 }, { "epoch": 2.2985781990521326, "grad_norm": 11.818337429892228, "learning_rate": 1.5783366476375355e-06, "loss": 0.7360016703605652, "step": 2910 }, { "epoch": 2.2993680884676144, "grad_norm": 11.946060281453493, "learning_rate": 1.5749870768729942e-06, "loss": 0.43478429317474365, "step": 2911 }, { "epoch": 2.3001579778830963, "grad_norm": 12.943633330633444, "learning_rate": 1.5716403995706504e-06, "loss": 0.3442850112915039, "step": 2912 }, { "epoch": 2.300947867298578, "grad_norm": 13.412914057941535, "learning_rate": 1.5682966185577846e-06, "loss": 0.4648604989051819, "step": 2913 }, { "epoch": 2.30173775671406, "grad_norm": 7.870032588866806, "learning_rate": 1.564955736659236e-06, "loss": 0.25151491165161133, "step": 2914 }, { "epoch": 2.302527646129542, "grad_norm": 12.706146933959786, "learning_rate": 1.561617756697391e-06, "loss": 0.7445260882377625, "step": 2915 }, { "epoch": 2.3033175355450237, "grad_norm": 14.267775682697732, "learning_rate": 1.5582826814921898e-06, "loss": 0.4260343313217163, "step": 2916 }, { "epoch": 2.3041074249605056, "grad_norm": 12.34278465900189, "learning_rate": 1.5549505138611126e-06, "loss": 0.27285605669021606, "step": 2917 }, { "epoch": 2.3048973143759874, "grad_norm": 8.37869237695817, "learning_rate": 1.5516212566191874e-06, "loss": 0.24186520278453827, "step": 2918 }, { "epoch": 2.3056872037914693, "grad_norm": 16.50332542092082, "learning_rate": 1.5482949125789765e-06, "loss": 0.7350625395774841, "step": 2919 }, { "epoch": 2.306477093206951, "grad_norm": 9.451218272417687, "learning_rate": 1.5449714845505919e-06, "loss": 0.48754340410232544, "step": 2920 }, { "epoch": 2.307266982622433, "grad_norm": 8.596522267263161, "learning_rate": 1.5416509753416742e-06, "loss": 0.4277498126029968, "step": 2921 }, { "epoch": 2.308056872037915, "grad_norm": 9.543021614361946, "learning_rate": 1.5383333877574014e-06, "loss": 0.7987942099571228, "step": 2922 }, { "epoch": 2.3088467614533963, "grad_norm": 8.956197097997833, "learning_rate": 1.5350187246004827e-06, "loss": 0.3484036922454834, "step": 2923 }, { "epoch": 2.309636650868878, "grad_norm": 14.554160691726453, "learning_rate": 1.5317069886711527e-06, "loss": 0.262870728969574, "step": 2924 }, { "epoch": 2.31042654028436, "grad_norm": 10.028179610842129, "learning_rate": 1.5283981827671817e-06, "loss": 0.4316098093986511, "step": 2925 }, { "epoch": 2.311216429699842, "grad_norm": 8.395938728397983, "learning_rate": 1.525092309683857e-06, "loss": 0.29740965366363525, "step": 2926 }, { "epoch": 2.3120063191153237, "grad_norm": 13.787916527860657, "learning_rate": 1.5217893722139927e-06, "loss": 0.3893413841724396, "step": 2927 }, { "epoch": 2.3127962085308056, "grad_norm": 9.912191066457186, "learning_rate": 1.51848937314792e-06, "loss": 0.32815465331077576, "step": 2928 }, { "epoch": 2.3135860979462874, "grad_norm": 9.266231242165011, "learning_rate": 1.5151923152734899e-06, "loss": 0.3238118290901184, "step": 2929 }, { "epoch": 2.3143759873617693, "grad_norm": 13.526154892749354, "learning_rate": 1.511898201376067e-06, "loss": 0.47903206944465637, "step": 2930 }, { "epoch": 2.315165876777251, "grad_norm": 7.3178550699031915, "learning_rate": 1.5086070342385301e-06, "loss": 0.5913638472557068, "step": 2931 }, { "epoch": 2.315955766192733, "grad_norm": 10.488519945078615, "learning_rate": 1.5053188166412675e-06, "loss": 0.4839520752429962, "step": 2932 }, { "epoch": 2.316745655608215, "grad_norm": 8.255353237433672, "learning_rate": 1.5020335513621765e-06, "loss": 0.44816941022872925, "step": 2933 }, { "epoch": 2.3175355450236967, "grad_norm": 9.243776086558636, "learning_rate": 1.4987512411766597e-06, "loss": 0.2721986472606659, "step": 2934 }, { "epoch": 2.3183254344391786, "grad_norm": 10.746251103777091, "learning_rate": 1.4954718888576247e-06, "loss": 0.3570769131183624, "step": 2935 }, { "epoch": 2.3191153238546605, "grad_norm": 8.639831387338281, "learning_rate": 1.4921954971754783e-06, "loss": 0.3641059994697571, "step": 2936 }, { "epoch": 2.3199052132701423, "grad_norm": 11.092618448863458, "learning_rate": 1.4889220688981265e-06, "loss": 0.2935643196105957, "step": 2937 }, { "epoch": 2.320695102685624, "grad_norm": 12.23607825667408, "learning_rate": 1.4856516067909715e-06, "loss": 0.6410992741584778, "step": 2938 }, { "epoch": 2.321484992101106, "grad_norm": 16.623306809469454, "learning_rate": 1.4823841136169132e-06, "loss": 1.6279677152633667, "step": 2939 }, { "epoch": 2.322274881516588, "grad_norm": 12.421816386616667, "learning_rate": 1.479119592136341e-06, "loss": 0.5754894018173218, "step": 2940 }, { "epoch": 2.3230647709320698, "grad_norm": 8.991638214444569, "learning_rate": 1.4758580451071303e-06, "loss": 0.5960466861724854, "step": 2941 }, { "epoch": 2.323854660347551, "grad_norm": 12.162545193825485, "learning_rate": 1.4725994752846473e-06, "loss": 0.3044765889644623, "step": 2942 }, { "epoch": 2.324644549763033, "grad_norm": 19.440244075455958, "learning_rate": 1.4693438854217423e-06, "loss": 0.34671056270599365, "step": 2943 }, { "epoch": 2.325434439178515, "grad_norm": 11.430703439045205, "learning_rate": 1.4660912782687508e-06, "loss": 0.43449515104293823, "step": 2944 }, { "epoch": 2.3262243285939967, "grad_norm": 10.157383176145405, "learning_rate": 1.4628416565734859e-06, "loss": 0.3315383791923523, "step": 2945 }, { "epoch": 2.3270142180094786, "grad_norm": 17.3305490576594, "learning_rate": 1.4595950230812362e-06, "loss": 0.888778567314148, "step": 2946 }, { "epoch": 2.3278041074249605, "grad_norm": 16.32977159230478, "learning_rate": 1.4563513805347672e-06, "loss": 0.8644918203353882, "step": 2947 }, { "epoch": 2.3285939968404423, "grad_norm": 14.638072535644602, "learning_rate": 1.4531107316743231e-06, "loss": 0.47349095344543457, "step": 2948 }, { "epoch": 2.329383886255924, "grad_norm": 9.49689056502259, "learning_rate": 1.4498730792376126e-06, "loss": 0.5828062295913696, "step": 2949 }, { "epoch": 2.330173775671406, "grad_norm": 11.520314966686117, "learning_rate": 1.446638425959817e-06, "loss": 0.3956340551376343, "step": 2950 }, { "epoch": 2.330963665086888, "grad_norm": 8.269907045653584, "learning_rate": 1.4434067745735792e-06, "loss": 0.2273710072040558, "step": 2951 }, { "epoch": 2.3317535545023698, "grad_norm": 19.223537730273925, "learning_rate": 1.4401781278090077e-06, "loss": 0.2646605968475342, "step": 2952 }, { "epoch": 2.3325434439178516, "grad_norm": 7.308582717188896, "learning_rate": 1.436952488393678e-06, "loss": 0.25829195976257324, "step": 2953 }, { "epoch": 2.3333333333333335, "grad_norm": 9.018243729685395, "learning_rate": 1.4337298590526193e-06, "loss": 0.6489487290382385, "step": 2954 }, { "epoch": 2.3341232227488153, "grad_norm": 11.471038810226908, "learning_rate": 1.4305102425083195e-06, "loss": 0.23926126956939697, "step": 2955 }, { "epoch": 2.3349131121642968, "grad_norm": 8.516369253631089, "learning_rate": 1.4272936414807215e-06, "loss": 0.7570828199386597, "step": 2956 }, { "epoch": 2.3357030015797786, "grad_norm": 9.3142967522863, "learning_rate": 1.4240800586872194e-06, "loss": 0.9574933052062988, "step": 2957 }, { "epoch": 2.3364928909952605, "grad_norm": 18.891161628854636, "learning_rate": 1.4208694968426594e-06, "loss": 1.0689260959625244, "step": 2958 }, { "epoch": 2.3372827804107423, "grad_norm": 11.717390658258914, "learning_rate": 1.4176619586593343e-06, "loss": 0.5372721552848816, "step": 2959 }, { "epoch": 2.338072669826224, "grad_norm": 12.175691953414255, "learning_rate": 1.4144574468469836e-06, "loss": 1.1094560623168945, "step": 2960 }, { "epoch": 2.338862559241706, "grad_norm": 10.010738282134813, "learning_rate": 1.4112559641127882e-06, "loss": 0.3446424603462219, "step": 2961 }, { "epoch": 2.339652448657188, "grad_norm": 13.582629438685526, "learning_rate": 1.4080575131613727e-06, "loss": 0.3784876763820648, "step": 2962 }, { "epoch": 2.3404423380726698, "grad_norm": 12.572883408640878, "learning_rate": 1.4048620966947979e-06, "loss": 0.206849604845047, "step": 2963 }, { "epoch": 2.3412322274881516, "grad_norm": 11.77936634392298, "learning_rate": 1.4016697174125627e-06, "loss": 0.41492050886154175, "step": 2964 }, { "epoch": 2.3420221169036335, "grad_norm": 13.508111308050823, "learning_rate": 1.3984803780115997e-06, "loss": 0.5502463579177856, "step": 2965 }, { "epoch": 2.3428120063191153, "grad_norm": 9.14917705724563, "learning_rate": 1.3952940811862715e-06, "loss": 0.31409573554992676, "step": 2966 }, { "epoch": 2.343601895734597, "grad_norm": 12.299355989140265, "learning_rate": 1.3921108296283765e-06, "loss": 0.562318742275238, "step": 2967 }, { "epoch": 2.344391785150079, "grad_norm": 10.934243215627218, "learning_rate": 1.3889306260271318e-06, "loss": 0.4536178708076477, "step": 2968 }, { "epoch": 2.345181674565561, "grad_norm": 8.937500050751812, "learning_rate": 1.3857534730691857e-06, "loss": 0.34480780363082886, "step": 2969 }, { "epoch": 2.345971563981043, "grad_norm": 14.281522624319098, "learning_rate": 1.3825793734386074e-06, "loss": 1.0019593238830566, "step": 2970 }, { "epoch": 2.3467614533965246, "grad_norm": 12.232172084092497, "learning_rate": 1.3794083298168848e-06, "loss": 0.8148961663246155, "step": 2971 }, { "epoch": 2.3475513428120065, "grad_norm": 14.943915817149255, "learning_rate": 1.376240344882931e-06, "loss": 0.294752836227417, "step": 2972 }, { "epoch": 2.3483412322274884, "grad_norm": 10.941172869384477, "learning_rate": 1.3730754213130648e-06, "loss": 0.793616533279419, "step": 2973 }, { "epoch": 2.34913112164297, "grad_norm": 19.66512269635939, "learning_rate": 1.3699135617810266e-06, "loss": 0.858728289604187, "step": 2974 }, { "epoch": 2.3499210110584516, "grad_norm": 12.311189719186588, "learning_rate": 1.3667547689579636e-06, "loss": 0.5237981677055359, "step": 2975 }, { "epoch": 2.3507109004739335, "grad_norm": 27.368898959383774, "learning_rate": 1.3635990455124371e-06, "loss": 0.45691031217575073, "step": 2976 }, { "epoch": 2.3515007898894154, "grad_norm": 12.118450870594108, "learning_rate": 1.3604463941104119e-06, "loss": 0.3164505660533905, "step": 2977 }, { "epoch": 2.352290679304897, "grad_norm": 10.789370739328524, "learning_rate": 1.3572968174152595e-06, "loss": 0.6960369348526001, "step": 2978 }, { "epoch": 2.353080568720379, "grad_norm": 8.417646546087006, "learning_rate": 1.3541503180877497e-06, "loss": 0.45459938049316406, "step": 2979 }, { "epoch": 2.353870458135861, "grad_norm": 10.279601142928406, "learning_rate": 1.3510068987860554e-06, "loss": 0.38531941175460815, "step": 2980 }, { "epoch": 2.354660347551343, "grad_norm": 10.475719668500435, "learning_rate": 1.3478665621657505e-06, "loss": 0.3190561532974243, "step": 2981 }, { "epoch": 2.3554502369668247, "grad_norm": 12.300957016080215, "learning_rate": 1.3447293108798005e-06, "loss": 0.6847068667411804, "step": 2982 }, { "epoch": 2.3562401263823065, "grad_norm": 18.557938231221407, "learning_rate": 1.3415951475785666e-06, "loss": 0.6536878347396851, "step": 2983 }, { "epoch": 2.3570300157977884, "grad_norm": 9.51452055614094, "learning_rate": 1.338464074909796e-06, "loss": 1.2326545715332031, "step": 2984 }, { "epoch": 2.3578199052132702, "grad_norm": 7.117469162686578, "learning_rate": 1.3353360955186346e-06, "loss": 0.2746868431568146, "step": 2985 }, { "epoch": 2.358609794628752, "grad_norm": 8.473831273234493, "learning_rate": 1.3322112120476066e-06, "loss": 0.7424836754798889, "step": 2986 }, { "epoch": 2.359399684044234, "grad_norm": 8.174865295399881, "learning_rate": 1.3290894271366251e-06, "loss": 0.26962894201278687, "step": 2987 }, { "epoch": 2.360189573459716, "grad_norm": 9.674308066012069, "learning_rate": 1.3259707434229834e-06, "loss": 0.24160385131835938, "step": 2988 }, { "epoch": 2.3609794628751977, "grad_norm": 15.484285096363958, "learning_rate": 1.3228551635413567e-06, "loss": 0.2914643883705139, "step": 2989 }, { "epoch": 2.361769352290679, "grad_norm": 6.8344064852954425, "learning_rate": 1.3197426901237965e-06, "loss": 0.6412686109542847, "step": 2990 }, { "epoch": 2.362559241706161, "grad_norm": 10.510405239917262, "learning_rate": 1.3166333257997305e-06, "loss": 0.23676377534866333, "step": 2991 }, { "epoch": 2.363349131121643, "grad_norm": 18.178644105487813, "learning_rate": 1.3135270731959599e-06, "loss": 0.6876839995384216, "step": 2992 }, { "epoch": 2.3641390205371247, "grad_norm": 14.269241469381564, "learning_rate": 1.3104239349366577e-06, "loss": 0.4241482615470886, "step": 2993 }, { "epoch": 2.3649289099526065, "grad_norm": 12.211602110609782, "learning_rate": 1.3073239136433651e-06, "loss": 0.5298961997032166, "step": 2994 }, { "epoch": 2.3657187993680884, "grad_norm": 8.26732985074683, "learning_rate": 1.3042270119349903e-06, "loss": 0.5399200320243835, "step": 2995 }, { "epoch": 2.3665086887835702, "grad_norm": 12.593489559566338, "learning_rate": 1.3011332324278054e-06, "loss": 0.28332608938217163, "step": 2996 }, { "epoch": 2.367298578199052, "grad_norm": 13.460677136621092, "learning_rate": 1.2980425777354466e-06, "loss": 0.8239960670471191, "step": 2997 }, { "epoch": 2.368088467614534, "grad_norm": 11.353725107540342, "learning_rate": 1.2949550504689084e-06, "loss": 0.3334651589393616, "step": 2998 }, { "epoch": 2.368878357030016, "grad_norm": 10.254492835832023, "learning_rate": 1.2918706532365427e-06, "loss": 0.3909056782722473, "step": 2999 }, { "epoch": 2.3696682464454977, "grad_norm": 14.050213628495184, "learning_rate": 1.2887893886440634e-06, "loss": 0.42461708188056946, "step": 3000 }, { "epoch": 2.3704581358609795, "grad_norm": 9.514180124670807, "learning_rate": 1.2857112592945277e-06, "loss": 0.3221348822116852, "step": 3001 }, { "epoch": 2.3712480252764614, "grad_norm": 13.534122595465016, "learning_rate": 1.2826362677883509e-06, "loss": 0.31083306670188904, "step": 3002 }, { "epoch": 2.3720379146919433, "grad_norm": 10.83312836661239, "learning_rate": 1.279564416723295e-06, "loss": 0.6842789649963379, "step": 3003 }, { "epoch": 2.372827804107425, "grad_norm": 13.613162216619145, "learning_rate": 1.2764957086944729e-06, "loss": 0.2826170325279236, "step": 3004 }, { "epoch": 2.373617693522907, "grad_norm": 7.054250032981525, "learning_rate": 1.2734301462943393e-06, "loss": 0.3503812253475189, "step": 3005 }, { "epoch": 2.374407582938389, "grad_norm": 23.530290090188814, "learning_rate": 1.2703677321126878e-06, "loss": 0.9787487983703613, "step": 3006 }, { "epoch": 2.3751974723538707, "grad_norm": 9.116900547516696, "learning_rate": 1.2673084687366577e-06, "loss": 0.5961554050445557, "step": 3007 }, { "epoch": 2.375987361769352, "grad_norm": 12.370457479829778, "learning_rate": 1.264252358750724e-06, "loss": 0.4279392957687378, "step": 3008 }, { "epoch": 2.376777251184834, "grad_norm": 16.664525922372817, "learning_rate": 1.2611994047367004e-06, "loss": 2.003368854522705, "step": 3009 }, { "epoch": 2.377567140600316, "grad_norm": 9.89227361650676, "learning_rate": 1.2581496092737315e-06, "loss": 0.36015745997428894, "step": 3010 }, { "epoch": 2.3783570300157977, "grad_norm": 7.456303625403205, "learning_rate": 1.2551029749382966e-06, "loss": 0.38664817810058594, "step": 3011 }, { "epoch": 2.3791469194312795, "grad_norm": 10.16486281921813, "learning_rate": 1.2520595043041967e-06, "loss": 0.1998920887708664, "step": 3012 }, { "epoch": 2.3799368088467614, "grad_norm": 12.477795753447527, "learning_rate": 1.249019199942571e-06, "loss": 0.5245856046676636, "step": 3013 }, { "epoch": 2.3807266982622433, "grad_norm": 15.497688569467634, "learning_rate": 1.2459820644218772e-06, "loss": 0.5648780465126038, "step": 3014 }, { "epoch": 2.381516587677725, "grad_norm": 9.766778610490318, "learning_rate": 1.242948100307898e-06, "loss": 0.3348008990287781, "step": 3015 }, { "epoch": 2.382306477093207, "grad_norm": 9.459152445457095, "learning_rate": 1.2399173101637362e-06, "loss": 0.28159070014953613, "step": 3016 }, { "epoch": 2.383096366508689, "grad_norm": 10.550046979381259, "learning_rate": 1.23688969654981e-06, "loss": 0.5117573738098145, "step": 3017 }, { "epoch": 2.3838862559241707, "grad_norm": 15.689874055565582, "learning_rate": 1.2338652620238617e-06, "loss": 0.40390482544898987, "step": 3018 }, { "epoch": 2.3846761453396526, "grad_norm": 13.629010440932046, "learning_rate": 1.2308440091409418e-06, "loss": 0.255649209022522, "step": 3019 }, { "epoch": 2.3854660347551344, "grad_norm": 12.962233890538123, "learning_rate": 1.2278259404534148e-06, "loss": 0.7356714010238647, "step": 3020 }, { "epoch": 2.3862559241706163, "grad_norm": 12.685586645489847, "learning_rate": 1.2248110585109546e-06, "loss": 0.30903008580207825, "step": 3021 }, { "epoch": 2.387045813586098, "grad_norm": 9.857261027285547, "learning_rate": 1.2217993658605442e-06, "loss": 0.19482699036598206, "step": 3022 }, { "epoch": 2.3878357030015795, "grad_norm": 10.203386869299976, "learning_rate": 1.2187908650464713e-06, "loss": 0.777467668056488, "step": 3023 }, { "epoch": 2.3886255924170614, "grad_norm": 13.960859010293253, "learning_rate": 1.2157855586103268e-06, "loss": 0.42737501859664917, "step": 3024 }, { "epoch": 2.3894154818325433, "grad_norm": 9.02890660772045, "learning_rate": 1.212783449091004e-06, "loss": 0.19785018265247345, "step": 3025 }, { "epoch": 2.390205371248025, "grad_norm": 16.20027560463624, "learning_rate": 1.2097845390246944e-06, "loss": 0.4012932777404785, "step": 3026 }, { "epoch": 2.390995260663507, "grad_norm": 13.935718162104166, "learning_rate": 1.2067888309448872e-06, "loss": 1.0133466720581055, "step": 3027 }, { "epoch": 2.391785150078989, "grad_norm": 17.86275453891239, "learning_rate": 1.2037963273823667e-06, "loss": 0.7729724049568176, "step": 3028 }, { "epoch": 2.3925750394944707, "grad_norm": 19.729430525861485, "learning_rate": 1.2008070308652097e-06, "loss": 0.6262521147727966, "step": 3029 }, { "epoch": 2.3933649289099526, "grad_norm": 9.043064178522485, "learning_rate": 1.1978209439187843e-06, "loss": 0.33055561780929565, "step": 3030 }, { "epoch": 2.3941548183254344, "grad_norm": 10.08632808389374, "learning_rate": 1.194838069065744e-06, "loss": 0.3754257559776306, "step": 3031 }, { "epoch": 2.3949447077409163, "grad_norm": 16.377512511153483, "learning_rate": 1.191858408826036e-06, "loss": 0.7418103218078613, "step": 3032 }, { "epoch": 2.395734597156398, "grad_norm": 13.891209536245656, "learning_rate": 1.1888819657168832e-06, "loss": 0.6630780100822449, "step": 3033 }, { "epoch": 2.39652448657188, "grad_norm": 9.70934891051542, "learning_rate": 1.185908742252796e-06, "loss": 0.6031774878501892, "step": 3034 }, { "epoch": 2.397314375987362, "grad_norm": 7.781623949513264, "learning_rate": 1.1829387409455628e-06, "loss": 0.3950078785419464, "step": 3035 }, { "epoch": 2.3981042654028437, "grad_norm": 14.63680014242443, "learning_rate": 1.1799719643042494e-06, "loss": 1.1075937747955322, "step": 3036 }, { "epoch": 2.3988941548183256, "grad_norm": 9.35495088180136, "learning_rate": 1.1770084148352013e-06, "loss": 0.5169080495834351, "step": 3037 }, { "epoch": 2.3996840442338074, "grad_norm": 12.0333389179042, "learning_rate": 1.1740480950420346e-06, "loss": 0.6824804544448853, "step": 3038 }, { "epoch": 2.4004739336492893, "grad_norm": 10.918655037249678, "learning_rate": 1.1710910074256353e-06, "loss": 0.21143901348114014, "step": 3039 }, { "epoch": 2.401263823064771, "grad_norm": 12.024208404396846, "learning_rate": 1.1681371544841596e-06, "loss": 0.22546377778053284, "step": 3040 }, { "epoch": 2.402053712480253, "grad_norm": 7.956119102031276, "learning_rate": 1.165186538713035e-06, "loss": 0.4426957070827484, "step": 3041 }, { "epoch": 2.4028436018957344, "grad_norm": 14.526088548416308, "learning_rate": 1.1622391626049512e-06, "loss": 0.46725398302078247, "step": 3042 }, { "epoch": 2.4036334913112163, "grad_norm": 15.015374102906868, "learning_rate": 1.1592950286498617e-06, "loss": 0.3960593044757843, "step": 3043 }, { "epoch": 2.404423380726698, "grad_norm": 8.595894150813733, "learning_rate": 1.156354139334978e-06, "loss": 0.34023603796958923, "step": 3044 }, { "epoch": 2.40521327014218, "grad_norm": 10.668250389515402, "learning_rate": 1.153416497144773e-06, "loss": 0.29296159744262695, "step": 3045 }, { "epoch": 2.406003159557662, "grad_norm": 12.422225116517867, "learning_rate": 1.1504821045609793e-06, "loss": 0.3671707510948181, "step": 3046 }, { "epoch": 2.4067930489731437, "grad_norm": 10.146132230382701, "learning_rate": 1.1475509640625803e-06, "loss": 0.3341038227081299, "step": 3047 }, { "epoch": 2.4075829383886256, "grad_norm": 15.595249640220638, "learning_rate": 1.1446230781258126e-06, "loss": 0.4406832456588745, "step": 3048 }, { "epoch": 2.4083728278041074, "grad_norm": 15.105385899136605, "learning_rate": 1.1416984492241651e-06, "loss": 0.3876189589500427, "step": 3049 }, { "epoch": 2.4091627172195893, "grad_norm": 12.938838417300977, "learning_rate": 1.138777079828372e-06, "loss": 0.3145609498023987, "step": 3050 }, { "epoch": 2.409952606635071, "grad_norm": 8.358585121545957, "learning_rate": 1.1358589724064172e-06, "loss": 0.3923751711845398, "step": 3051 }, { "epoch": 2.410742496050553, "grad_norm": 12.197191053772348, "learning_rate": 1.1329441294235271e-06, "loss": 0.4876922369003296, "step": 3052 }, { "epoch": 2.411532385466035, "grad_norm": 22.74826926426158, "learning_rate": 1.1300325533421708e-06, "loss": 1.1759089231491089, "step": 3053 }, { "epoch": 2.4123222748815167, "grad_norm": 9.507426051564869, "learning_rate": 1.1271242466220566e-06, "loss": 0.7912863492965698, "step": 3054 }, { "epoch": 2.4131121642969986, "grad_norm": 28.164313247406366, "learning_rate": 1.1242192117201329e-06, "loss": 2.359377384185791, "step": 3055 }, { "epoch": 2.41390205371248, "grad_norm": 6.939852525178187, "learning_rate": 1.1213174510905818e-06, "loss": 0.1508610099554062, "step": 3056 }, { "epoch": 2.414691943127962, "grad_norm": 8.079273130567692, "learning_rate": 1.1184189671848205e-06, "loss": 0.26179447770118713, "step": 3057 }, { "epoch": 2.4154818325434437, "grad_norm": 8.63717491884411, "learning_rate": 1.1155237624514975e-06, "loss": 0.3997868001461029, "step": 3058 }, { "epoch": 2.4162717219589256, "grad_norm": 12.57415659403395, "learning_rate": 1.1126318393364905e-06, "loss": 0.31383663415908813, "step": 3059 }, { "epoch": 2.4170616113744074, "grad_norm": 10.339245467585227, "learning_rate": 1.109743200282909e-06, "loss": 0.3091076612472534, "step": 3060 }, { "epoch": 2.4178515007898893, "grad_norm": 9.227956180667832, "learning_rate": 1.106857847731081e-06, "loss": 0.28240686655044556, "step": 3061 }, { "epoch": 2.418641390205371, "grad_norm": 14.195925477576337, "learning_rate": 1.1039757841185638e-06, "loss": 0.58428555727005, "step": 3062 }, { "epoch": 2.419431279620853, "grad_norm": 12.117805874205489, "learning_rate": 1.1010970118801335e-06, "loss": 0.36473608016967773, "step": 3063 }, { "epoch": 2.420221169036335, "grad_norm": 8.135434340440062, "learning_rate": 1.0982215334477852e-06, "loss": 0.5204439163208008, "step": 3064 }, { "epoch": 2.4210110584518167, "grad_norm": 11.478951662361853, "learning_rate": 1.0953493512507369e-06, "loss": 0.7073631882667542, "step": 3065 }, { "epoch": 2.4218009478672986, "grad_norm": 8.874708226579822, "learning_rate": 1.0924804677154132e-06, "loss": 0.3390922546386719, "step": 3066 }, { "epoch": 2.4225908372827805, "grad_norm": 8.771829165557099, "learning_rate": 1.0896148852654576e-06, "loss": 0.28562629222869873, "step": 3067 }, { "epoch": 2.4233807266982623, "grad_norm": 12.196152581610507, "learning_rate": 1.0867526063217225e-06, "loss": 0.7520745396614075, "step": 3068 }, { "epoch": 2.424170616113744, "grad_norm": 15.941013046283597, "learning_rate": 1.0838936333022732e-06, "loss": 0.7924416661262512, "step": 3069 }, { "epoch": 2.424960505529226, "grad_norm": 16.208712282712163, "learning_rate": 1.0810379686223782e-06, "loss": 0.4997054934501648, "step": 3070 }, { "epoch": 2.425750394944708, "grad_norm": 12.626153196177519, "learning_rate": 1.0781856146945135e-06, "loss": 0.31034407019615173, "step": 3071 }, { "epoch": 2.4265402843601898, "grad_norm": 13.13529853421417, "learning_rate": 1.075336573928355e-06, "loss": 1.2663923501968384, "step": 3072 }, { "epoch": 2.4273301737756716, "grad_norm": 11.619655409461624, "learning_rate": 1.0724908487307812e-06, "loss": 0.4666603207588196, "step": 3073 }, { "epoch": 2.4281200631911535, "grad_norm": 11.29095734565805, "learning_rate": 1.0696484415058732e-06, "loss": 0.6091010570526123, "step": 3074 }, { "epoch": 2.428909952606635, "grad_norm": 12.496992069841568, "learning_rate": 1.066809354654904e-06, "loss": 0.3018745183944702, "step": 3075 }, { "epoch": 2.4296998420221168, "grad_norm": 10.57772764970064, "learning_rate": 1.063973590576346e-06, "loss": 0.7717863917350769, "step": 3076 }, { "epoch": 2.4304897314375986, "grad_norm": 14.741370815856945, "learning_rate": 1.0611411516658566e-06, "loss": 0.47512930631637573, "step": 3077 }, { "epoch": 2.4312796208530805, "grad_norm": 14.21167289494127, "learning_rate": 1.0583120403162943e-06, "loss": 0.6081647872924805, "step": 3078 }, { "epoch": 2.4320695102685623, "grad_norm": 9.193770485318598, "learning_rate": 1.0554862589177007e-06, "loss": 0.34148019552230835, "step": 3079 }, { "epoch": 2.432859399684044, "grad_norm": 9.062778818641565, "learning_rate": 1.0526638098573045e-06, "loss": 0.3882153630256653, "step": 3080 }, { "epoch": 2.433649289099526, "grad_norm": 18.929160213629444, "learning_rate": 1.0498446955195202e-06, "loss": 0.3846644163131714, "step": 3081 }, { "epoch": 2.434439178515008, "grad_norm": 8.998677826427556, "learning_rate": 1.047028918285945e-06, "loss": 0.3000609576702118, "step": 3082 }, { "epoch": 2.4352290679304898, "grad_norm": 10.101377476509365, "learning_rate": 1.0442164805353565e-06, "loss": 0.3316442370414734, "step": 3083 }, { "epoch": 2.4360189573459716, "grad_norm": 15.80218064139609, "learning_rate": 1.0414073846437122e-06, "loss": 0.15290002524852753, "step": 3084 }, { "epoch": 2.4368088467614535, "grad_norm": 9.603248142881998, "learning_rate": 1.0386016329841448e-06, "loss": 0.30186790227890015, "step": 3085 }, { "epoch": 2.4375987361769353, "grad_norm": 14.04175823933125, "learning_rate": 1.0357992279269623e-06, "loss": 0.4339219033718109, "step": 3086 }, { "epoch": 2.438388625592417, "grad_norm": 18.009198275639264, "learning_rate": 1.033000171839646e-06, "loss": 1.9742562770843506, "step": 3087 }, { "epoch": 2.439178515007899, "grad_norm": 22.24916433433492, "learning_rate": 1.0302044670868483e-06, "loss": 0.504414439201355, "step": 3088 }, { "epoch": 2.4399684044233805, "grad_norm": 10.361343950481873, "learning_rate": 1.027412116030389e-06, "loss": 1.0488673448562622, "step": 3089 }, { "epoch": 2.4407582938388623, "grad_norm": 10.797759458558541, "learning_rate": 1.0246231210292557e-06, "loss": 0.3521267771720886, "step": 3090 }, { "epoch": 2.441548183254344, "grad_norm": 12.14235046863475, "learning_rate": 1.0218374844396011e-06, "loss": 0.3208717703819275, "step": 3091 }, { "epoch": 2.442338072669826, "grad_norm": 14.3011282222263, "learning_rate": 1.0190552086147393e-06, "loss": 0.7246259450912476, "step": 3092 }, { "epoch": 2.443127962085308, "grad_norm": 14.472676250973395, "learning_rate": 1.0162762959051464e-06, "loss": 0.3144262433052063, "step": 3093 }, { "epoch": 2.4439178515007898, "grad_norm": 8.531330890483545, "learning_rate": 1.013500748658457e-06, "loss": 0.4461020231246948, "step": 3094 }, { "epoch": 2.4447077409162716, "grad_norm": 14.749179854381705, "learning_rate": 1.010728569219463e-06, "loss": 0.37278565764427185, "step": 3095 }, { "epoch": 2.4454976303317535, "grad_norm": 9.553911314657903, "learning_rate": 1.0079597599301088e-06, "loss": 0.5785685777664185, "step": 3096 }, { "epoch": 2.4462875197472354, "grad_norm": 11.059506181609464, "learning_rate": 1.0051943231294965e-06, "loss": 1.0506287813186646, "step": 3097 }, { "epoch": 2.447077409162717, "grad_norm": 15.548120320030591, "learning_rate": 1.0024322611538762e-06, "loss": 0.4273751974105835, "step": 3098 }, { "epoch": 2.447867298578199, "grad_norm": 9.759199951892318, "learning_rate": 9.996735763366444e-07, "loss": 0.37685832381248474, "step": 3099 }, { "epoch": 2.448657187993681, "grad_norm": 11.701020741919134, "learning_rate": 9.96918271008348e-07, "loss": 0.4007868766784668, "step": 3100 }, { "epoch": 2.449447077409163, "grad_norm": 11.350172953468489, "learning_rate": 9.941663474966772e-07, "loss": 0.5525588989257812, "step": 3101 }, { "epoch": 2.4502369668246446, "grad_norm": 15.30001249449009, "learning_rate": 9.914178081264685e-07, "loss": 0.6951796412467957, "step": 3102 }, { "epoch": 2.4510268562401265, "grad_norm": 7.995616395973255, "learning_rate": 9.886726552196974e-07, "loss": 0.47238144278526306, "step": 3103 }, { "epoch": 2.4518167456556084, "grad_norm": 10.984193552146008, "learning_rate": 9.859308910954745e-07, "loss": 0.32252442836761475, "step": 3104 }, { "epoch": 2.4526066350710902, "grad_norm": 8.04105675908032, "learning_rate": 9.831925180700525e-07, "loss": 0.21019114553928375, "step": 3105 }, { "epoch": 2.453396524486572, "grad_norm": 27.322091310870565, "learning_rate": 9.804575384568194e-07, "loss": 1.3946754932403564, "step": 3106 }, { "epoch": 2.454186413902054, "grad_norm": 8.839342829542206, "learning_rate": 9.777259545662944e-07, "loss": 0.5038160085678101, "step": 3107 }, { "epoch": 2.4549763033175354, "grad_norm": 21.324783217152557, "learning_rate": 9.749977687061279e-07, "loss": 0.518517017364502, "step": 3108 }, { "epoch": 2.455766192733017, "grad_norm": 8.481164678205802, "learning_rate": 9.722729831811007e-07, "loss": 0.8147182464599609, "step": 3109 }, { "epoch": 2.456556082148499, "grad_norm": 6.780024339611966, "learning_rate": 9.695516002931204e-07, "loss": 0.3741002380847931, "step": 3110 }, { "epoch": 2.457345971563981, "grad_norm": 11.801344448984926, "learning_rate": 9.668336223412207e-07, "loss": 0.3355671763420105, "step": 3111 }, { "epoch": 2.458135860979463, "grad_norm": 18.426072152940076, "learning_rate": 9.641190516215583e-07, "loss": 0.5405136346817017, "step": 3112 }, { "epoch": 2.4589257503949447, "grad_norm": 8.59802906534028, "learning_rate": 9.614078904274105e-07, "loss": 0.4377972483634949, "step": 3113 }, { "epoch": 2.4597156398104265, "grad_norm": 14.687076152913516, "learning_rate": 9.587001410491764e-07, "loss": 0.46500271558761597, "step": 3114 }, { "epoch": 2.4605055292259084, "grad_norm": 11.107539453005398, "learning_rate": 9.559958057743712e-07, "loss": 0.5390537977218628, "step": 3115 }, { "epoch": 2.4612954186413902, "grad_norm": 16.500533704000823, "learning_rate": 9.532948868876258e-07, "loss": 0.21158595383167267, "step": 3116 }, { "epoch": 2.462085308056872, "grad_norm": 13.41292359415877, "learning_rate": 9.505973866706858e-07, "loss": 0.5755499601364136, "step": 3117 }, { "epoch": 2.462875197472354, "grad_norm": 21.715368481608397, "learning_rate": 9.47903307402408e-07, "loss": 1.7295933961868286, "step": 3118 }, { "epoch": 2.463665086887836, "grad_norm": 16.12344079053901, "learning_rate": 9.452126513587601e-07, "loss": 0.6120498180389404, "step": 3119 }, { "epoch": 2.4644549763033177, "grad_norm": 6.872808233122644, "learning_rate": 9.42525420812816e-07, "loss": 0.20094534754753113, "step": 3120 }, { "epoch": 2.4652448657187995, "grad_norm": 13.205846064827124, "learning_rate": 9.398416180347581e-07, "loss": 0.3877865672111511, "step": 3121 }, { "epoch": 2.4660347551342814, "grad_norm": 32.63764306543489, "learning_rate": 9.371612452918711e-07, "loss": 1.0538610219955444, "step": 3122 }, { "epoch": 2.466824644549763, "grad_norm": 10.156083117031175, "learning_rate": 9.34484304848543e-07, "loss": 0.6839322447776794, "step": 3123 }, { "epoch": 2.4676145339652447, "grad_norm": 10.388055079887849, "learning_rate": 9.318107989662611e-07, "loss": 0.19889391958713531, "step": 3124 }, { "epoch": 2.4684044233807265, "grad_norm": 7.682004289712716, "learning_rate": 9.291407299036148e-07, "loss": 0.29008305072784424, "step": 3125 }, { "epoch": 2.4691943127962084, "grad_norm": 8.701364307694792, "learning_rate": 9.264740999162836e-07, "loss": 0.28695201873779297, "step": 3126 }, { "epoch": 2.4699842022116902, "grad_norm": 17.465630884728238, "learning_rate": 9.238109112570475e-07, "loss": 0.29782527685165405, "step": 3127 }, { "epoch": 2.470774091627172, "grad_norm": 8.305085250067776, "learning_rate": 9.21151166175776e-07, "loss": 0.21786969900131226, "step": 3128 }, { "epoch": 2.471563981042654, "grad_norm": 9.419126772200544, "learning_rate": 9.184948669194299e-07, "loss": 0.6029007434844971, "step": 3129 }, { "epoch": 2.472353870458136, "grad_norm": 20.242949144706834, "learning_rate": 9.158420157320613e-07, "loss": 0.539847731590271, "step": 3130 }, { "epoch": 2.4731437598736177, "grad_norm": 14.865152633768327, "learning_rate": 9.131926148548087e-07, "loss": 0.6652476787567139, "step": 3131 }, { "epoch": 2.4739336492890995, "grad_norm": 10.195973961976483, "learning_rate": 9.105466665258916e-07, "loss": 0.6767930388450623, "step": 3132 }, { "epoch": 2.4747235387045814, "grad_norm": 12.913760166214779, "learning_rate": 9.07904172980616e-07, "loss": 0.30187326669692993, "step": 3133 }, { "epoch": 2.4755134281200633, "grad_norm": 7.210724493670894, "learning_rate": 9.052651364513709e-07, "loss": 0.2819286584854126, "step": 3134 }, { "epoch": 2.476303317535545, "grad_norm": 9.694770018852635, "learning_rate": 9.026295591676232e-07, "loss": 1.3492515087127686, "step": 3135 }, { "epoch": 2.477093206951027, "grad_norm": 10.663258311074385, "learning_rate": 8.999974433559172e-07, "loss": 0.6058721542358398, "step": 3136 }, { "epoch": 2.477883096366509, "grad_norm": 8.609940369345201, "learning_rate": 8.973687912398698e-07, "loss": 0.3588424324989319, "step": 3137 }, { "epoch": 2.4786729857819907, "grad_norm": 10.99741800368398, "learning_rate": 8.947436050401792e-07, "loss": 0.9377810955047607, "step": 3138 }, { "epoch": 2.4794628751974725, "grad_norm": 13.297554259312957, "learning_rate": 8.92121886974609e-07, "loss": 0.3509410619735718, "step": 3139 }, { "epoch": 2.4802527646129544, "grad_norm": 25.874105611022717, "learning_rate": 8.895036392579965e-07, "loss": 1.956540822982788, "step": 3140 }, { "epoch": 2.4810426540284363, "grad_norm": 13.02959939002725, "learning_rate": 8.868888641022449e-07, "loss": 0.32808127999305725, "step": 3141 }, { "epoch": 2.4818325434439177, "grad_norm": 22.382659848118195, "learning_rate": 8.842775637163259e-07, "loss": 1.4966964721679688, "step": 3142 }, { "epoch": 2.4826224328593995, "grad_norm": 6.597640303742646, "learning_rate": 8.816697403062736e-07, "loss": 0.2421848326921463, "step": 3143 }, { "epoch": 2.4834123222748814, "grad_norm": 10.171804451221856, "learning_rate": 8.790653960751861e-07, "loss": 0.3181907534599304, "step": 3144 }, { "epoch": 2.4842022116903633, "grad_norm": 12.678509259303947, "learning_rate": 8.764645332232225e-07, "loss": 2.06329345703125, "step": 3145 }, { "epoch": 2.484992101105845, "grad_norm": 8.062598004826272, "learning_rate": 8.738671539476001e-07, "loss": 0.40570273995399475, "step": 3146 }, { "epoch": 2.485781990521327, "grad_norm": 8.067583181720805, "learning_rate": 8.712732604425933e-07, "loss": 0.2615022361278534, "step": 3147 }, { "epoch": 2.486571879936809, "grad_norm": 11.514174208308086, "learning_rate": 8.686828548995318e-07, "loss": 0.3940156102180481, "step": 3148 }, { "epoch": 2.4873617693522907, "grad_norm": 13.106691868280555, "learning_rate": 8.660959395067991e-07, "loss": 0.33123183250427246, "step": 3149 }, { "epoch": 2.4881516587677726, "grad_norm": 6.521694530585903, "learning_rate": 8.635125164498293e-07, "loss": 0.1495874524116516, "step": 3150 }, { "epoch": 2.4889415481832544, "grad_norm": 8.547476041571496, "learning_rate": 8.609325879111069e-07, "loss": 0.5456651449203491, "step": 3151 }, { "epoch": 2.4897314375987363, "grad_norm": 10.933870809001716, "learning_rate": 8.583561560701647e-07, "loss": 0.3192444145679474, "step": 3152 }, { "epoch": 2.490521327014218, "grad_norm": 28.363445895171516, "learning_rate": 8.557832231035796e-07, "loss": 0.5993216037750244, "step": 3153 }, { "epoch": 2.4913112164297, "grad_norm": 10.933363522689742, "learning_rate": 8.532137911849747e-07, "loss": 0.3621593117713928, "step": 3154 }, { "epoch": 2.492101105845182, "grad_norm": 7.744021564422991, "learning_rate": 8.506478624850145e-07, "loss": 0.20899435877799988, "step": 3155 }, { "epoch": 2.4928909952606633, "grad_norm": 16.455158224717778, "learning_rate": 8.480854391714039e-07, "loss": 0.5033141374588013, "step": 3156 }, { "epoch": 2.493680884676145, "grad_norm": 8.898300886169093, "learning_rate": 8.45526523408885e-07, "loss": 0.4480706751346588, "step": 3157 }, { "epoch": 2.494470774091627, "grad_norm": 7.0747670856799045, "learning_rate": 8.42971117359242e-07, "loss": 0.21309760212898254, "step": 3158 }, { "epoch": 2.495260663507109, "grad_norm": 9.196654852828132, "learning_rate": 8.404192231812875e-07, "loss": 0.3876749873161316, "step": 3159 }, { "epoch": 2.4960505529225907, "grad_norm": 8.41438123486234, "learning_rate": 8.378708430308702e-07, "loss": 0.3482446074485779, "step": 3160 }, { "epoch": 2.4968404423380726, "grad_norm": 9.952515369779197, "learning_rate": 8.353259790608698e-07, "loss": 0.19175337255001068, "step": 3161 }, { "epoch": 2.4976303317535544, "grad_norm": 15.820391137731752, "learning_rate": 8.32784633421197e-07, "loss": 1.1132574081420898, "step": 3162 }, { "epoch": 2.4984202211690363, "grad_norm": 9.122774774351079, "learning_rate": 8.302468082587906e-07, "loss": 0.39598995447158813, "step": 3163 }, { "epoch": 2.499210110584518, "grad_norm": 10.71112514476097, "learning_rate": 8.277125057176095e-07, "loss": 0.37614136934280396, "step": 3164 }, { "epoch": 2.5, "grad_norm": 15.433700062046594, "learning_rate": 8.25181727938642e-07, "loss": 0.4549494981765747, "step": 3165 }, { "epoch": 2.500789889415482, "grad_norm": 6.775255209163935, "learning_rate": 8.226544770598993e-07, "loss": 0.38162803649902344, "step": 3166 }, { "epoch": 2.5015797788309637, "grad_norm": 9.07190974562414, "learning_rate": 8.201307552164106e-07, "loss": 0.4735422730445862, "step": 3167 }, { "epoch": 2.5023696682464456, "grad_norm": 9.713947704476643, "learning_rate": 8.176105645402244e-07, "loss": 0.3699314594268799, "step": 3168 }, { "epoch": 2.5031595576619274, "grad_norm": 11.905314094509981, "learning_rate": 8.150939071604069e-07, "loss": 0.2857322692871094, "step": 3169 }, { "epoch": 2.5039494470774093, "grad_norm": 11.738198518074968, "learning_rate": 8.125807852030349e-07, "loss": 0.8294199109077454, "step": 3170 }, { "epoch": 2.504739336492891, "grad_norm": 23.694230717994056, "learning_rate": 8.10071200791206e-07, "loss": 0.8081961870193481, "step": 3171 }, { "epoch": 2.505529225908373, "grad_norm": 11.356117033254058, "learning_rate": 8.075651560450237e-07, "loss": 0.45390763878822327, "step": 3172 }, { "epoch": 2.506319115323855, "grad_norm": 9.254162743946639, "learning_rate": 8.050626530816036e-07, "loss": 0.39210453629493713, "step": 3173 }, { "epoch": 2.5071090047393367, "grad_norm": 14.960564989579739, "learning_rate": 8.025636940150677e-07, "loss": 0.4236029386520386, "step": 3174 }, { "epoch": 2.5078988941548186, "grad_norm": 9.28804391772842, "learning_rate": 8.000682809565457e-07, "loss": 0.38558968901634216, "step": 3175 }, { "epoch": 2.5086887835703, "grad_norm": 9.760069575355349, "learning_rate": 7.975764160141702e-07, "loss": 0.197739839553833, "step": 3176 }, { "epoch": 2.509478672985782, "grad_norm": 8.394775554288064, "learning_rate": 7.950881012930766e-07, "loss": 0.29299861192703247, "step": 3177 }, { "epoch": 2.5102685624012637, "grad_norm": 13.432882997987198, "learning_rate": 7.926033388954019e-07, "loss": 0.6216456294059753, "step": 3178 }, { "epoch": 2.5110584518167456, "grad_norm": 11.340453335100433, "learning_rate": 7.901221309202817e-07, "loss": 0.4832969307899475, "step": 3179 }, { "epoch": 2.5118483412322274, "grad_norm": 12.980596557657627, "learning_rate": 7.876444794638477e-07, "loss": 0.3053157329559326, "step": 3180 }, { "epoch": 2.5126382306477093, "grad_norm": 10.678528762002763, "learning_rate": 7.851703866192295e-07, "loss": 0.5949424505233765, "step": 3181 }, { "epoch": 2.513428120063191, "grad_norm": 8.615764477300809, "learning_rate": 7.826998544765473e-07, "loss": 0.3822169005870819, "step": 3182 }, { "epoch": 2.514218009478673, "grad_norm": 12.219849039480954, "learning_rate": 7.802328851229163e-07, "loss": 0.35568392276763916, "step": 3183 }, { "epoch": 2.515007898894155, "grad_norm": 12.387025000660175, "learning_rate": 7.77769480642439e-07, "loss": 0.386673241853714, "step": 3184 }, { "epoch": 2.5157977883096367, "grad_norm": 8.52886409021774, "learning_rate": 7.753096431162083e-07, "loss": 0.24550145864486694, "step": 3185 }, { "epoch": 2.5165876777251186, "grad_norm": 8.816997119450642, "learning_rate": 7.728533746223032e-07, "loss": 0.3060890734195709, "step": 3186 }, { "epoch": 2.5173775671406005, "grad_norm": 12.72993681147683, "learning_rate": 7.70400677235787e-07, "loss": 0.4496222734451294, "step": 3187 }, { "epoch": 2.518167456556082, "grad_norm": 14.450644987050909, "learning_rate": 7.679515530287068e-07, "loss": 0.3027800917625427, "step": 3188 }, { "epoch": 2.5189573459715637, "grad_norm": 6.8289260496500335, "learning_rate": 7.655060040700895e-07, "loss": 0.21853289008140564, "step": 3189 }, { "epoch": 2.5197472353870456, "grad_norm": 16.18749221673546, "learning_rate": 7.630640324259453e-07, "loss": 0.3517822027206421, "step": 3190 }, { "epoch": 2.5205371248025275, "grad_norm": 7.317050751605334, "learning_rate": 7.606256401592599e-07, "loss": 0.20418161153793335, "step": 3191 }, { "epoch": 2.5213270142180093, "grad_norm": 11.575307062369431, "learning_rate": 7.581908293299923e-07, "loss": 0.29537758231163025, "step": 3192 }, { "epoch": 2.522116903633491, "grad_norm": 13.330694266343132, "learning_rate": 7.557596019950797e-07, "loss": 0.24356764554977417, "step": 3193 }, { "epoch": 2.522906793048973, "grad_norm": 12.770819836355157, "learning_rate": 7.533319602084321e-07, "loss": 0.9563419818878174, "step": 3194 }, { "epoch": 2.523696682464455, "grad_norm": 9.323681305876523, "learning_rate": 7.50907906020929e-07, "loss": 0.4368267059326172, "step": 3195 }, { "epoch": 2.5244865718799367, "grad_norm": 12.737056250718048, "learning_rate": 7.484874414804206e-07, "loss": 0.8397213816642761, "step": 3196 }, { "epoch": 2.5252764612954186, "grad_norm": 11.13269574815902, "learning_rate": 7.460705686317205e-07, "loss": 0.6406710743904114, "step": 3197 }, { "epoch": 2.5260663507109005, "grad_norm": 15.377479065191766, "learning_rate": 7.43657289516611e-07, "loss": 0.5515921115875244, "step": 3198 }, { "epoch": 2.5268562401263823, "grad_norm": 10.942333674945784, "learning_rate": 7.412476061738405e-07, "loss": 0.47541213035583496, "step": 3199 }, { "epoch": 2.527646129541864, "grad_norm": 11.44586983157694, "learning_rate": 7.388415206391164e-07, "loss": 0.2634442150592804, "step": 3200 }, { "epoch": 2.528436018957346, "grad_norm": 12.935860176811287, "learning_rate": 7.364390349451073e-07, "loss": 1.6421760320663452, "step": 3201 }, { "epoch": 2.529225908372828, "grad_norm": 6.618453794857769, "learning_rate": 7.340401511214418e-07, "loss": 0.18729032576084137, "step": 3202 }, { "epoch": 2.5300157977883098, "grad_norm": 21.158575104978276, "learning_rate": 7.316448711947038e-07, "loss": 0.6561132073402405, "step": 3203 }, { "epoch": 2.5308056872037916, "grad_norm": 17.41414326406974, "learning_rate": 7.292531971884348e-07, "loss": 1.311091423034668, "step": 3204 }, { "epoch": 2.5315955766192735, "grad_norm": 9.054118102670634, "learning_rate": 7.268651311231278e-07, "loss": 0.37653642892837524, "step": 3205 }, { "epoch": 2.5323854660347553, "grad_norm": 7.875373579401623, "learning_rate": 7.244806750162298e-07, "loss": 0.13674522936344147, "step": 3206 }, { "epoch": 2.533175355450237, "grad_norm": 7.790372139492247, "learning_rate": 7.22099830882137e-07, "loss": 0.37409287691116333, "step": 3207 }, { "epoch": 2.533965244865719, "grad_norm": 21.5175970885638, "learning_rate": 7.197226007321939e-07, "loss": 0.5270158052444458, "step": 3208 }, { "epoch": 2.5347551342812005, "grad_norm": 9.240238834016377, "learning_rate": 7.173489865746924e-07, "loss": 0.57960045337677, "step": 3209 }, { "epoch": 2.5355450236966823, "grad_norm": 17.71089132438801, "learning_rate": 7.149789904148696e-07, "loss": 0.9655189514160156, "step": 3210 }, { "epoch": 2.536334913112164, "grad_norm": 7.9060126053659685, "learning_rate": 7.126126142549067e-07, "loss": 0.8784974217414856, "step": 3211 }, { "epoch": 2.537124802527646, "grad_norm": 15.460935801679469, "learning_rate": 7.102498600939256e-07, "loss": 0.9071961641311646, "step": 3212 }, { "epoch": 2.537914691943128, "grad_norm": 9.491362341174666, "learning_rate": 7.078907299279886e-07, "loss": 0.99891197681427, "step": 3213 }, { "epoch": 2.5387045813586098, "grad_norm": 13.553645359898077, "learning_rate": 7.05535225750097e-07, "loss": 0.4606255292892456, "step": 3214 }, { "epoch": 2.5394944707740916, "grad_norm": 11.434760827242846, "learning_rate": 7.031833495501878e-07, "loss": 0.38194912672042847, "step": 3215 }, { "epoch": 2.5402843601895735, "grad_norm": 8.816989170036669, "learning_rate": 7.008351033151345e-07, "loss": 0.5898439884185791, "step": 3216 }, { "epoch": 2.5410742496050553, "grad_norm": 8.941678431636664, "learning_rate": 6.984904890287419e-07, "loss": 0.6821322441101074, "step": 3217 }, { "epoch": 2.541864139020537, "grad_norm": 11.879397851452923, "learning_rate": 6.961495086717518e-07, "loss": 0.30271491408348083, "step": 3218 }, { "epoch": 2.542654028436019, "grad_norm": 11.904426225856726, "learning_rate": 6.938121642218277e-07, "loss": 0.8798356056213379, "step": 3219 }, { "epoch": 2.543443917851501, "grad_norm": 9.833714180030546, "learning_rate": 6.914784576535671e-07, "loss": 0.4576849341392517, "step": 3220 }, { "epoch": 2.544233807266983, "grad_norm": 7.60568754755486, "learning_rate": 6.891483909384927e-07, "loss": 0.39021506905555725, "step": 3221 }, { "epoch": 2.545023696682464, "grad_norm": 13.385710625425459, "learning_rate": 6.868219660450542e-07, "loss": 0.6809737682342529, "step": 3222 }, { "epoch": 2.545813586097946, "grad_norm": 14.296223483394565, "learning_rate": 6.844991849386234e-07, "loss": 0.354898601770401, "step": 3223 }, { "epoch": 2.546603475513428, "grad_norm": 7.507430954335532, "learning_rate": 6.821800495814906e-07, "loss": 0.3235066831111908, "step": 3224 }, { "epoch": 2.5473933649289098, "grad_norm": 8.831847194569248, "learning_rate": 6.798645619328709e-07, "loss": 0.6501250267028809, "step": 3225 }, { "epoch": 2.5481832543443916, "grad_norm": 11.08888965436733, "learning_rate": 6.775527239488939e-07, "loss": 0.3581928312778473, "step": 3226 }, { "epoch": 2.5489731437598735, "grad_norm": 14.993261176629105, "learning_rate": 6.752445375826111e-07, "loss": 0.47264599800109863, "step": 3227 }, { "epoch": 2.5497630331753554, "grad_norm": 12.12552691044683, "learning_rate": 6.729400047839834e-07, "loss": 1.0418339967727661, "step": 3228 }, { "epoch": 2.550552922590837, "grad_norm": 9.816976412231398, "learning_rate": 6.706391274998908e-07, "loss": 0.40765923261642456, "step": 3229 }, { "epoch": 2.551342812006319, "grad_norm": 8.814268750339222, "learning_rate": 6.683419076741166e-07, "loss": 0.659870982170105, "step": 3230 }, { "epoch": 2.552132701421801, "grad_norm": 12.387930895767527, "learning_rate": 6.660483472473644e-07, "loss": 0.6081492900848389, "step": 3231 }, { "epoch": 2.552922590837283, "grad_norm": 17.021655475649457, "learning_rate": 6.637584481572407e-07, "loss": 0.8503941297531128, "step": 3232 }, { "epoch": 2.5537124802527646, "grad_norm": 10.612469129748503, "learning_rate": 6.614722123382583e-07, "loss": 0.5053238868713379, "step": 3233 }, { "epoch": 2.5545023696682465, "grad_norm": 11.199796923866582, "learning_rate": 6.591896417218391e-07, "loss": 0.5718584656715393, "step": 3234 }, { "epoch": 2.5552922590837284, "grad_norm": 24.003473547596283, "learning_rate": 6.569107382363027e-07, "loss": 0.8553175330162048, "step": 3235 }, { "epoch": 2.5560821484992102, "grad_norm": 8.724785188614117, "learning_rate": 6.546355038068774e-07, "loss": 0.2981413006782532, "step": 3236 }, { "epoch": 2.556872037914692, "grad_norm": 8.108209067959447, "learning_rate": 6.523639403556875e-07, "loss": 0.27729976177215576, "step": 3237 }, { "epoch": 2.557661927330174, "grad_norm": 7.579505693226836, "learning_rate": 6.500960498017578e-07, "loss": 0.3648611903190613, "step": 3238 }, { "epoch": 2.558451816745656, "grad_norm": 13.670618984625548, "learning_rate": 6.478318340610091e-07, "loss": 0.4756515324115753, "step": 3239 }, { "epoch": 2.5592417061611377, "grad_norm": 16.165515463439487, "learning_rate": 6.45571295046259e-07, "loss": 0.6007115840911865, "step": 3240 }, { "epoch": 2.5600315955766195, "grad_norm": 9.184975673193794, "learning_rate": 6.433144346672177e-07, "loss": 0.22796334326267242, "step": 3241 }, { "epoch": 2.5608214849921014, "grad_norm": 11.242675742774068, "learning_rate": 6.410612548304884e-07, "loss": 0.5558523535728455, "step": 3242 }, { "epoch": 2.561611374407583, "grad_norm": 17.912335926244754, "learning_rate": 6.388117574395652e-07, "loss": 0.45684516429901123, "step": 3243 }, { "epoch": 2.5624012638230647, "grad_norm": 6.42831978379293, "learning_rate": 6.365659443948307e-07, "loss": 0.37593698501586914, "step": 3244 }, { "epoch": 2.5631911532385465, "grad_norm": 14.380203287837352, "learning_rate": 6.343238175935551e-07, "loss": 1.0076820850372314, "step": 3245 }, { "epoch": 2.5639810426540284, "grad_norm": 11.649489711574187, "learning_rate": 6.320853789298942e-07, "loss": 0.6352476477622986, "step": 3246 }, { "epoch": 2.5647709320695102, "grad_norm": 11.141609677532234, "learning_rate": 6.298506302948886e-07, "loss": 0.4481988549232483, "step": 3247 }, { "epoch": 2.565560821484992, "grad_norm": 11.697367831752132, "learning_rate": 6.276195735764617e-07, "loss": 0.5129117965698242, "step": 3248 }, { "epoch": 2.566350710900474, "grad_norm": 10.308816622041837, "learning_rate": 6.253922106594162e-07, "loss": 0.6752769351005554, "step": 3249 }, { "epoch": 2.567140600315956, "grad_norm": 10.354709362911104, "learning_rate": 6.231685434254375e-07, "loss": 0.22525343298912048, "step": 3250 }, { "epoch": 2.5679304897314377, "grad_norm": 11.05644990012399, "learning_rate": 6.209485737530873e-07, "loss": 0.35317540168762207, "step": 3251 }, { "epoch": 2.5687203791469195, "grad_norm": 6.477124764043402, "learning_rate": 6.187323035178012e-07, "loss": 0.22543203830718994, "step": 3252 }, { "epoch": 2.5695102685624014, "grad_norm": 14.30255153474572, "learning_rate": 6.165197345918927e-07, "loss": 0.46911317110061646, "step": 3253 }, { "epoch": 2.5703001579778832, "grad_norm": 9.552175646905189, "learning_rate": 6.14310868844546e-07, "loss": 0.33642643690109253, "step": 3254 }, { "epoch": 2.5710900473933647, "grad_norm": 6.838188468089331, "learning_rate": 6.121057081418202e-07, "loss": 0.33152109384536743, "step": 3255 }, { "epoch": 2.5718799368088465, "grad_norm": 10.0723559433558, "learning_rate": 6.099042543466427e-07, "loss": 0.4825100898742676, "step": 3256 }, { "epoch": 2.5726698262243284, "grad_norm": 9.82952412822834, "learning_rate": 6.077065093188062e-07, "loss": 0.3012405037879944, "step": 3257 }, { "epoch": 2.5734597156398102, "grad_norm": 9.586040497432494, "learning_rate": 6.055124749149738e-07, "loss": 0.23892341554164886, "step": 3258 }, { "epoch": 2.574249605055292, "grad_norm": 12.603578521658129, "learning_rate": 6.033221529886745e-07, "loss": 0.6413030624389648, "step": 3259 }, { "epoch": 2.575039494470774, "grad_norm": 8.672198769457234, "learning_rate": 6.011355453902984e-07, "loss": 0.3947061598300934, "step": 3260 }, { "epoch": 2.575829383886256, "grad_norm": 15.44081680860732, "learning_rate": 5.989526539670992e-07, "loss": 1.9373308420181274, "step": 3261 }, { "epoch": 2.5766192733017377, "grad_norm": 15.004738901323284, "learning_rate": 5.967734805631913e-07, "loss": 0.90089350938797, "step": 3262 }, { "epoch": 2.5774091627172195, "grad_norm": 11.506465084275762, "learning_rate": 5.945980270195451e-07, "loss": 0.742828369140625, "step": 3263 }, { "epoch": 2.5781990521327014, "grad_norm": 13.32230582597808, "learning_rate": 5.924262951739929e-07, "loss": 0.24065065383911133, "step": 3264 }, { "epoch": 2.5789889415481833, "grad_norm": 9.779711772862912, "learning_rate": 5.902582868612211e-07, "loss": 0.5692986249923706, "step": 3265 }, { "epoch": 2.579778830963665, "grad_norm": 8.091630298353223, "learning_rate": 5.880940039127703e-07, "loss": 0.3704443573951721, "step": 3266 }, { "epoch": 2.580568720379147, "grad_norm": 10.939765166542545, "learning_rate": 5.859334481570328e-07, "loss": 0.5513951778411865, "step": 3267 }, { "epoch": 2.581358609794629, "grad_norm": 12.859593353523778, "learning_rate": 5.837766214192536e-07, "loss": 0.3680616319179535, "step": 3268 }, { "epoch": 2.5821484992101107, "grad_norm": 13.927295492601075, "learning_rate": 5.816235255215275e-07, "loss": 0.25186580419540405, "step": 3269 }, { "epoch": 2.5829383886255926, "grad_norm": 9.056287052188528, "learning_rate": 5.794741622827966e-07, "loss": 0.361020565032959, "step": 3270 }, { "epoch": 2.5837282780410744, "grad_norm": 9.11489390301563, "learning_rate": 5.773285335188499e-07, "loss": 0.567996084690094, "step": 3271 }, { "epoch": 2.5845181674565563, "grad_norm": 7.932039650484051, "learning_rate": 5.751866410423224e-07, "loss": 0.4094735085964203, "step": 3272 }, { "epoch": 2.585308056872038, "grad_norm": 11.984895953206886, "learning_rate": 5.730484866626912e-07, "loss": 0.30167537927627563, "step": 3273 }, { "epoch": 2.58609794628752, "grad_norm": 9.289484227401946, "learning_rate": 5.70914072186276e-07, "loss": 0.32061922550201416, "step": 3274 }, { "epoch": 2.586887835703002, "grad_norm": 9.622010065894505, "learning_rate": 5.687833994162378e-07, "loss": 0.32422295212745667, "step": 3275 }, { "epoch": 2.5876777251184833, "grad_norm": 14.227211464394562, "learning_rate": 5.666564701525762e-07, "loss": 0.27812737226486206, "step": 3276 }, { "epoch": 2.588467614533965, "grad_norm": 10.998826927820959, "learning_rate": 5.64533286192126e-07, "loss": 0.4728177785873413, "step": 3277 }, { "epoch": 2.589257503949447, "grad_norm": 13.428356160162433, "learning_rate": 5.624138493285636e-07, "loss": 0.36791884899139404, "step": 3278 }, { "epoch": 2.590047393364929, "grad_norm": 10.44768694966881, "learning_rate": 5.602981613523933e-07, "loss": 0.5437135100364685, "step": 3279 }, { "epoch": 2.5908372827804107, "grad_norm": 8.729760092755804, "learning_rate": 5.581862240509561e-07, "loss": 0.3248332440853119, "step": 3280 }, { "epoch": 2.5916271721958926, "grad_norm": 7.7718729951563725, "learning_rate": 5.560780392084236e-07, "loss": 0.30842357873916626, "step": 3281 }, { "epoch": 2.5924170616113744, "grad_norm": 8.64870834436161, "learning_rate": 5.539736086057968e-07, "loss": 0.5332027673721313, "step": 3282 }, { "epoch": 2.5932069510268563, "grad_norm": 12.471905432522792, "learning_rate": 5.518729340209067e-07, "loss": 0.3546110689640045, "step": 3283 }, { "epoch": 2.593996840442338, "grad_norm": 10.520884370675867, "learning_rate": 5.497760172284105e-07, "loss": 0.5727818012237549, "step": 3284 }, { "epoch": 2.59478672985782, "grad_norm": 21.842408865407872, "learning_rate": 5.476828599997891e-07, "loss": 0.9324047565460205, "step": 3285 }, { "epoch": 2.595576619273302, "grad_norm": 15.695330799147337, "learning_rate": 5.455934641033473e-07, "loss": 0.7432706952095032, "step": 3286 }, { "epoch": 2.5963665086887837, "grad_norm": 8.672936031824056, "learning_rate": 5.43507831304217e-07, "loss": 0.32435929775238037, "step": 3287 }, { "epoch": 2.597156398104265, "grad_norm": 12.82117590286232, "learning_rate": 5.414259633643454e-07, "loss": 0.31739306449890137, "step": 3288 }, { "epoch": 2.597946287519747, "grad_norm": 8.894198702503063, "learning_rate": 5.39347862042503e-07, "loss": 0.23482373356819153, "step": 3289 }, { "epoch": 2.598736176935229, "grad_norm": 9.107176351218104, "learning_rate": 5.372735290942749e-07, "loss": 0.37216585874557495, "step": 3290 }, { "epoch": 2.5995260663507107, "grad_norm": 12.065946085010948, "learning_rate": 5.352029662720643e-07, "loss": 0.45027846097946167, "step": 3291 }, { "epoch": 2.6003159557661926, "grad_norm": 9.353380690591916, "learning_rate": 5.331361753250908e-07, "loss": 0.5409231781959534, "step": 3292 }, { "epoch": 2.6011058451816744, "grad_norm": 9.465254647955879, "learning_rate": 5.31073157999386e-07, "loss": 0.30727618932724, "step": 3293 }, { "epoch": 2.6018957345971563, "grad_norm": 9.607201625715247, "learning_rate": 5.290139160377944e-07, "loss": 0.31384027004241943, "step": 3294 }, { "epoch": 2.602685624012638, "grad_norm": 14.736505951040836, "learning_rate": 5.269584511799674e-07, "loss": 0.9769009351730347, "step": 3295 }, { "epoch": 2.60347551342812, "grad_norm": 11.551220516709076, "learning_rate": 5.249067651623713e-07, "loss": 0.4081469178199768, "step": 3296 }, { "epoch": 2.604265402843602, "grad_norm": 11.149694766419422, "learning_rate": 5.228588597182771e-07, "loss": 0.31222042441368103, "step": 3297 }, { "epoch": 2.6050552922590837, "grad_norm": 12.284974464013962, "learning_rate": 5.208147365777605e-07, "loss": 0.3729371428489685, "step": 3298 }, { "epoch": 2.6058451816745656, "grad_norm": 6.66143671120458, "learning_rate": 5.187743974677051e-07, "loss": 0.23765571415424347, "step": 3299 }, { "epoch": 2.6066350710900474, "grad_norm": 10.189036379966337, "learning_rate": 5.167378441117948e-07, "loss": 0.5407176613807678, "step": 3300 }, { "epoch": 2.6074249605055293, "grad_norm": 10.508990210906765, "learning_rate": 5.147050782305174e-07, "loss": 0.4038906693458557, "step": 3301 }, { "epoch": 2.608214849921011, "grad_norm": 10.671951482144292, "learning_rate": 5.126761015411602e-07, "loss": 0.3577304482460022, "step": 3302 }, { "epoch": 2.609004739336493, "grad_norm": 16.67545595391455, "learning_rate": 5.106509157578088e-07, "loss": 0.6719971895217896, "step": 3303 }, { "epoch": 2.609794628751975, "grad_norm": 8.313398028205375, "learning_rate": 5.086295225913468e-07, "loss": 0.417365163564682, "step": 3304 }, { "epoch": 2.6105845181674567, "grad_norm": 6.802692561793048, "learning_rate": 5.066119237494543e-07, "loss": 0.20259469747543335, "step": 3305 }, { "epoch": 2.6113744075829386, "grad_norm": 14.896512148391754, "learning_rate": 5.045981209366058e-07, "loss": 0.6620730757713318, "step": 3306 }, { "epoch": 2.6121642969984205, "grad_norm": 10.671850166031216, "learning_rate": 5.025881158540674e-07, "loss": 0.6920949220657349, "step": 3307 }, { "epoch": 2.6129541864139023, "grad_norm": 9.483878722714492, "learning_rate": 5.005819101998993e-07, "loss": 0.24593792855739594, "step": 3308 }, { "epoch": 2.6137440758293837, "grad_norm": 12.335044317508292, "learning_rate": 4.985795056689496e-07, "loss": 0.38339120149612427, "step": 3309 }, { "epoch": 2.6145339652448656, "grad_norm": 11.267803688952569, "learning_rate": 4.965809039528557e-07, "loss": 0.7271929383277893, "step": 3310 }, { "epoch": 2.6153238546603474, "grad_norm": 9.841740710130463, "learning_rate": 4.945861067400459e-07, "loss": 0.33051010966300964, "step": 3311 }, { "epoch": 2.6161137440758293, "grad_norm": 8.575258139149547, "learning_rate": 4.925951157157282e-07, "loss": 0.40669572353363037, "step": 3312 }, { "epoch": 2.616903633491311, "grad_norm": 14.064126439379152, "learning_rate": 4.906079325618995e-07, "loss": 0.4142283797264099, "step": 3313 }, { "epoch": 2.617693522906793, "grad_norm": 10.926203888261908, "learning_rate": 4.886245589573379e-07, "loss": 0.3864701986312866, "step": 3314 }, { "epoch": 2.618483412322275, "grad_norm": 8.095460272657196, "learning_rate": 4.86644996577606e-07, "loss": 0.6239743828773499, "step": 3315 }, { "epoch": 2.6192733017377567, "grad_norm": 9.5988025752161, "learning_rate": 4.846692470950442e-07, "loss": 0.44168534874916077, "step": 3316 }, { "epoch": 2.6200631911532386, "grad_norm": 12.25520966680489, "learning_rate": 4.826973121787704e-07, "loss": 0.7248414754867554, "step": 3317 }, { "epoch": 2.6208530805687205, "grad_norm": 11.504118164211533, "learning_rate": 4.807291934946828e-07, "loss": 0.388072669506073, "step": 3318 }, { "epoch": 2.6216429699842023, "grad_norm": 11.714875357297148, "learning_rate": 4.787648927054534e-07, "loss": 0.31926900148391724, "step": 3319 }, { "epoch": 2.622432859399684, "grad_norm": 9.583077187413663, "learning_rate": 4.7680441147053225e-07, "loss": 0.25406989455223083, "step": 3320 }, { "epoch": 2.623222748815166, "grad_norm": 12.602149801934326, "learning_rate": 4.748477514461386e-07, "loss": 1.1567286252975464, "step": 3321 }, { "epoch": 2.6240126382306475, "grad_norm": 12.380509221098798, "learning_rate": 4.728949142852668e-07, "loss": 0.2009027898311615, "step": 3322 }, { "epoch": 2.6248025276461293, "grad_norm": 37.105558180160095, "learning_rate": 4.709459016376777e-07, "loss": 0.7523494362831116, "step": 3323 }, { "epoch": 2.625592417061611, "grad_norm": 8.134017867193721, "learning_rate": 4.6900071514990543e-07, "loss": 0.2318010777235031, "step": 3324 }, { "epoch": 2.626382306477093, "grad_norm": 8.83131524505039, "learning_rate": 4.670593564652498e-07, "loss": 0.43371960520744324, "step": 3325 }, { "epoch": 2.627172195892575, "grad_norm": 10.963397183621947, "learning_rate": 4.6512182722377677e-07, "loss": 0.36909347772598267, "step": 3326 }, { "epoch": 2.6279620853080567, "grad_norm": 12.084093646962547, "learning_rate": 4.63188129062318e-07, "loss": 0.388027161359787, "step": 3327 }, { "epoch": 2.6287519747235386, "grad_norm": 9.478263087592277, "learning_rate": 4.6125826361446633e-07, "loss": 0.3623710870742798, "step": 3328 }, { "epoch": 2.6295418641390205, "grad_norm": 18.041943384926824, "learning_rate": 4.593322325105798e-07, "loss": 2.725191831588745, "step": 3329 }, { "epoch": 2.6303317535545023, "grad_norm": 10.053745549005518, "learning_rate": 4.574100373777762e-07, "loss": 0.9186097383499146, "step": 3330 }, { "epoch": 2.631121642969984, "grad_norm": 16.56032999257091, "learning_rate": 4.554916798399311e-07, "loss": 0.7610374689102173, "step": 3331 }, { "epoch": 2.631911532385466, "grad_norm": 9.799384700311915, "learning_rate": 4.5357716151768037e-07, "loss": 0.22280161082744598, "step": 3332 }, { "epoch": 2.632701421800948, "grad_norm": 8.292734812546973, "learning_rate": 4.5166648402841464e-07, "loss": 0.3859997093677521, "step": 3333 }, { "epoch": 2.6334913112164298, "grad_norm": 8.969496231310965, "learning_rate": 4.49759648986281e-07, "loss": 1.129380226135254, "step": 3334 }, { "epoch": 2.6342812006319116, "grad_norm": 14.025626351449633, "learning_rate": 4.4785665800217925e-07, "loss": 0.7709635496139526, "step": 3335 }, { "epoch": 2.6350710900473935, "grad_norm": 12.132670588201732, "learning_rate": 4.459575126837634e-07, "loss": 0.31990846991539, "step": 3336 }, { "epoch": 2.6358609794628753, "grad_norm": 12.500867440865886, "learning_rate": 4.440622146354373e-07, "loss": 0.7797756195068359, "step": 3337 }, { "epoch": 2.636650868878357, "grad_norm": 13.48812236217527, "learning_rate": 4.421707654583546e-07, "loss": 0.7479414939880371, "step": 3338 }, { "epoch": 2.637440758293839, "grad_norm": 19.89516186489078, "learning_rate": 4.402831667504187e-07, "loss": 0.7601022720336914, "step": 3339 }, { "epoch": 2.638230647709321, "grad_norm": 11.98723585736858, "learning_rate": 4.38399420106278e-07, "loss": 0.5701296329498291, "step": 3340 }, { "epoch": 2.639020537124803, "grad_norm": 5.891135933647762, "learning_rate": 4.365195271173289e-07, "loss": 0.18816259503364563, "step": 3341 }, { "epoch": 2.639810426540284, "grad_norm": 14.919973147368829, "learning_rate": 4.3464348937170996e-07, "loss": 0.6145678162574768, "step": 3342 }, { "epoch": 2.640600315955766, "grad_norm": 13.275675106320598, "learning_rate": 4.327713084543056e-07, "loss": 0.7048325538635254, "step": 3343 }, { "epoch": 2.641390205371248, "grad_norm": 12.503928032714008, "learning_rate": 4.3090298594674006e-07, "loss": 0.3908374607563019, "step": 3344 }, { "epoch": 2.6421800947867298, "grad_norm": 11.72770662305032, "learning_rate": 4.290385234273775e-07, "loss": 0.505962610244751, "step": 3345 }, { "epoch": 2.6429699842022116, "grad_norm": 12.690173043224384, "learning_rate": 4.2717792247132293e-07, "loss": 0.4017457962036133, "step": 3346 }, { "epoch": 2.6437598736176935, "grad_norm": 8.36874609608357, "learning_rate": 4.253211846504163e-07, "loss": 0.22178924083709717, "step": 3347 }, { "epoch": 2.6445497630331753, "grad_norm": 10.103545343043429, "learning_rate": 4.234683115332383e-07, "loss": 0.2969557046890259, "step": 3348 }, { "epoch": 2.645339652448657, "grad_norm": 13.225582438481474, "learning_rate": 4.216193046851019e-07, "loss": 0.37480732798576355, "step": 3349 }, { "epoch": 2.646129541864139, "grad_norm": 10.84422500520785, "learning_rate": 4.1977416566805264e-07, "loss": 0.9092705845832825, "step": 3350 }, { "epoch": 2.646919431279621, "grad_norm": 9.789088606323828, "learning_rate": 4.179328960408696e-07, "loss": 0.31875336170196533, "step": 3351 }, { "epoch": 2.647709320695103, "grad_norm": 10.486891620887812, "learning_rate": 4.160954973590664e-07, "loss": 0.3058662414550781, "step": 3352 }, { "epoch": 2.6484992101105846, "grad_norm": 13.733159335396302, "learning_rate": 4.1426197117488134e-07, "loss": 2.2363317012786865, "step": 3353 }, { "epoch": 2.6492890995260665, "grad_norm": 6.946032041163559, "learning_rate": 4.1243231903728363e-07, "loss": 0.2704191505908966, "step": 3354 }, { "epoch": 2.650078988941548, "grad_norm": 11.504621566189082, "learning_rate": 4.106065424919703e-07, "loss": 0.3017812967300415, "step": 3355 }, { "epoch": 2.65086887835703, "grad_norm": 10.321032506154271, "learning_rate": 4.087846430813613e-07, "loss": 1.0040827989578247, "step": 3356 }, { "epoch": 2.6516587677725116, "grad_norm": 11.294039143603817, "learning_rate": 4.069666223446056e-07, "loss": 0.4513833522796631, "step": 3357 }, { "epoch": 2.6524486571879935, "grad_norm": 14.456974047266456, "learning_rate": 4.051524818175723e-07, "loss": 0.23808935284614563, "step": 3358 }, { "epoch": 2.6532385466034754, "grad_norm": 7.5143794681952745, "learning_rate": 4.033422230328526e-07, "loss": 0.2904347777366638, "step": 3359 }, { "epoch": 2.654028436018957, "grad_norm": 7.991560366796248, "learning_rate": 4.0153584751976007e-07, "loss": 0.4038187563419342, "step": 3360 }, { "epoch": 2.654818325434439, "grad_norm": 9.497887673841467, "learning_rate": 3.99733356804326e-07, "loss": 0.31666648387908936, "step": 3361 }, { "epoch": 2.655608214849921, "grad_norm": 10.401304732363979, "learning_rate": 3.9793475240930077e-07, "loss": 0.4911503493785858, "step": 3362 }, { "epoch": 2.656398104265403, "grad_norm": 12.221183465688934, "learning_rate": 3.9614003585415117e-07, "loss": 0.8834859728813171, "step": 3363 }, { "epoch": 2.6571879936808847, "grad_norm": 11.390696610946822, "learning_rate": 3.943492086550599e-07, "loss": 0.6366713047027588, "step": 3364 }, { "epoch": 2.6579778830963665, "grad_norm": 15.422428455701008, "learning_rate": 3.9256227232492337e-07, "loss": 0.3374771773815155, "step": 3365 }, { "epoch": 2.6587677725118484, "grad_norm": 9.583693820140757, "learning_rate": 3.907792283733514e-07, "loss": 0.7819290161132812, "step": 3366 }, { "epoch": 2.6595576619273302, "grad_norm": 12.981199540184159, "learning_rate": 3.8900007830666555e-07, "loss": 0.5065968036651611, "step": 3367 }, { "epoch": 2.660347551342812, "grad_norm": 7.236244572582725, "learning_rate": 3.872248236278975e-07, "loss": 0.6708056926727295, "step": 3368 }, { "epoch": 2.661137440758294, "grad_norm": 8.694974340614756, "learning_rate": 3.854534658367881e-07, "loss": 0.26976072788238525, "step": 3369 }, { "epoch": 2.661927330173776, "grad_norm": 11.235824129714906, "learning_rate": 3.836860064297854e-07, "loss": 0.3245980441570282, "step": 3370 }, { "epoch": 2.6627172195892577, "grad_norm": 16.011419931261926, "learning_rate": 3.819224469000482e-07, "loss": 0.7259745597839355, "step": 3371 }, { "epoch": 2.6635071090047395, "grad_norm": 19.22540678332816, "learning_rate": 3.8016278873743375e-07, "loss": 0.45876407623291016, "step": 3372 }, { "epoch": 2.6642969984202214, "grad_norm": 8.780171480326908, "learning_rate": 3.7840703342850893e-07, "loss": 0.278285413980484, "step": 3373 }, { "epoch": 2.6650868878357032, "grad_norm": 17.47655948792051, "learning_rate": 3.766551824565406e-07, "loss": 0.5746378898620605, "step": 3374 }, { "epoch": 2.665876777251185, "grad_norm": 23.25805650807008, "learning_rate": 3.7490723730149836e-07, "loss": 0.507803201675415, "step": 3375 }, { "epoch": 2.6666666666666665, "grad_norm": 9.306113512629594, "learning_rate": 3.731631994400536e-07, "loss": 0.18884757161140442, "step": 3376 }, { "epoch": 2.6674565560821484, "grad_norm": 8.348124341525704, "learning_rate": 3.7142307034557345e-07, "loss": 0.29510441422462463, "step": 3377 }, { "epoch": 2.6682464454976302, "grad_norm": 10.578489755562448, "learning_rate": 3.696868514881258e-07, "loss": 0.33782392740249634, "step": 3378 }, { "epoch": 2.669036334913112, "grad_norm": 14.26143918553158, "learning_rate": 3.679545443344723e-07, "loss": 0.9008026719093323, "step": 3379 }, { "epoch": 2.669826224328594, "grad_norm": 11.857039310446325, "learning_rate": 3.662261503480741e-07, "loss": 0.5593395233154297, "step": 3380 }, { "epoch": 2.670616113744076, "grad_norm": 10.652642297722368, "learning_rate": 3.6450167098908253e-07, "loss": 0.31769973039627075, "step": 3381 }, { "epoch": 2.6714060031595577, "grad_norm": 9.458143348474115, "learning_rate": 3.6278110771434504e-07, "loss": 0.2788546681404114, "step": 3382 }, { "epoch": 2.6721958925750395, "grad_norm": 13.225514562775611, "learning_rate": 3.610644619773973e-07, "loss": 0.6898187398910522, "step": 3383 }, { "epoch": 2.6729857819905214, "grad_norm": 10.118377023292018, "learning_rate": 3.59351735228467e-07, "loss": 0.2825648784637451, "step": 3384 }, { "epoch": 2.6737756714060033, "grad_norm": 5.075483514495616, "learning_rate": 3.576429289144734e-07, "loss": 0.23227611184120178, "step": 3385 }, { "epoch": 2.674565560821485, "grad_norm": 11.943311728775157, "learning_rate": 3.559380444790206e-07, "loss": 0.8256229758262634, "step": 3386 }, { "epoch": 2.675355450236967, "grad_norm": 8.112570938757484, "learning_rate": 3.54237083362402e-07, "loss": 0.5945410132408142, "step": 3387 }, { "epoch": 2.6761453396524484, "grad_norm": 15.634285640069708, "learning_rate": 3.525400470015916e-07, "loss": 0.6739033460617065, "step": 3388 }, { "epoch": 2.6769352290679302, "grad_norm": 20.357153137616915, "learning_rate": 3.508469368302542e-07, "loss": 0.45265576243400574, "step": 3389 }, { "epoch": 2.677725118483412, "grad_norm": 9.899660066154995, "learning_rate": 3.4915775427873445e-07, "loss": 0.4432603120803833, "step": 3390 }, { "epoch": 2.678515007898894, "grad_norm": 8.138889689930242, "learning_rate": 3.4747250077405925e-07, "loss": 0.19528892636299133, "step": 3391 }, { "epoch": 2.679304897314376, "grad_norm": 18.23444496802974, "learning_rate": 3.4579117773993586e-07, "loss": 0.5927311778068542, "step": 3392 }, { "epoch": 2.6800947867298577, "grad_norm": 10.188403198684156, "learning_rate": 3.4411378659675197e-07, "loss": 0.2367173135280609, "step": 3393 }, { "epoch": 2.6808846761453395, "grad_norm": 11.657906732572709, "learning_rate": 3.424403287615724e-07, "loss": 0.23523610830307007, "step": 3394 }, { "epoch": 2.6816745655608214, "grad_norm": 15.05635601715627, "learning_rate": 3.4077080564814126e-07, "loss": 0.5322354435920715, "step": 3395 }, { "epoch": 2.6824644549763033, "grad_norm": 12.057487107339211, "learning_rate": 3.391052186668753e-07, "loss": 0.7865498661994934, "step": 3396 }, { "epoch": 2.683254344391785, "grad_norm": 8.968136456469345, "learning_rate": 3.374435692248695e-07, "loss": 0.2981482148170471, "step": 3397 }, { "epoch": 2.684044233807267, "grad_norm": 7.657461394448841, "learning_rate": 3.3578585872589e-07, "loss": 0.39448219537734985, "step": 3398 }, { "epoch": 2.684834123222749, "grad_norm": 13.924417235813834, "learning_rate": 3.3413208857037636e-07, "loss": 0.33557915687561035, "step": 3399 }, { "epoch": 2.6856240126382307, "grad_norm": 8.80765991011228, "learning_rate": 3.324822601554389e-07, "loss": 0.4689452350139618, "step": 3400 }, { "epoch": 2.6864139020537126, "grad_norm": 10.376386874091963, "learning_rate": 3.308363748748583e-07, "loss": 0.5030757188796997, "step": 3401 }, { "epoch": 2.6872037914691944, "grad_norm": 7.506351165902702, "learning_rate": 3.2919443411908335e-07, "loss": 0.4964003562927246, "step": 3402 }, { "epoch": 2.6879936808846763, "grad_norm": 8.750015580357847, "learning_rate": 3.275564392752306e-07, "loss": 0.6735177040100098, "step": 3403 }, { "epoch": 2.688783570300158, "grad_norm": 8.400292003551172, "learning_rate": 3.25922391727086e-07, "loss": 0.5502939224243164, "step": 3404 }, { "epoch": 2.68957345971564, "grad_norm": 17.77933866658459, "learning_rate": 3.2429229285509565e-07, "loss": 0.561028003692627, "step": 3405 }, { "epoch": 2.690363349131122, "grad_norm": 8.989376021851474, "learning_rate": 3.226661440363732e-07, "loss": 0.4054949879646301, "step": 3406 }, { "epoch": 2.6911532385466037, "grad_norm": 8.662923922835425, "learning_rate": 3.210439466446941e-07, "loss": 0.18396064639091492, "step": 3407 }, { "epoch": 2.6919431279620856, "grad_norm": 13.879543447285586, "learning_rate": 3.194257020504976e-07, "loss": 0.49747684597969055, "step": 3408 }, { "epoch": 2.692733017377567, "grad_norm": 10.218308650070451, "learning_rate": 3.178114116208819e-07, "loss": 0.2940269708633423, "step": 3409 }, { "epoch": 2.693522906793049, "grad_norm": 11.95090160706378, "learning_rate": 3.1620107671960274e-07, "loss": 0.5844002366065979, "step": 3410 }, { "epoch": 2.6943127962085307, "grad_norm": 12.15425735480325, "learning_rate": 3.14594698707078e-07, "loss": 0.3902283012866974, "step": 3411 }, { "epoch": 2.6951026856240126, "grad_norm": 7.527465679424551, "learning_rate": 3.1299227894038e-07, "loss": 0.25770941376686096, "step": 3412 }, { "epoch": 2.6958925750394944, "grad_norm": 19.33893813349674, "learning_rate": 3.113938187732396e-07, "loss": 0.4086335003376007, "step": 3413 }, { "epoch": 2.6966824644549763, "grad_norm": 11.241807816989384, "learning_rate": 3.097993195560406e-07, "loss": 0.28467923402786255, "step": 3414 }, { "epoch": 2.697472353870458, "grad_norm": 6.848242031731855, "learning_rate": 3.082087826358221e-07, "loss": 0.460833340883255, "step": 3415 }, { "epoch": 2.69826224328594, "grad_norm": 13.881441071873525, "learning_rate": 3.0662220935627264e-07, "loss": 0.5943915843963623, "step": 3416 }, { "epoch": 2.699052132701422, "grad_norm": 13.318701652240515, "learning_rate": 3.0503960105773664e-07, "loss": 0.3852251470088959, "step": 3417 }, { "epoch": 2.6998420221169037, "grad_norm": 11.40666777900844, "learning_rate": 3.034609590772064e-07, "loss": 0.43630069494247437, "step": 3418 }, { "epoch": 2.7006319115323856, "grad_norm": 17.06726177611801, "learning_rate": 3.0188628474832283e-07, "loss": 0.3444702625274658, "step": 3419 }, { "epoch": 2.7014218009478674, "grad_norm": 11.317050973138297, "learning_rate": 3.0031557940137846e-07, "loss": 0.5595932006835938, "step": 3420 }, { "epoch": 2.7022116903633493, "grad_norm": 6.595748201513425, "learning_rate": 2.987488443633063e-07, "loss": 0.3122694492340088, "step": 3421 }, { "epoch": 2.7030015797788307, "grad_norm": 9.417440233234284, "learning_rate": 2.971860809576926e-07, "loss": 0.602641761302948, "step": 3422 }, { "epoch": 2.7037914691943126, "grad_norm": 17.899529937935377, "learning_rate": 2.956272905047641e-07, "loss": 1.1811764240264893, "step": 3423 }, { "epoch": 2.7045813586097944, "grad_norm": 21.27161915680074, "learning_rate": 2.9407247432139184e-07, "loss": 0.5429356694221497, "step": 3424 }, { "epoch": 2.7053712480252763, "grad_norm": 14.45817636342026, "learning_rate": 2.9252163372109013e-07, "loss": 0.2626524567604065, "step": 3425 }, { "epoch": 2.706161137440758, "grad_norm": 11.966013741915225, "learning_rate": 2.9097477001401364e-07, "loss": 0.3152087926864624, "step": 3426 }, { "epoch": 2.70695102685624, "grad_norm": 9.01180406739463, "learning_rate": 2.8943188450695824e-07, "loss": 0.5591740608215332, "step": 3427 }, { "epoch": 2.707740916271722, "grad_norm": 15.155219955926972, "learning_rate": 2.878929785033585e-07, "loss": 0.8030872941017151, "step": 3428 }, { "epoch": 2.7085308056872037, "grad_norm": 9.0050236739937, "learning_rate": 2.8635805330328783e-07, "loss": 0.6855502128601074, "step": 3429 }, { "epoch": 2.7093206951026856, "grad_norm": 9.140820391889852, "learning_rate": 2.8482711020345556e-07, "loss": 0.8755874037742615, "step": 3430 }, { "epoch": 2.7101105845181674, "grad_norm": 15.402621823538889, "learning_rate": 2.833001504972077e-07, "loss": 0.429756760597229, "step": 3431 }, { "epoch": 2.7109004739336493, "grad_norm": 9.610037912675201, "learning_rate": 2.8177717547452463e-07, "loss": 0.405164510011673, "step": 3432 }, { "epoch": 2.711690363349131, "grad_norm": 13.695365189160531, "learning_rate": 2.8025818642202054e-07, "loss": 0.6108412742614746, "step": 3433 }, { "epoch": 2.712480252764613, "grad_norm": 13.619752565690785, "learning_rate": 2.787431846229427e-07, "loss": 0.454412043094635, "step": 3434 }, { "epoch": 2.713270142180095, "grad_norm": 11.53614981565061, "learning_rate": 2.7723217135716906e-07, "loss": 0.4228717088699341, "step": 3435 }, { "epoch": 2.7140600315955767, "grad_norm": 6.706916975241827, "learning_rate": 2.757251479012102e-07, "loss": 0.2297818660736084, "step": 3436 }, { "epoch": 2.7148499210110586, "grad_norm": 15.153915751186066, "learning_rate": 2.742221155282027e-07, "loss": 0.441479355096817, "step": 3437 }, { "epoch": 2.7156398104265405, "grad_norm": 12.736343688491294, "learning_rate": 2.727230755079141e-07, "loss": 0.3135189712047577, "step": 3438 }, { "epoch": 2.7164296998420223, "grad_norm": 10.469524983647013, "learning_rate": 2.712280291067382e-07, "loss": 0.2524583637714386, "step": 3439 }, { "epoch": 2.717219589257504, "grad_norm": 10.21269953062072, "learning_rate": 2.6973697758769404e-07, "loss": 0.6804049015045166, "step": 3440 }, { "epoch": 2.718009478672986, "grad_norm": 13.439002602740715, "learning_rate": 2.68249922210429e-07, "loss": 1.7227437496185303, "step": 3441 }, { "epoch": 2.7187993680884674, "grad_norm": 9.105196897219319, "learning_rate": 2.667668642312121e-07, "loss": 0.24244064092636108, "step": 3442 }, { "epoch": 2.7195892575039493, "grad_norm": 17.032916970105017, "learning_rate": 2.6528780490293394e-07, "loss": 0.42305219173431396, "step": 3443 }, { "epoch": 2.720379146919431, "grad_norm": 8.341873292787225, "learning_rate": 2.638127454751083e-07, "loss": 0.3290414810180664, "step": 3444 }, { "epoch": 2.721169036334913, "grad_norm": 14.438304518877404, "learning_rate": 2.6234168719387275e-07, "loss": 0.7699002027511597, "step": 3445 }, { "epoch": 2.721958925750395, "grad_norm": 11.533451670720245, "learning_rate": 2.6087463130198053e-07, "loss": 0.39063939452171326, "step": 3446 }, { "epoch": 2.7227488151658767, "grad_norm": 7.533668401408119, "learning_rate": 2.594115790388069e-07, "loss": 0.30158624053001404, "step": 3447 }, { "epoch": 2.7235387045813586, "grad_norm": 9.966662834105735, "learning_rate": 2.5795253164034084e-07, "loss": 0.7435629367828369, "step": 3448 }, { "epoch": 2.7243285939968405, "grad_norm": 14.725256543363951, "learning_rate": 2.564974903391915e-07, "loss": 0.22876577079296112, "step": 3449 }, { "epoch": 2.7251184834123223, "grad_norm": 11.42689474920191, "learning_rate": 2.550464563645827e-07, "loss": 0.48019081354141235, "step": 3450 }, { "epoch": 2.725908372827804, "grad_norm": 6.525526730403295, "learning_rate": 2.5359943094235284e-07, "loss": 0.3264992833137512, "step": 3451 }, { "epoch": 2.726698262243286, "grad_norm": 12.867989920941069, "learning_rate": 2.521564152949535e-07, "loss": 0.6327470541000366, "step": 3452 }, { "epoch": 2.727488151658768, "grad_norm": 33.14582147619613, "learning_rate": 2.5071741064144893e-07, "loss": 0.7746727466583252, "step": 3453 }, { "epoch": 2.7282780410742498, "grad_norm": 11.842666297046986, "learning_rate": 2.4928241819751506e-07, "loss": 0.41545653343200684, "step": 3454 }, { "epoch": 2.729067930489731, "grad_norm": 10.904910444914098, "learning_rate": 2.4785143917543886e-07, "loss": 0.30271655321121216, "step": 3455 }, { "epoch": 2.729857819905213, "grad_norm": 7.237418545407075, "learning_rate": 2.464244747841155e-07, "loss": 0.2641463577747345, "step": 3456 }, { "epoch": 2.730647709320695, "grad_norm": 12.797520847903757, "learning_rate": 2.4500152622904895e-07, "loss": 0.18360668420791626, "step": 3457 }, { "epoch": 2.7314375987361768, "grad_norm": 6.889781589750479, "learning_rate": 2.435825947123516e-07, "loss": 0.2461070865392685, "step": 3458 }, { "epoch": 2.7322274881516586, "grad_norm": 9.695805170509885, "learning_rate": 2.4216768143274115e-07, "loss": 0.5353419780731201, "step": 3459 }, { "epoch": 2.7330173775671405, "grad_norm": 10.343918221842735, "learning_rate": 2.4075678758554047e-07, "loss": 0.6046707630157471, "step": 3460 }, { "epoch": 2.7338072669826223, "grad_norm": 9.007895090494918, "learning_rate": 2.3934991436267816e-07, "loss": 0.2909160852432251, "step": 3461 }, { "epoch": 2.734597156398104, "grad_norm": 10.62496325664592, "learning_rate": 2.3794706295268476e-07, "loss": 0.30107566714286804, "step": 3462 }, { "epoch": 2.735387045813586, "grad_norm": 13.67484568966304, "learning_rate": 2.365482345406933e-07, "loss": 0.3950386643409729, "step": 3463 }, { "epoch": 2.736176935229068, "grad_norm": 21.77362986226219, "learning_rate": 2.3515343030844073e-07, "loss": 1.0076971054077148, "step": 3464 }, { "epoch": 2.7369668246445498, "grad_norm": 7.730693964041083, "learning_rate": 2.3376265143426003e-07, "loss": 0.24211300909519196, "step": 3465 }, { "epoch": 2.7377567140600316, "grad_norm": 9.296217895983373, "learning_rate": 2.3237589909308632e-07, "loss": 0.2887963056564331, "step": 3466 }, { "epoch": 2.7385466034755135, "grad_norm": 10.840522993677942, "learning_rate": 2.309931744564531e-07, "loss": 0.41945865750312805, "step": 3467 }, { "epoch": 2.7393364928909953, "grad_norm": 8.917010556292599, "learning_rate": 2.2961447869248977e-07, "loss": 0.20327429473400116, "step": 3468 }, { "epoch": 2.740126382306477, "grad_norm": 16.489207308906405, "learning_rate": 2.2823981296592468e-07, "loss": 0.445822149515152, "step": 3469 }, { "epoch": 2.740916271721959, "grad_norm": 13.177843752844728, "learning_rate": 2.2686917843807832e-07, "loss": 0.4169418513774872, "step": 3470 }, { "epoch": 2.741706161137441, "grad_norm": 18.679760356479285, "learning_rate": 2.2550257626686835e-07, "loss": 0.29727548360824585, "step": 3471 }, { "epoch": 2.742496050552923, "grad_norm": 17.416569754051274, "learning_rate": 2.2414000760680344e-07, "loss": 0.9264640808105469, "step": 3472 }, { "epoch": 2.7432859399684046, "grad_norm": 11.51848657708186, "learning_rate": 2.2278147360898726e-07, "loss": 0.31270313262939453, "step": 3473 }, { "epoch": 2.7440758293838865, "grad_norm": 9.677231027214653, "learning_rate": 2.2142697542111403e-07, "loss": 0.519598126411438, "step": 3474 }, { "epoch": 2.7448657187993684, "grad_norm": 8.89666307610365, "learning_rate": 2.2007651418746777e-07, "loss": 0.26155680418014526, "step": 3475 }, { "epoch": 2.7456556082148498, "grad_norm": 8.068195381347717, "learning_rate": 2.1873009104892207e-07, "loss": 0.26666006445884705, "step": 3476 }, { "epoch": 2.7464454976303316, "grad_norm": 9.967802346446724, "learning_rate": 2.1738770714293978e-07, "loss": 0.4388732612133026, "step": 3477 }, { "epoch": 2.7472353870458135, "grad_norm": 12.682142726744404, "learning_rate": 2.160493636035721e-07, "loss": 1.5601736307144165, "step": 3478 }, { "epoch": 2.7480252764612954, "grad_norm": 8.216179392592059, "learning_rate": 2.1471506156145572e-07, "loss": 0.34426096081733704, "step": 3479 }, { "epoch": 2.748815165876777, "grad_norm": 12.700475916611518, "learning_rate": 2.133848021438134e-07, "loss": 0.3579084873199463, "step": 3480 }, { "epoch": 2.749605055292259, "grad_norm": 8.827090645854712, "learning_rate": 2.1205858647445175e-07, "loss": 0.3631330728530884, "step": 3481 }, { "epoch": 2.750394944707741, "grad_norm": 9.001270743632745, "learning_rate": 2.107364156737629e-07, "loss": 0.30411210656166077, "step": 3482 }, { "epoch": 2.751184834123223, "grad_norm": 9.409837583682123, "learning_rate": 2.0941829085872168e-07, "loss": 0.23499026894569397, "step": 3483 }, { "epoch": 2.7519747235387046, "grad_norm": 12.705050308923921, "learning_rate": 2.0810421314288342e-07, "loss": 1.2970982789993286, "step": 3484 }, { "epoch": 2.7527646129541865, "grad_norm": 8.03711225808964, "learning_rate": 2.067941836363857e-07, "loss": 0.5299091339111328, "step": 3485 }, { "epoch": 2.7535545023696684, "grad_norm": 11.220356334155515, "learning_rate": 2.0548820344594544e-07, "loss": 0.4197993278503418, "step": 3486 }, { "epoch": 2.7543443917851502, "grad_norm": 12.91965843833499, "learning_rate": 2.04186273674859e-07, "loss": 0.6132807731628418, "step": 3487 }, { "epoch": 2.7551342812006316, "grad_norm": 18.240887411131222, "learning_rate": 2.02888395423001e-07, "loss": 1.271854281425476, "step": 3488 }, { "epoch": 2.7559241706161135, "grad_norm": 10.732674912361663, "learning_rate": 2.0159456978682378e-07, "loss": 0.3073996901512146, "step": 3489 }, { "epoch": 2.7567140600315954, "grad_norm": 9.252251075885143, "learning_rate": 2.0030479785935532e-07, "loss": 0.2578376531600952, "step": 3490 }, { "epoch": 2.757503949447077, "grad_norm": 8.516995882822272, "learning_rate": 1.9901908073019837e-07, "loss": 0.519225001335144, "step": 3491 }, { "epoch": 2.758293838862559, "grad_norm": 8.43300426338786, "learning_rate": 1.9773741948553194e-07, "loss": 0.199580118060112, "step": 3492 }, { "epoch": 2.759083728278041, "grad_norm": 9.32908519660615, "learning_rate": 1.964598152081071e-07, "loss": 0.4582338333129883, "step": 3493 }, { "epoch": 2.759873617693523, "grad_norm": 9.046945945787343, "learning_rate": 1.9518626897724878e-07, "loss": 0.25943028926849365, "step": 3494 }, { "epoch": 2.7606635071090047, "grad_norm": 11.314219665735752, "learning_rate": 1.93916781868852e-07, "loss": 0.4258866608142853, "step": 3495 }, { "epoch": 2.7614533965244865, "grad_norm": 10.20689716332431, "learning_rate": 1.9265135495538488e-07, "loss": 0.271173357963562, "step": 3496 }, { "epoch": 2.7622432859399684, "grad_norm": 16.200438350903056, "learning_rate": 1.9138998930588348e-07, "loss": 0.4719555377960205, "step": 3497 }, { "epoch": 2.7630331753554502, "grad_norm": 11.738594703612247, "learning_rate": 1.901326859859537e-07, "loss": 0.980524480342865, "step": 3498 }, { "epoch": 2.763823064770932, "grad_norm": 17.919465561173997, "learning_rate": 1.888794460577692e-07, "loss": 0.9541090726852417, "step": 3499 }, { "epoch": 2.764612954186414, "grad_norm": 13.890459920478602, "learning_rate": 1.8763027058007145e-07, "loss": 0.69322669506073, "step": 3500 }, { "epoch": 2.765402843601896, "grad_norm": 16.321197407525634, "learning_rate": 1.8638516060816903e-07, "loss": 0.42115840315818787, "step": 3501 }, { "epoch": 2.7661927330173777, "grad_norm": 10.96242137226945, "learning_rate": 1.8514411719393445e-07, "loss": 0.46676504611968994, "step": 3502 }, { "epoch": 2.7669826224328595, "grad_norm": 8.812275087497374, "learning_rate": 1.8390714138580457e-07, "loss": 0.5044353604316711, "step": 3503 }, { "epoch": 2.7677725118483414, "grad_norm": 17.943245247505377, "learning_rate": 1.8267423422878073e-07, "loss": 0.41426870226860046, "step": 3504 }, { "epoch": 2.7685624012638232, "grad_norm": 10.71591258305487, "learning_rate": 1.814453967644264e-07, "loss": 0.6353201270103455, "step": 3505 }, { "epoch": 2.769352290679305, "grad_norm": 10.317665072167928, "learning_rate": 1.8022063003086952e-07, "loss": 0.20802390575408936, "step": 3506 }, { "epoch": 2.770142180094787, "grad_norm": 9.95179330655714, "learning_rate": 1.7899993506279577e-07, "loss": 0.38674095273017883, "step": 3507 }, { "epoch": 2.770932069510269, "grad_norm": 9.564138951257297, "learning_rate": 1.7778331289145246e-07, "loss": 0.37485527992248535, "step": 3508 }, { "epoch": 2.7717219589257502, "grad_norm": 11.837113131259644, "learning_rate": 1.765707645446446e-07, "loss": 1.245941162109375, "step": 3509 }, { "epoch": 2.772511848341232, "grad_norm": 16.993616371219062, "learning_rate": 1.7536229104673952e-07, "loss": 1.4415150880813599, "step": 3510 }, { "epoch": 2.773301737756714, "grad_norm": 18.46978395432566, "learning_rate": 1.741578934186583e-07, "loss": 0.48280513286590576, "step": 3511 }, { "epoch": 2.774091627172196, "grad_norm": 19.046571471804004, "learning_rate": 1.7295757267787982e-07, "loss": 0.6890305280685425, "step": 3512 }, { "epoch": 2.7748815165876777, "grad_norm": 8.374569669050457, "learning_rate": 1.717613298384402e-07, "loss": 0.17919717729091644, "step": 3513 }, { "epoch": 2.7756714060031595, "grad_norm": 14.297372965397592, "learning_rate": 1.7056916591092765e-07, "loss": 0.9247697591781616, "step": 3514 }, { "epoch": 2.7764612954186414, "grad_norm": 12.33201213532176, "learning_rate": 1.6938108190248714e-07, "loss": 0.7695714831352234, "step": 3515 }, { "epoch": 2.7772511848341233, "grad_norm": 11.395421501521598, "learning_rate": 1.681970788168158e-07, "loss": 0.5965884923934937, "step": 3516 }, { "epoch": 2.778041074249605, "grad_norm": 11.322776826507724, "learning_rate": 1.670171576541635e-07, "loss": 0.28291648626327515, "step": 3517 }, { "epoch": 2.778830963665087, "grad_norm": 9.052162465156956, "learning_rate": 1.658413194113312e-07, "loss": 0.26967617869377136, "step": 3518 }, { "epoch": 2.779620853080569, "grad_norm": 8.85381583315956, "learning_rate": 1.6466956508167098e-07, "loss": 0.27431443333625793, "step": 3519 }, { "epoch": 2.7804107424960507, "grad_norm": 10.26241792481637, "learning_rate": 1.635018956550849e-07, "loss": 0.5575605630874634, "step": 3520 }, { "epoch": 2.7812006319115326, "grad_norm": 19.04805022049209, "learning_rate": 1.6233831211802443e-07, "loss": 0.507323145866394, "step": 3521 }, { "epoch": 2.781990521327014, "grad_norm": 12.25593303035708, "learning_rate": 1.6117881545348768e-07, "loss": 0.7778584957122803, "step": 3522 }, { "epoch": 2.782780410742496, "grad_norm": 10.716935186355487, "learning_rate": 1.6002340664102222e-07, "loss": 0.4240133762359619, "step": 3523 }, { "epoch": 2.7835703001579777, "grad_norm": 10.691328997589723, "learning_rate": 1.588720866567206e-07, "loss": 0.3414255976676941, "step": 3524 }, { "epoch": 2.7843601895734595, "grad_norm": 11.768217491281503, "learning_rate": 1.57724856473222e-07, "loss": 0.41171273589134216, "step": 3525 }, { "epoch": 2.7851500789889414, "grad_norm": 11.135806497121866, "learning_rate": 1.5658171705971002e-07, "loss": 0.44822290539741516, "step": 3526 }, { "epoch": 2.7859399684044233, "grad_norm": 12.802286130199466, "learning_rate": 1.5544266938191277e-07, "loss": 0.5907123684883118, "step": 3527 }, { "epoch": 2.786729857819905, "grad_norm": 9.291881799567307, "learning_rate": 1.5430771440210102e-07, "loss": 0.5149095058441162, "step": 3528 }, { "epoch": 2.787519747235387, "grad_norm": 11.903535206167968, "learning_rate": 1.5317685307909003e-07, "loss": 0.3978126645088196, "step": 3529 }, { "epoch": 2.788309636650869, "grad_norm": 11.87266940877088, "learning_rate": 1.5205008636823392e-07, "loss": 0.7672142386436462, "step": 3530 }, { "epoch": 2.7890995260663507, "grad_norm": 12.664993847505396, "learning_rate": 1.509274152214285e-07, "loss": 0.6181859970092773, "step": 3531 }, { "epoch": 2.7898894154818326, "grad_norm": 11.706302565544545, "learning_rate": 1.4980884058711122e-07, "loss": 0.2257220596075058, "step": 3532 }, { "epoch": 2.7906793048973144, "grad_norm": 11.269026746440312, "learning_rate": 1.486943634102561e-07, "loss": 0.411458283662796, "step": 3533 }, { "epoch": 2.7914691943127963, "grad_norm": 11.770992911424287, "learning_rate": 1.4758398463237844e-07, "loss": 0.391770601272583, "step": 3534 }, { "epoch": 2.792259083728278, "grad_norm": 12.179827825593687, "learning_rate": 1.464777051915306e-07, "loss": 0.7870375514030457, "step": 3535 }, { "epoch": 2.79304897314376, "grad_norm": 10.205085198463788, "learning_rate": 1.4537552602229888e-07, "loss": 0.24328409135341644, "step": 3536 }, { "epoch": 2.793838862559242, "grad_norm": 11.707290493646688, "learning_rate": 1.4427744805580902e-07, "loss": 0.7062838077545166, "step": 3537 }, { "epoch": 2.7946287519747237, "grad_norm": 9.449178364796186, "learning_rate": 1.4318347221972118e-07, "loss": 0.4576803743839264, "step": 3538 }, { "epoch": 2.7954186413902056, "grad_norm": 8.800231574547174, "learning_rate": 1.420935994382294e-07, "loss": 0.6782954931259155, "step": 3539 }, { "epoch": 2.7962085308056874, "grad_norm": 20.41988700193614, "learning_rate": 1.4100783063206224e-07, "loss": 0.5219430923461914, "step": 3540 }, { "epoch": 2.7969984202211693, "grad_norm": 7.5166874724474715, "learning_rate": 1.3992616671847981e-07, "loss": 0.3515085279941559, "step": 3541 }, { "epoch": 2.7977883096366507, "grad_norm": 6.932947866396507, "learning_rate": 1.3884860861127558e-07, "loss": 0.47976720333099365, "step": 3542 }, { "epoch": 2.7985781990521326, "grad_norm": 9.491598002493626, "learning_rate": 1.377751572207753e-07, "loss": 0.6151677370071411, "step": 3543 }, { "epoch": 2.7993680884676144, "grad_norm": 9.194411344753396, "learning_rate": 1.3670581345383294e-07, "loss": 0.26150283217430115, "step": 3544 }, { "epoch": 2.8001579778830963, "grad_norm": 17.758549126307468, "learning_rate": 1.3564057821383426e-07, "loss": 0.48430135846138, "step": 3545 }, { "epoch": 2.800947867298578, "grad_norm": 10.109089794707371, "learning_rate": 1.345794524006938e-07, "loss": 0.4719211459159851, "step": 3546 }, { "epoch": 2.80173775671406, "grad_norm": 6.966698821581658, "learning_rate": 1.3352243691085343e-07, "loss": 0.34015411138534546, "step": 3547 }, { "epoch": 2.802527646129542, "grad_norm": 17.133059090598415, "learning_rate": 1.3246953263728323e-07, "loss": 0.662192165851593, "step": 3548 }, { "epoch": 2.8033175355450237, "grad_norm": 13.801086934095315, "learning_rate": 1.3142074046948117e-07, "loss": 0.48745739459991455, "step": 3549 }, { "epoch": 2.8041074249605056, "grad_norm": 9.455351798236373, "learning_rate": 1.3037606129346903e-07, "loss": 0.4470326602458954, "step": 3550 }, { "epoch": 2.8048973143759874, "grad_norm": 16.270417938160158, "learning_rate": 1.293354959917964e-07, "loss": 0.5577215552330017, "step": 3551 }, { "epoch": 2.8056872037914693, "grad_norm": 7.398103500346509, "learning_rate": 1.282990454435362e-07, "loss": 0.33731377124786377, "step": 3552 }, { "epoch": 2.806477093206951, "grad_norm": 11.993082049967715, "learning_rate": 1.2726671052428418e-07, "loss": 0.3734150528907776, "step": 3553 }, { "epoch": 2.807266982622433, "grad_norm": 13.661721906893305, "learning_rate": 1.26238492106161e-07, "loss": 0.2888742685317993, "step": 3554 }, { "epoch": 2.8080568720379144, "grad_norm": 10.344916708225794, "learning_rate": 1.2521439105780909e-07, "loss": 0.3093745708465576, "step": 3555 }, { "epoch": 2.8088467614533963, "grad_norm": 13.865074941166652, "learning_rate": 1.2419440824439188e-07, "loss": 0.4877317547798157, "step": 3556 }, { "epoch": 2.809636650868878, "grad_norm": 10.788397713560993, "learning_rate": 1.231785445275957e-07, "loss": 0.4557979702949524, "step": 3557 }, { "epoch": 2.81042654028436, "grad_norm": 10.285465020965095, "learning_rate": 1.2216680076562347e-07, "loss": 0.3419281840324402, "step": 3558 }, { "epoch": 2.811216429699842, "grad_norm": 8.877423938913571, "learning_rate": 1.2115917781320096e-07, "loss": 0.4035925269126892, "step": 3559 }, { "epoch": 2.8120063191153237, "grad_norm": 13.435791700322584, "learning_rate": 1.2015567652157057e-07, "loss": 0.6396586298942566, "step": 3560 }, { "epoch": 2.8127962085308056, "grad_norm": 19.702185429939885, "learning_rate": 1.1915629773849358e-07, "loss": 1.082403540611267, "step": 3561 }, { "epoch": 2.8135860979462874, "grad_norm": 11.822298832725835, "learning_rate": 1.1816104230825021e-07, "loss": 0.3863110840320587, "step": 3562 }, { "epoch": 2.8143759873617693, "grad_norm": 8.295210827383162, "learning_rate": 1.1716991107163289e-07, "loss": 0.58207106590271, "step": 3563 }, { "epoch": 2.815165876777251, "grad_norm": 14.2016386137118, "learning_rate": 1.1618290486595463e-07, "loss": 0.2098863422870636, "step": 3564 }, { "epoch": 2.815955766192733, "grad_norm": 10.082988120031807, "learning_rate": 1.1520002452503953e-07, "loss": 0.27094680070877075, "step": 3565 }, { "epoch": 2.816745655608215, "grad_norm": 5.875570359193218, "learning_rate": 1.1422127087923007e-07, "loss": 0.21748504042625427, "step": 3566 }, { "epoch": 2.8175355450236967, "grad_norm": 12.438687688635737, "learning_rate": 1.1324664475538038e-07, "loss": 0.5272513031959534, "step": 3567 }, { "epoch": 2.8183254344391786, "grad_norm": 10.889129102864676, "learning_rate": 1.1227614697685629e-07, "loss": 0.9930387139320374, "step": 3568 }, { "epoch": 2.8191153238546605, "grad_norm": 12.382867902327071, "learning_rate": 1.1130977836353862e-07, "loss": 0.3840116858482361, "step": 3569 }, { "epoch": 2.8199052132701423, "grad_norm": 9.33063650473277, "learning_rate": 1.1034753973181877e-07, "loss": 0.24336904287338257, "step": 3570 }, { "epoch": 2.820695102685624, "grad_norm": 15.374029447353267, "learning_rate": 1.0938943189459872e-07, "loss": 0.3328930735588074, "step": 3571 }, { "epoch": 2.821484992101106, "grad_norm": 10.569256308523215, "learning_rate": 1.0843545566129154e-07, "loss": 0.40318596363067627, "step": 3572 }, { "epoch": 2.822274881516588, "grad_norm": 11.764490990287323, "learning_rate": 1.074856118378198e-07, "loss": 0.2967602014541626, "step": 3573 }, { "epoch": 2.8230647709320698, "grad_norm": 11.711306728260894, "learning_rate": 1.0653990122661273e-07, "loss": 0.20409056544303894, "step": 3574 }, { "epoch": 2.8238546603475516, "grad_norm": 8.980141255462513, "learning_rate": 1.0559832462661234e-07, "loss": 0.4850717782974243, "step": 3575 }, { "epoch": 2.824644549763033, "grad_norm": 17.31606580117272, "learning_rate": 1.0466088283326404e-07, "loss": 0.6416319608688354, "step": 3576 }, { "epoch": 2.825434439178515, "grad_norm": 15.344712637864934, "learning_rate": 1.0372757663852262e-07, "loss": 0.46508005261421204, "step": 3577 }, { "epoch": 2.8262243285939967, "grad_norm": 8.57007968363552, "learning_rate": 1.0279840683084741e-07, "loss": 0.8428210616111755, "step": 3578 }, { "epoch": 2.8270142180094786, "grad_norm": 8.550821162807685, "learning_rate": 1.0187337419520493e-07, "loss": 0.682414710521698, "step": 3579 }, { "epoch": 2.8278041074249605, "grad_norm": 7.199168718842685, "learning_rate": 1.0095247951306508e-07, "loss": 0.25026553869247437, "step": 3580 }, { "epoch": 2.8285939968404423, "grad_norm": 12.81831188890812, "learning_rate": 1.0003572356240333e-07, "loss": 0.933559238910675, "step": 3581 }, { "epoch": 2.829383886255924, "grad_norm": 7.6333393554744156, "learning_rate": 9.912310711769734e-08, "loss": 0.26869896054267883, "step": 3582 }, { "epoch": 2.830173775671406, "grad_norm": 9.048728044949662, "learning_rate": 9.821463094992934e-08, "loss": 0.44904714822769165, "step": 3583 }, { "epoch": 2.830963665086888, "grad_norm": 10.975379290838536, "learning_rate": 9.731029582658258e-08, "loss": 0.7174665927886963, "step": 3584 }, { "epoch": 2.8317535545023698, "grad_norm": 9.80556755933507, "learning_rate": 9.641010251164263e-08, "loss": 0.47703051567077637, "step": 3585 }, { "epoch": 2.8325434439178516, "grad_norm": 9.546590438112691, "learning_rate": 9.551405176559558e-08, "loss": 0.32327979803085327, "step": 3586 }, { "epoch": 2.8333333333333335, "grad_norm": 11.700829296565805, "learning_rate": 9.46221443454276e-08, "loss": 0.7538062334060669, "step": 3587 }, { "epoch": 2.834123222748815, "grad_norm": 13.33437853882375, "learning_rate": 9.37343810046254e-08, "loss": 0.5941387414932251, "step": 3588 }, { "epoch": 2.8349131121642968, "grad_norm": 5.894312748053277, "learning_rate": 9.28507624931746e-08, "loss": 0.14909735321998596, "step": 3589 }, { "epoch": 2.8357030015797786, "grad_norm": 11.328914671419902, "learning_rate": 9.197128955755919e-08, "loss": 0.6000841856002808, "step": 3590 }, { "epoch": 2.8364928909952605, "grad_norm": 14.987543569871976, "learning_rate": 9.109596294075983e-08, "loss": 0.4695100784301758, "step": 3591 }, { "epoch": 2.8372827804107423, "grad_norm": 7.157405951875022, "learning_rate": 9.022478338225616e-08, "loss": 0.2676977217197418, "step": 3592 }, { "epoch": 2.838072669826224, "grad_norm": 10.025275842504612, "learning_rate": 8.935775161802274e-08, "loss": 0.6638664603233337, "step": 3593 }, { "epoch": 2.838862559241706, "grad_norm": 7.7262320503218005, "learning_rate": 8.849486838053201e-08, "loss": 0.2409912347793579, "step": 3594 }, { "epoch": 2.839652448657188, "grad_norm": 12.117641612900385, "learning_rate": 8.763613439875085e-08, "loss": 0.4687439203262329, "step": 3595 }, { "epoch": 2.8404423380726698, "grad_norm": 10.35658710588346, "learning_rate": 8.67815503981384e-08, "loss": 0.8544546961784363, "step": 3596 }, { "epoch": 2.8412322274881516, "grad_norm": 11.638767331627365, "learning_rate": 8.59311171006516e-08, "loss": 0.8178413510322571, "step": 3597 }, { "epoch": 2.8420221169036335, "grad_norm": 8.669410862880607, "learning_rate": 8.508483522473909e-08, "loss": 0.4358055293560028, "step": 3598 }, { "epoch": 2.8428120063191153, "grad_norm": 15.987554618443884, "learning_rate": 8.424270548534286e-08, "loss": 0.5043225884437561, "step": 3599 }, { "epoch": 2.843601895734597, "grad_norm": 33.576463764455724, "learning_rate": 8.340472859389714e-08, "loss": 0.38993752002716064, "step": 3600 }, { "epoch": 2.844391785150079, "grad_norm": 12.91658376574621, "learning_rate": 8.257090525832623e-08, "loss": 0.8641099333763123, "step": 3601 }, { "epoch": 2.845181674565561, "grad_norm": 7.742223493684328, "learning_rate": 8.17412361830472e-08, "loss": 0.36251041293144226, "step": 3602 }, { "epoch": 2.845971563981043, "grad_norm": 9.525833528037221, "learning_rate": 8.091572206896769e-08, "loss": 0.4047582745552063, "step": 3603 }, { "epoch": 2.8467614533965246, "grad_norm": 12.327238364324716, "learning_rate": 8.009436361348543e-08, "loss": 0.22032329440116882, "step": 3604 }, { "epoch": 2.8475513428120065, "grad_norm": 8.279880501050302, "learning_rate": 7.927716151048537e-08, "loss": 0.4643440246582031, "step": 3605 }, { "epoch": 2.8483412322274884, "grad_norm": 11.646960594230466, "learning_rate": 7.846411645034302e-08, "loss": 0.20467007160186768, "step": 3606 }, { "epoch": 2.84913112164297, "grad_norm": 11.370802049937053, "learning_rate": 7.765522911992229e-08, "loss": 0.27269160747528076, "step": 3607 }, { "epoch": 2.849921011058452, "grad_norm": 23.699981990614827, "learning_rate": 7.685050020257267e-08, "loss": 0.8710867762565613, "step": 3608 }, { "epoch": 2.8507109004739335, "grad_norm": 13.69662146387405, "learning_rate": 7.604993037813257e-08, "loss": 0.7823088765144348, "step": 3609 }, { "epoch": 2.8515007898894154, "grad_norm": 11.238494785694472, "learning_rate": 7.525352032292599e-08, "loss": 0.47667503356933594, "step": 3610 }, { "epoch": 2.852290679304897, "grad_norm": 11.90408448356978, "learning_rate": 7.44612707097625e-08, "loss": 0.4056919515132904, "step": 3611 }, { "epoch": 2.853080568720379, "grad_norm": 14.64818168207235, "learning_rate": 7.367318220793673e-08, "loss": 0.31270867586135864, "step": 3612 }, { "epoch": 2.853870458135861, "grad_norm": 8.786247946279532, "learning_rate": 7.288925548322945e-08, "loss": 0.18290819227695465, "step": 3613 }, { "epoch": 2.854660347551343, "grad_norm": 9.827541412175302, "learning_rate": 7.210949119790423e-08, "loss": 1.236111044883728, "step": 3614 }, { "epoch": 2.8554502369668247, "grad_norm": 7.902486340561014, "learning_rate": 7.133389001070801e-08, "loss": 0.19697430729866028, "step": 3615 }, { "epoch": 2.8562401263823065, "grad_norm": 10.36680879415055, "learning_rate": 7.056245257687166e-08, "loss": 0.4995325207710266, "step": 3616 }, { "epoch": 2.8570300157977884, "grad_norm": 8.029144851923272, "learning_rate": 6.979517954810777e-08, "loss": 0.35892802476882935, "step": 3617 }, { "epoch": 2.8578199052132702, "grad_norm": 10.443143901118459, "learning_rate": 6.903207157261116e-08, "loss": 0.6018155813217163, "step": 3618 }, { "epoch": 2.858609794628752, "grad_norm": 9.972612916820504, "learning_rate": 6.827312929505837e-08, "loss": 0.3768244683742523, "step": 3619 }, { "epoch": 2.859399684044234, "grad_norm": 9.967954749916855, "learning_rate": 6.75183533566065e-08, "loss": 0.6008504033088684, "step": 3620 }, { "epoch": 2.860189573459716, "grad_norm": 11.826572647957775, "learning_rate": 6.676774439489109e-08, "loss": 0.9027575254440308, "step": 3621 }, { "epoch": 2.860979462875197, "grad_norm": 7.566116639922751, "learning_rate": 6.602130304403153e-08, "loss": 0.3454285264015198, "step": 3622 }, { "epoch": 2.861769352290679, "grad_norm": 11.334110441287553, "learning_rate": 6.527902993462232e-08, "loss": 0.3822672963142395, "step": 3623 }, { "epoch": 2.862559241706161, "grad_norm": 30.42379993258823, "learning_rate": 6.454092569373849e-08, "loss": 1.1499581336975098, "step": 3624 }, { "epoch": 2.863349131121643, "grad_norm": 12.189705870905884, "learning_rate": 6.380699094493292e-08, "loss": 0.28113341331481934, "step": 3625 }, { "epoch": 2.8641390205371247, "grad_norm": 10.306143494702912, "learning_rate": 6.307722630823631e-08, "loss": 0.6603707671165466, "step": 3626 }, { "epoch": 2.8649289099526065, "grad_norm": 12.679232897956698, "learning_rate": 6.235163240015608e-08, "loss": 0.6723718047142029, "step": 3627 }, { "epoch": 2.8657187993680884, "grad_norm": 11.11612011580192, "learning_rate": 6.163020983367685e-08, "loss": 0.28444093465805054, "step": 3628 }, { "epoch": 2.8665086887835702, "grad_norm": 10.621322926022057, "learning_rate": 6.091295921825779e-08, "loss": 0.2627624571323395, "step": 3629 }, { "epoch": 2.867298578199052, "grad_norm": 9.584051079895877, "learning_rate": 6.019988115983533e-08, "loss": 0.6522977948188782, "step": 3630 }, { "epoch": 2.868088467614534, "grad_norm": 10.104067057656607, "learning_rate": 5.949097626081979e-08, "loss": 0.806284487247467, "step": 3631 }, { "epoch": 2.868878357030016, "grad_norm": 9.3150959160621, "learning_rate": 5.878624512009712e-08, "loss": 0.49238792061805725, "step": 3632 }, { "epoch": 2.8696682464454977, "grad_norm": 11.064857677412675, "learning_rate": 5.808568833302552e-08, "loss": 0.3209676146507263, "step": 3633 }, { "epoch": 2.8704581358609795, "grad_norm": 9.937366426134174, "learning_rate": 5.73893064914377e-08, "loss": 0.5651168823242188, "step": 3634 }, { "epoch": 2.8712480252764614, "grad_norm": 9.43706524762976, "learning_rate": 5.669710018364028e-08, "loss": 0.25006648898124695, "step": 3635 }, { "epoch": 2.8720379146919433, "grad_norm": 8.086894664969703, "learning_rate": 5.600906999441047e-08, "loss": 0.289908766746521, "step": 3636 }, { "epoch": 2.872827804107425, "grad_norm": 11.049864934040414, "learning_rate": 5.5325216504999445e-08, "loss": 0.2839067578315735, "step": 3637 }, { "epoch": 2.873617693522907, "grad_norm": 11.326901492566288, "learning_rate": 5.464554029312785e-08, "loss": 0.4851897358894348, "step": 3638 }, { "epoch": 2.874407582938389, "grad_norm": 9.870517653518927, "learning_rate": 5.39700419329886e-08, "loss": 0.3466881215572357, "step": 3639 }, { "epoch": 2.8751974723538707, "grad_norm": 9.471904879535721, "learning_rate": 5.329872199524577e-08, "loss": 0.41402703523635864, "step": 3640 }, { "epoch": 2.8759873617693525, "grad_norm": 11.444565675892793, "learning_rate": 5.263158104703125e-08, "loss": 0.7139902710914612, "step": 3641 }, { "epoch": 2.876777251184834, "grad_norm": 9.863961110827145, "learning_rate": 5.196861965194922e-08, "loss": 0.4099463224411011, "step": 3642 }, { "epoch": 2.877567140600316, "grad_norm": 14.395860031992335, "learning_rate": 5.1309838370071086e-08, "loss": 0.635336697101593, "step": 3643 }, { "epoch": 2.8783570300157977, "grad_norm": 11.549260196243356, "learning_rate": 5.0655237757937236e-08, "loss": 0.5518008470535278, "step": 3644 }, { "epoch": 2.8791469194312795, "grad_norm": 16.914171209055556, "learning_rate": 5.000481836855697e-08, "loss": 0.5935429930686951, "step": 3645 }, { "epoch": 2.8799368088467614, "grad_norm": 10.519627347888093, "learning_rate": 4.935858075140631e-08, "loss": 0.2353929877281189, "step": 3646 }, { "epoch": 2.8807266982622433, "grad_norm": 10.147294910154294, "learning_rate": 4.8716525452428556e-08, "loss": 0.5117212533950806, "step": 3647 }, { "epoch": 2.881516587677725, "grad_norm": 7.79219575426103, "learning_rate": 4.807865301403536e-08, "loss": 0.29170703887939453, "step": 3648 }, { "epoch": 2.882306477093207, "grad_norm": 9.052236405274959, "learning_rate": 4.744496397510234e-08, "loss": 0.34299367666244507, "step": 3649 }, { "epoch": 2.883096366508689, "grad_norm": 12.457687363908496, "learning_rate": 4.681545887097239e-08, "loss": 0.33234238624572754, "step": 3650 }, { "epoch": 2.8838862559241707, "grad_norm": 13.05592857803276, "learning_rate": 4.6190138233453976e-08, "loss": 0.27858805656433105, "step": 3651 }, { "epoch": 2.8846761453396526, "grad_norm": 10.916182057073014, "learning_rate": 4.556900259081898e-08, "loss": 0.6553702354431152, "step": 3652 }, { "epoch": 2.8854660347551344, "grad_norm": 9.39192402230028, "learning_rate": 4.495205246780543e-08, "loss": 0.5300555229187012, "step": 3653 }, { "epoch": 2.8862559241706163, "grad_norm": 5.7960707776723295, "learning_rate": 4.43392883856153e-08, "loss": 0.24335479736328125, "step": 3654 }, { "epoch": 2.8870458135860977, "grad_norm": 7.039387389488856, "learning_rate": 4.373071086191338e-08, "loss": 0.3162982761859894, "step": 3655 }, { "epoch": 2.8878357030015795, "grad_norm": 10.4351163530241, "learning_rate": 4.312632041082787e-08, "loss": 0.36064600944519043, "step": 3656 }, { "epoch": 2.8886255924170614, "grad_norm": 11.795988539884263, "learning_rate": 4.2526117542949774e-08, "loss": 0.343291312456131, "step": 3657 }, { "epoch": 2.8894154818325433, "grad_norm": 10.393934212831047, "learning_rate": 4.193010276533183e-08, "loss": 0.6907198429107666, "step": 3658 }, { "epoch": 2.890205371248025, "grad_norm": 9.56184934798155, "learning_rate": 4.13382765814907e-08, "loss": 0.7030273675918579, "step": 3659 }, { "epoch": 2.890995260663507, "grad_norm": 7.063388291601224, "learning_rate": 4.075063949140201e-08, "loss": 0.17387212812900543, "step": 3660 }, { "epoch": 2.891785150078989, "grad_norm": 8.661731775267791, "learning_rate": 4.016719199150365e-08, "loss": 0.5848299264907837, "step": 3661 }, { "epoch": 2.8925750394944707, "grad_norm": 16.72243976052531, "learning_rate": 3.958793457469412e-08, "loss": 0.4991316795349121, "step": 3662 }, { "epoch": 2.8933649289099526, "grad_norm": 14.820335568585508, "learning_rate": 3.901286773033253e-08, "loss": 0.30617228150367737, "step": 3663 }, { "epoch": 2.8941548183254344, "grad_norm": 13.043213388040584, "learning_rate": 3.8441991944236365e-08, "loss": 0.35002079606056213, "step": 3664 }, { "epoch": 2.8949447077409163, "grad_norm": 7.380501973738135, "learning_rate": 3.787530769868431e-08, "loss": 0.14187008142471313, "step": 3665 }, { "epoch": 2.895734597156398, "grad_norm": 9.224074386705713, "learning_rate": 3.731281547241228e-08, "loss": 0.1989508867263794, "step": 3666 }, { "epoch": 2.89652448657188, "grad_norm": 10.063244268865743, "learning_rate": 3.675451574061517e-08, "loss": 0.33559074997901917, "step": 3667 }, { "epoch": 2.897314375987362, "grad_norm": 17.367117559815597, "learning_rate": 3.620040897494737e-08, "loss": 0.7757275104522705, "step": 3668 }, { "epoch": 2.8981042654028437, "grad_norm": 14.80922413991561, "learning_rate": 3.565049564351997e-08, "loss": 0.5378328561782837, "step": 3669 }, { "epoch": 2.8988941548183256, "grad_norm": 11.674232729531763, "learning_rate": 3.510477621090192e-08, "loss": 0.5047122240066528, "step": 3670 }, { "epoch": 2.8996840442338074, "grad_norm": 9.61930347404772, "learning_rate": 3.456325113811776e-08, "loss": 0.22347621619701385, "step": 3671 }, { "epoch": 2.9004739336492893, "grad_norm": 12.428464114281123, "learning_rate": 3.4025920882649886e-08, "loss": 0.2939353585243225, "step": 3672 }, { "epoch": 2.901263823064771, "grad_norm": 11.787785169855981, "learning_rate": 3.3492785898437407e-08, "loss": 0.6785522699356079, "step": 3673 }, { "epoch": 2.902053712480253, "grad_norm": 10.424690477560073, "learning_rate": 3.296384663587338e-08, "loss": 0.9920533895492554, "step": 3674 }, { "epoch": 2.902843601895735, "grad_norm": 9.203768626957741, "learning_rate": 3.243910354180868e-08, "loss": 0.6544739007949829, "step": 3675 }, { "epoch": 2.9036334913112163, "grad_norm": 8.198475817478249, "learning_rate": 3.1918557059547605e-08, "loss": 0.2281288057565689, "step": 3676 }, { "epoch": 2.904423380726698, "grad_norm": 10.800632093569277, "learning_rate": 3.1402207628848935e-08, "loss": 0.2759685218334198, "step": 3677 }, { "epoch": 2.90521327014218, "grad_norm": 11.368103163964738, "learning_rate": 3.089005568592707e-08, "loss": 0.4425602853298187, "step": 3678 }, { "epoch": 2.906003159557662, "grad_norm": 15.676906486716955, "learning_rate": 3.038210166344924e-08, "loss": 0.9129424691200256, "step": 3679 }, { "epoch": 2.9067930489731437, "grad_norm": 7.319860581451804, "learning_rate": 2.9878345990536626e-08, "loss": 0.5854052305221558, "step": 3680 }, { "epoch": 2.9075829383886256, "grad_norm": 9.087306671915462, "learning_rate": 2.9378789092763816e-08, "loss": 0.2461828887462616, "step": 3681 }, { "epoch": 2.9083728278041074, "grad_norm": 9.89538463931709, "learning_rate": 2.8883431392158768e-08, "loss": 0.6880824565887451, "step": 3682 }, { "epoch": 2.9091627172195893, "grad_norm": 12.748919800585687, "learning_rate": 2.8392273307200068e-08, "loss": 0.9422566294670105, "step": 3683 }, { "epoch": 2.909952606635071, "grad_norm": 8.421188816814379, "learning_rate": 2.7905315252820808e-08, "loss": 0.4385561943054199, "step": 3684 }, { "epoch": 2.910742496050553, "grad_norm": 14.078145544877923, "learning_rate": 2.7422557640404135e-08, "loss": 0.5254925489425659, "step": 3685 }, { "epoch": 2.911532385466035, "grad_norm": 9.082888238767863, "learning_rate": 2.6944000877784925e-08, "loss": 0.5949431657791138, "step": 3686 }, { "epoch": 2.9123222748815167, "grad_norm": 13.607392988749478, "learning_rate": 2.646964536925034e-08, "loss": 0.691329300403595, "step": 3687 }, { "epoch": 2.913112164296998, "grad_norm": 10.530634100048369, "learning_rate": 2.59994915155376e-08, "loss": 0.4718400239944458, "step": 3688 }, { "epoch": 2.91390205371248, "grad_norm": 10.59452097306126, "learning_rate": 2.5533539713832878e-08, "loss": 0.33786359429359436, "step": 3689 }, { "epoch": 2.914691943127962, "grad_norm": 11.039511779455802, "learning_rate": 2.5071790357775183e-08, "loss": 0.8592206239700317, "step": 3690 }, { "epoch": 2.9154818325434437, "grad_norm": 11.096868713143294, "learning_rate": 2.461424383745137e-08, "loss": 0.5247258543968201, "step": 3691 }, { "epoch": 2.9162717219589256, "grad_norm": 9.7943810808923, "learning_rate": 2.4160900539397792e-08, "loss": 0.4834524393081665, "step": 3692 }, { "epoch": 2.9170616113744074, "grad_norm": 14.220945945722814, "learning_rate": 2.3711760846601427e-08, "loss": 0.49882930517196655, "step": 3693 }, { "epoch": 2.9178515007898893, "grad_norm": 9.444978699118497, "learning_rate": 2.326682513849654e-08, "loss": 0.4514763653278351, "step": 3694 }, { "epoch": 2.918641390205371, "grad_norm": 16.735071211993855, "learning_rate": 2.282609379096523e-08, "loss": 0.413520872592926, "step": 3695 }, { "epoch": 2.919431279620853, "grad_norm": 10.18649650301645, "learning_rate": 2.238956717634022e-08, "loss": 0.3149523138999939, "step": 3696 }, { "epoch": 2.920221169036335, "grad_norm": 7.242383152098311, "learning_rate": 2.1957245663399852e-08, "loss": 0.42863062024116516, "step": 3697 }, { "epoch": 2.9210110584518167, "grad_norm": 12.249847711419223, "learning_rate": 2.1529129617371414e-08, "loss": 0.3425447344779968, "step": 3698 }, { "epoch": 2.9218009478672986, "grad_norm": 21.564346952177747, "learning_rate": 2.1105219399927823e-08, "loss": 0.6913363933563232, "step": 3699 }, { "epoch": 2.9225908372827805, "grad_norm": 14.247309861203046, "learning_rate": 2.068551536919039e-08, "loss": 0.5754636526107788, "step": 3700 }, { "epoch": 2.9233807266982623, "grad_norm": 14.729656431110298, "learning_rate": 2.02700178797266e-08, "loss": 0.1754104197025299, "step": 3701 }, { "epoch": 2.924170616113744, "grad_norm": 10.378294101333756, "learning_rate": 1.9858727282549563e-08, "loss": 0.5599175095558167, "step": 3702 }, { "epoch": 2.924960505529226, "grad_norm": 7.713686434655625, "learning_rate": 1.9451643925119667e-08, "loss": 0.429149329662323, "step": 3703 }, { "epoch": 2.925750394944708, "grad_norm": 18.397053591718155, "learning_rate": 1.9048768151341825e-08, "loss": 1.2451764345169067, "step": 3704 }, { "epoch": 2.9265402843601898, "grad_norm": 11.278121609485279, "learning_rate": 1.8650100301566553e-08, "loss": 0.2728927433490753, "step": 3705 }, { "epoch": 2.9273301737756716, "grad_norm": 12.412783868000135, "learning_rate": 1.8255640712590004e-08, "loss": 0.39738449454307556, "step": 3706 }, { "epoch": 2.9281200631911535, "grad_norm": 14.573969152453861, "learning_rate": 1.7865389717653392e-08, "loss": 0.4282408356666565, "step": 3707 }, { "epoch": 2.9289099526066353, "grad_norm": 12.27556396872362, "learning_rate": 1.7479347646441323e-08, "loss": 0.573512852191925, "step": 3708 }, { "epoch": 2.9296998420221168, "grad_norm": 13.78449081143613, "learning_rate": 1.709751482508404e-08, "loss": 0.4519804120063782, "step": 3709 }, { "epoch": 2.9304897314375986, "grad_norm": 8.515540650947582, "learning_rate": 1.6719891576154612e-08, "loss": 0.6025781035423279, "step": 3710 }, { "epoch": 2.9312796208530805, "grad_norm": 9.597411743199306, "learning_rate": 1.634647821867119e-08, "loss": 0.15691038966178894, "step": 3711 }, { "epoch": 2.9320695102685623, "grad_norm": 10.486595198764013, "learning_rate": 1.5977275068093657e-08, "loss": 0.22191372513771057, "step": 3712 }, { "epoch": 2.932859399684044, "grad_norm": 11.847663895398458, "learning_rate": 1.56122824363264e-08, "loss": 0.5614909529685974, "step": 3713 }, { "epoch": 2.933649289099526, "grad_norm": 13.508354632592122, "learning_rate": 1.52515006317161e-08, "loss": 0.42124590277671814, "step": 3714 }, { "epoch": 2.934439178515008, "grad_norm": 12.085187248497775, "learning_rate": 1.4894929959053395e-08, "loss": 0.39891332387924194, "step": 3715 }, { "epoch": 2.9352290679304898, "grad_norm": 11.772324248757105, "learning_rate": 1.4542570719569549e-08, "loss": 0.743476152420044, "step": 3716 }, { "epoch": 2.9360189573459716, "grad_norm": 13.824383157557017, "learning_rate": 1.4194423210939223e-08, "loss": 0.7813572883605957, "step": 3717 }, { "epoch": 2.9368088467614535, "grad_norm": 12.023055278816088, "learning_rate": 1.3850487727278261e-08, "loss": 0.18068841099739075, "step": 3718 }, { "epoch": 2.9375987361769353, "grad_norm": 9.004344869826209, "learning_rate": 1.351076455914424e-08, "loss": 0.35265034437179565, "step": 3719 }, { "epoch": 2.938388625592417, "grad_norm": 6.411410825512506, "learning_rate": 1.3175253993537584e-08, "loss": 0.1986955851316452, "step": 3720 }, { "epoch": 2.9391785150078986, "grad_norm": 15.542536754284288, "learning_rate": 1.284395631389712e-08, "loss": 1.2308954000473022, "step": 3721 }, { "epoch": 2.9399684044233805, "grad_norm": 11.589586159826656, "learning_rate": 1.251687180010508e-08, "loss": 0.3059263229370117, "step": 3722 }, { "epoch": 2.9407582938388623, "grad_norm": 11.848666416715453, "learning_rate": 1.2194000728483758e-08, "loss": 0.381493479013443, "step": 3723 }, { "epoch": 2.941548183254344, "grad_norm": 9.762685828075522, "learning_rate": 1.1875343371795522e-08, "loss": 0.3416656255722046, "step": 3724 }, { "epoch": 2.942338072669826, "grad_norm": 13.146351843743089, "learning_rate": 1.1560899999242813e-08, "loss": 0.24065348505973816, "step": 3725 }, { "epoch": 2.943127962085308, "grad_norm": 8.753449681389933, "learning_rate": 1.1250670876468694e-08, "loss": 0.5435752868652344, "step": 3726 }, { "epoch": 2.9439178515007898, "grad_norm": 9.571620847460116, "learning_rate": 1.0944656265555186e-08, "loss": 0.5217230319976807, "step": 3727 }, { "epoch": 2.9447077409162716, "grad_norm": 13.736435986610593, "learning_rate": 1.0642856425025494e-08, "loss": 0.30995261669158936, "step": 3728 }, { "epoch": 2.9454976303317535, "grad_norm": 11.819774523695497, "learning_rate": 1.0345271609840668e-08, "loss": 0.27620571851730347, "step": 3729 }, { "epoch": 2.9462875197472354, "grad_norm": 15.156276372780589, "learning_rate": 1.0051902071401276e-08, "loss": 0.6031606793403625, "step": 3730 }, { "epoch": 2.947077409162717, "grad_norm": 12.80904870667671, "learning_rate": 9.762748057546845e-09, "loss": 0.4767545461654663, "step": 3731 }, { "epoch": 2.947867298578199, "grad_norm": 9.951774811263586, "learning_rate": 9.477809812555306e-09, "loss": 0.6249617338180542, "step": 3732 }, { "epoch": 2.948657187993681, "grad_norm": 8.066686549216158, "learning_rate": 9.197087577144104e-09, "loss": 0.2534811198711395, "step": 3733 }, { "epoch": 2.949447077409163, "grad_norm": 14.503347622672706, "learning_rate": 8.92058158846798e-09, "loss": 0.259134441614151, "step": 3734 }, { "epoch": 2.9502369668246446, "grad_norm": 12.45013867650853, "learning_rate": 8.648292080119524e-09, "loss": 0.7195329070091248, "step": 3735 }, { "epoch": 2.9510268562401265, "grad_norm": 14.526124154717072, "learning_rate": 8.38021928213084e-09, "loss": 0.5841654539108276, "step": 3736 }, { "epoch": 2.9518167456556084, "grad_norm": 9.850968529140541, "learning_rate": 8.11636342097022e-09, "loss": 0.36491304636001587, "step": 3737 }, { "epoch": 2.9526066350710902, "grad_norm": 10.07563348526924, "learning_rate": 7.856724719544351e-09, "loss": 0.4188252091407776, "step": 3738 }, { "epoch": 2.953396524486572, "grad_norm": 22.212639293967655, "learning_rate": 7.601303397196113e-09, "loss": 0.3442292809486389, "step": 3739 }, { "epoch": 2.954186413902054, "grad_norm": 14.029241381991797, "learning_rate": 7.350099669706235e-09, "loss": 0.21167263388633728, "step": 3740 }, { "epoch": 2.954976303317536, "grad_norm": 12.136596175552866, "learning_rate": 7.103113749293844e-09, "loss": 0.6398590207099915, "step": 3741 }, { "epoch": 2.955766192733017, "grad_norm": 9.741023971909382, "learning_rate": 6.860345844612038e-09, "loss": 0.5108597278594971, "step": 3742 }, { "epoch": 2.956556082148499, "grad_norm": 7.655686126361488, "learning_rate": 6.621796160752314e-09, "loss": 0.36386775970458984, "step": 3743 }, { "epoch": 2.957345971563981, "grad_norm": 10.018875222198334, "learning_rate": 6.387464899242357e-09, "loss": 0.30166739225387573, "step": 3744 }, { "epoch": 2.958135860979463, "grad_norm": 9.095182318951428, "learning_rate": 6.1573522580460346e-09, "loss": 0.45996037125587463, "step": 3745 }, { "epoch": 2.9589257503949447, "grad_norm": 10.26232984567016, "learning_rate": 5.931458431562842e-09, "loss": 0.5725584626197815, "step": 3746 }, { "epoch": 2.9597156398104265, "grad_norm": 8.756926533099401, "learning_rate": 5.709783610629571e-09, "loss": 0.29128578305244446, "step": 3747 }, { "epoch": 2.9605055292259084, "grad_norm": 11.68804184137246, "learning_rate": 5.492327982516976e-09, "loss": 0.5195301175117493, "step": 3748 }, { "epoch": 2.9612954186413902, "grad_norm": 10.545825985768683, "learning_rate": 5.2790917309325505e-09, "loss": 0.5382466912269592, "step": 3749 }, { "epoch": 2.962085308056872, "grad_norm": 11.350603837614045, "learning_rate": 5.070075036018863e-09, "loss": 0.43567579984664917, "step": 3750 }, { "epoch": 2.962875197472354, "grad_norm": 13.409834921169738, "learning_rate": 4.865278074354107e-09, "loss": 0.4768742322921753, "step": 3751 }, { "epoch": 2.963665086887836, "grad_norm": 14.469370990431916, "learning_rate": 4.664701018951001e-09, "loss": 0.48725759983062744, "step": 3752 }, { "epoch": 2.9644549763033177, "grad_norm": 10.832912501886673, "learning_rate": 4.468344039258443e-09, "loss": 0.2995648980140686, "step": 3753 }, { "epoch": 2.9652448657187995, "grad_norm": 17.54919278880869, "learning_rate": 4.2762073011592966e-09, "loss": 0.6087955832481384, "step": 3754 }, { "epoch": 2.966034755134281, "grad_norm": 10.31999554844462, "learning_rate": 4.088290966970943e-09, "loss": 0.8731129169464111, "step": 3755 }, { "epoch": 2.966824644549763, "grad_norm": 10.898544647919328, "learning_rate": 3.904595195445837e-09, "loss": 0.7485541105270386, "step": 3756 }, { "epoch": 2.9676145339652447, "grad_norm": 13.203065381428397, "learning_rate": 3.7251201417703996e-09, "loss": 0.3719606399536133, "step": 3757 }, { "epoch": 2.9684044233807265, "grad_norm": 8.052328293519382, "learning_rate": 3.5498659575666783e-09, "loss": 0.2648746371269226, "step": 3758 }, { "epoch": 2.9691943127962084, "grad_norm": 12.815000426949975, "learning_rate": 3.3788327908890196e-09, "loss": 0.3461490869522095, "step": 3759 }, { "epoch": 2.9699842022116902, "grad_norm": 7.421454114527485, "learning_rate": 3.212020786227399e-09, "loss": 0.3270190358161926, "step": 3760 }, { "epoch": 2.970774091627172, "grad_norm": 10.632206347492682, "learning_rate": 3.0494300845052006e-09, "loss": 0.48663121461868286, "step": 3761 }, { "epoch": 2.971563981042654, "grad_norm": 9.052624248155158, "learning_rate": 2.8910608230786617e-09, "loss": 0.3653707206249237, "step": 3762 }, { "epoch": 2.972353870458136, "grad_norm": 21.507879887876314, "learning_rate": 2.7369131357402045e-09, "loss": 0.6693405508995056, "step": 3763 }, { "epoch": 2.9731437598736177, "grad_norm": 11.871106908449853, "learning_rate": 2.586987152712883e-09, "loss": 0.3541383445262909, "step": 3764 }, { "epoch": 2.9739336492890995, "grad_norm": 9.347340281030029, "learning_rate": 2.441283000655381e-09, "loss": 0.2928946614265442, "step": 3765 }, { "epoch": 2.9747235387045814, "grad_norm": 14.20794932298081, "learning_rate": 2.2998008026592356e-09, "loss": 0.8095124363899231, "step": 3766 }, { "epoch": 2.9755134281200633, "grad_norm": 19.781359673109964, "learning_rate": 2.1625406782482817e-09, "loss": 0.9541232585906982, "step": 3767 }, { "epoch": 2.976303317535545, "grad_norm": 12.307332036811797, "learning_rate": 2.0295027433808736e-09, "loss": 1.270265817642212, "step": 3768 }, { "epoch": 2.977093206951027, "grad_norm": 9.268483761306474, "learning_rate": 1.9006871104482183e-09, "loss": 0.3142393231391907, "step": 3769 }, { "epoch": 2.977883096366509, "grad_norm": 12.935271250550574, "learning_rate": 1.776093888273267e-09, "loss": 2.3558170795440674, "step": 3770 }, { "epoch": 2.9786729857819907, "grad_norm": 14.14628000993969, "learning_rate": 1.655723182113489e-09, "loss": 0.3965626657009125, "step": 3771 }, { "epoch": 2.9794628751974725, "grad_norm": 10.694643173228462, "learning_rate": 1.5395750936580967e-09, "loss": 0.3927484452724457, "step": 3772 }, { "epoch": 2.9802527646129544, "grad_norm": 15.868269189008021, "learning_rate": 1.4276497210297114e-09, "loss": 0.2537468671798706, "step": 3773 }, { "epoch": 2.9810426540284363, "grad_norm": 13.865339454139647, "learning_rate": 1.3199471587832524e-09, "loss": 0.46213477849960327, "step": 3774 }, { "epoch": 2.981832543443918, "grad_norm": 12.317620064332576, "learning_rate": 1.2164674979059378e-09, "loss": 0.6187537312507629, "step": 3775 }, { "epoch": 2.9826224328593995, "grad_norm": 12.70811742821486, "learning_rate": 1.1172108258178382e-09, "loss": 0.9330609440803528, "step": 3776 }, { "epoch": 2.9834123222748814, "grad_norm": 12.521602937647542, "learning_rate": 1.0221772263707686e-09, "loss": 0.48245567083358765, "step": 3777 }, { "epoch": 2.9842022116903633, "grad_norm": 13.143173870731685, "learning_rate": 9.313667798505065e-10, "loss": 0.4393605887889862, "step": 3778 }, { "epoch": 2.984992101105845, "grad_norm": 7.047235748717833, "learning_rate": 8.447795629734634e-10, "loss": 0.32978931069374084, "step": 3779 }, { "epoch": 2.985781990521327, "grad_norm": 8.024033854946516, "learning_rate": 7.624156488883483e-10, "loss": 0.41405847668647766, "step": 3780 }, { "epoch": 2.986571879936809, "grad_norm": 22.169614392633356, "learning_rate": 6.842751071772791e-10, "loss": 0.6391655206680298, "step": 3781 }, { "epoch": 2.9873617693522907, "grad_norm": 12.141908595978165, "learning_rate": 6.103580038530066e-10, "loss": 0.5002020001411438, "step": 3782 }, { "epoch": 2.9881516587677726, "grad_norm": 11.596188780257132, "learning_rate": 5.406644013616902e-10, "loss": 0.745347261428833, "step": 3783 }, { "epoch": 2.9889415481832544, "grad_norm": 16.660637478949464, "learning_rate": 4.751943585801222e-10, "loss": 0.6414915919303894, "step": 3784 }, { "epoch": 2.9897314375987363, "grad_norm": 10.793044072751652, "learning_rate": 4.1394793081739324e-10, "loss": 0.5143662691116333, "step": 3785 }, { "epoch": 2.990521327014218, "grad_norm": 10.877705766316154, "learning_rate": 3.5692516981544744e-10, "loss": 0.35477590560913086, "step": 3786 }, { "epoch": 2.9913112164297, "grad_norm": 10.463002285868905, "learning_rate": 3.041261237463067e-10, "loss": 0.5146927833557129, "step": 3787 }, { "epoch": 2.9921011058451814, "grad_norm": 12.683003185650735, "learning_rate": 2.5555083721595654e-10, "loss": 0.7653088569641113, "step": 3788 }, { "epoch": 2.9928909952606633, "grad_norm": 11.791360620237418, "learning_rate": 2.1119935125990532e-10, "loss": 0.3955351710319519, "step": 3789 }, { "epoch": 2.993680884676145, "grad_norm": 18.909176585649405, "learning_rate": 1.710717033470699e-10, "loss": 0.6973004341125488, "step": 3790 }, { "epoch": 2.994470774091627, "grad_norm": 10.889260209523764, "learning_rate": 1.3516792737700014e-10, "loss": 0.3600936233997345, "step": 3791 }, { "epoch": 2.995260663507109, "grad_norm": 8.488379905621288, "learning_rate": 1.0348805368209924e-10, "loss": 0.3776974678039551, "step": 3792 }, { "epoch": 2.9960505529225907, "grad_norm": 7.3688565178666146, "learning_rate": 7.603210902484837e-11, "loss": 0.2519044876098633, "step": 3793 }, { "epoch": 2.9968404423380726, "grad_norm": 9.212568490304637, "learning_rate": 5.280011660002693e-11, "loss": 0.3771563470363617, "step": 3794 }, { "epoch": 2.9976303317535544, "grad_norm": 10.294914027544094, "learning_rate": 3.3792096034712675e-11, "loss": 0.35696250200271606, "step": 3795 }, { "epoch": 2.9984202211690363, "grad_norm": 6.820884906019979, "learning_rate": 1.9008063386616315e-11, "loss": 0.24771413207054138, "step": 3796 }, { "epoch": 2.999210110584518, "grad_norm": 11.182619500206162, "learning_rate": 8.448031145746883e-12, "loss": 0.6118890047073364, "step": 3797 }, { "epoch": 3.0, "grad_norm": 11.835933309637019, "learning_rate": 2.1120082327463766e-12, "loss": 0.2823507785797119, "step": 3798 }, { "epoch": 3.0, "step": 3798, "total_flos": 10133269585920.0, "train_loss": 1.6510612177336417, "train_runtime": 4328.0202, "train_samples_per_second": 3.51, "train_steps_per_second": 0.878 } ], "logging_steps": 1, "max_steps": 3798, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 10133269585920.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }