{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1887, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001589825119236884, "grad_norm": 35.49121667328633, "learning_rate": 0.0, "loss": 3.658745765686035, "step": 1 }, { "epoch": 0.003179650238473768, "grad_norm": 32.412269860175655, "learning_rate": 5.291005291005291e-08, "loss": 4.507900238037109, "step": 2 }, { "epoch": 0.0047694753577106515, "grad_norm": 33.443946853548105, "learning_rate": 1.0582010582010582e-07, "loss": 3.916531562805176, "step": 3 }, { "epoch": 0.006359300476947536, "grad_norm": 37.291251000399825, "learning_rate": 1.5873015873015874e-07, "loss": 3.8956263065338135, "step": 4 }, { "epoch": 0.00794912559618442, "grad_norm": 41.0555838913476, "learning_rate": 2.1164021164021165e-07, "loss": 4.495701789855957, "step": 5 }, { "epoch": 0.009538950715421303, "grad_norm": 38.94745771005691, "learning_rate": 2.6455026455026455e-07, "loss": 4.289466857910156, "step": 6 }, { "epoch": 0.011128775834658187, "grad_norm": 42.29901893801629, "learning_rate": 3.174603174603175e-07, "loss": 4.111724853515625, "step": 7 }, { "epoch": 0.012718600953895072, "grad_norm": 33.45027607438258, "learning_rate": 3.7037037037037036e-07, "loss": 3.8888838291168213, "step": 8 }, { "epoch": 0.014308426073131956, "grad_norm": 32.67762061785612, "learning_rate": 4.232804232804233e-07, "loss": 3.9692318439483643, "step": 9 }, { "epoch": 0.01589825119236884, "grad_norm": 36.061843620608656, "learning_rate": 4.7619047619047623e-07, "loss": 4.238317489624023, "step": 10 }, { "epoch": 0.017488076311605722, "grad_norm": 32.52520374971258, "learning_rate": 5.291005291005291e-07, "loss": 4.491572380065918, "step": 11 }, { "epoch": 0.019077901430842606, "grad_norm": 38.448364167095846, "learning_rate": 5.82010582010582e-07, "loss": 4.081965446472168, "step": 12 }, { "epoch": 0.02066772655007949, "grad_norm": 38.243259913890064, "learning_rate": 6.34920634920635e-07, "loss": 3.595273971557617, "step": 13 }, { "epoch": 0.022257551669316374, "grad_norm": 34.58522048956281, "learning_rate": 6.878306878306879e-07, "loss": 3.703660011291504, "step": 14 }, { "epoch": 0.02384737678855326, "grad_norm": 29.350503975224882, "learning_rate": 7.407407407407407e-07, "loss": 3.525111675262451, "step": 15 }, { "epoch": 0.025437201907790145, "grad_norm": 27.96937416222195, "learning_rate": 7.936507936507937e-07, "loss": 3.422595977783203, "step": 16 }, { "epoch": 0.02702702702702703, "grad_norm": 26.409953728370734, "learning_rate": 8.465608465608466e-07, "loss": 2.9090871810913086, "step": 17 }, { "epoch": 0.028616852146263912, "grad_norm": 31.509950255139216, "learning_rate": 8.994708994708995e-07, "loss": 4.4084930419921875, "step": 18 }, { "epoch": 0.030206677265500796, "grad_norm": 26.354701063294822, "learning_rate": 9.523809523809525e-07, "loss": 4.3907470703125, "step": 19 }, { "epoch": 0.03179650238473768, "grad_norm": 23.96857278346574, "learning_rate": 1.0052910052910054e-06, "loss": 3.679255962371826, "step": 20 }, { "epoch": 0.033386327503974564, "grad_norm": 35.688302893278596, "learning_rate": 1.0582010582010582e-06, "loss": 4.266496181488037, "step": 21 }, { "epoch": 0.034976152623211444, "grad_norm": 25.110803639662702, "learning_rate": 1.111111111111111e-06, "loss": 3.3960649967193604, "step": 22 }, { "epoch": 0.03656597774244833, "grad_norm": 18.38682934266701, "learning_rate": 1.164021164021164e-06, "loss": 3.22914981842041, "step": 23 }, { "epoch": 0.03815580286168521, "grad_norm": 24.20775912617202, "learning_rate": 1.216931216931217e-06, "loss": 4.515796661376953, "step": 24 }, { "epoch": 0.0397456279809221, "grad_norm": 33.73019056293916, "learning_rate": 1.26984126984127e-06, "loss": 3.897707939147949, "step": 25 }, { "epoch": 0.04133545310015898, "grad_norm": 20.544890832137906, "learning_rate": 1.3227513227513228e-06, "loss": 4.087409496307373, "step": 26 }, { "epoch": 0.04292527821939587, "grad_norm": 20.859207917695514, "learning_rate": 1.3756613756613758e-06, "loss": 3.8370442390441895, "step": 27 }, { "epoch": 0.04451510333863275, "grad_norm": 19.89855554686202, "learning_rate": 1.4285714285714286e-06, "loss": 3.5972700119018555, "step": 28 }, { "epoch": 0.046104928457869634, "grad_norm": 15.825946032493054, "learning_rate": 1.4814814814814815e-06, "loss": 3.3083245754241943, "step": 29 }, { "epoch": 0.04769475357710652, "grad_norm": 15.256333204941079, "learning_rate": 1.5343915343915345e-06, "loss": 3.2843480110168457, "step": 30 }, { "epoch": 0.0492845786963434, "grad_norm": 15.65253909378043, "learning_rate": 1.5873015873015873e-06, "loss": 3.409064292907715, "step": 31 }, { "epoch": 0.05087440381558029, "grad_norm": 19.432929271120607, "learning_rate": 1.6402116402116404e-06, "loss": 3.590700149536133, "step": 32 }, { "epoch": 0.05246422893481717, "grad_norm": 12.289857474808553, "learning_rate": 1.6931216931216932e-06, "loss": 3.363887310028076, "step": 33 }, { "epoch": 0.05405405405405406, "grad_norm": 13.2686181349725, "learning_rate": 1.746031746031746e-06, "loss": 3.2396044731140137, "step": 34 }, { "epoch": 0.05564387917329094, "grad_norm": 13.096435381422967, "learning_rate": 1.798941798941799e-06, "loss": 3.619406223297119, "step": 35 }, { "epoch": 0.057233704292527825, "grad_norm": 11.215843898706959, "learning_rate": 1.8518518518518519e-06, "loss": 3.35813045501709, "step": 36 }, { "epoch": 0.058823529411764705, "grad_norm": 10.649493440987735, "learning_rate": 1.904761904761905e-06, "loss": 3.1609840393066406, "step": 37 }, { "epoch": 0.06041335453100159, "grad_norm": 13.24735699781108, "learning_rate": 1.9576719576719577e-06, "loss": 3.2981178760528564, "step": 38 }, { "epoch": 0.06200317965023847, "grad_norm": 10.225948738076381, "learning_rate": 2.0105820105820108e-06, "loss": 3.604062080383301, "step": 39 }, { "epoch": 0.06359300476947535, "grad_norm": 10.090641137113177, "learning_rate": 2.0634920634920634e-06, "loss": 3.3760879039764404, "step": 40 }, { "epoch": 0.06518282988871224, "grad_norm": 19.205372173322857, "learning_rate": 2.1164021164021164e-06, "loss": 3.1986072063446045, "step": 41 }, { "epoch": 0.06677265500794913, "grad_norm": 12.193860558769504, "learning_rate": 2.1693121693121695e-06, "loss": 3.4260833263397217, "step": 42 }, { "epoch": 0.06836248012718601, "grad_norm": 10.613034738721613, "learning_rate": 2.222222222222222e-06, "loss": 2.6513419151306152, "step": 43 }, { "epoch": 0.06995230524642289, "grad_norm": 11.451444159725924, "learning_rate": 2.275132275132275e-06, "loss": 3.1006345748901367, "step": 44 }, { "epoch": 0.07154213036565978, "grad_norm": 13.125197373192298, "learning_rate": 2.328042328042328e-06, "loss": 3.506385326385498, "step": 45 }, { "epoch": 0.07313195548489666, "grad_norm": 14.894703172780192, "learning_rate": 2.380952380952381e-06, "loss": 3.1718366146087646, "step": 46 }, { "epoch": 0.07472178060413355, "grad_norm": 20.50688643480365, "learning_rate": 2.433862433862434e-06, "loss": 3.573230743408203, "step": 47 }, { "epoch": 0.07631160572337042, "grad_norm": 12.171684621094393, "learning_rate": 2.486772486772487e-06, "loss": 3.4022092819213867, "step": 48 }, { "epoch": 0.07790143084260731, "grad_norm": 10.546372018299202, "learning_rate": 2.53968253968254e-06, "loss": 3.517230272293091, "step": 49 }, { "epoch": 0.0794912559618442, "grad_norm": 11.236950853709759, "learning_rate": 2.5925925925925925e-06, "loss": 3.098986864089966, "step": 50 }, { "epoch": 0.08108108108108109, "grad_norm": 10.377712550018598, "learning_rate": 2.6455026455026455e-06, "loss": 3.3276524543762207, "step": 51 }, { "epoch": 0.08267090620031796, "grad_norm": 11.570551381613551, "learning_rate": 2.6984126984126986e-06, "loss": 3.817161798477173, "step": 52 }, { "epoch": 0.08426073131955485, "grad_norm": 10.609436953514747, "learning_rate": 2.7513227513227516e-06, "loss": 3.2749571800231934, "step": 53 }, { "epoch": 0.08585055643879173, "grad_norm": 10.676393530890982, "learning_rate": 2.8042328042328042e-06, "loss": 2.591442584991455, "step": 54 }, { "epoch": 0.08744038155802862, "grad_norm": 8.786141043027358, "learning_rate": 2.8571428571428573e-06, "loss": 2.674818277359009, "step": 55 }, { "epoch": 0.0890302066772655, "grad_norm": 13.90775039571811, "learning_rate": 2.9100529100529103e-06, "loss": 3.775853157043457, "step": 56 }, { "epoch": 0.09062003179650238, "grad_norm": 15.306506102949852, "learning_rate": 2.962962962962963e-06, "loss": 3.712808609008789, "step": 57 }, { "epoch": 0.09220985691573927, "grad_norm": 11.45788428532906, "learning_rate": 3.015873015873016e-06, "loss": 2.6537160873413086, "step": 58 }, { "epoch": 0.09379968203497616, "grad_norm": 11.474945819053824, "learning_rate": 3.068783068783069e-06, "loss": 3.005936622619629, "step": 59 }, { "epoch": 0.09538950715421304, "grad_norm": 10.112738468503954, "learning_rate": 3.1216931216931216e-06, "loss": 3.352091073989868, "step": 60 }, { "epoch": 0.09697933227344992, "grad_norm": 23.636986765198987, "learning_rate": 3.1746031746031746e-06, "loss": 3.0014185905456543, "step": 61 }, { "epoch": 0.0985691573926868, "grad_norm": 10.538684251581273, "learning_rate": 3.2275132275132277e-06, "loss": 3.156514883041382, "step": 62 }, { "epoch": 0.10015898251192369, "grad_norm": 24.68530877347611, "learning_rate": 3.2804232804232807e-06, "loss": 3.4716105461120605, "step": 63 }, { "epoch": 0.10174880763116058, "grad_norm": 8.880780828279468, "learning_rate": 3.3333333333333333e-06, "loss": 2.882477045059204, "step": 64 }, { "epoch": 0.10333863275039745, "grad_norm": 18.338165806941713, "learning_rate": 3.3862433862433864e-06, "loss": 3.8387961387634277, "step": 65 }, { "epoch": 0.10492845786963434, "grad_norm": 7.3597979784866885, "learning_rate": 3.4391534391534394e-06, "loss": 2.620795726776123, "step": 66 }, { "epoch": 0.10651828298887123, "grad_norm": 8.922781731032991, "learning_rate": 3.492063492063492e-06, "loss": 2.8749918937683105, "step": 67 }, { "epoch": 0.10810810810810811, "grad_norm": 21.37372028445257, "learning_rate": 3.544973544973545e-06, "loss": 3.401191473007202, "step": 68 }, { "epoch": 0.10969793322734499, "grad_norm": 8.887838451374009, "learning_rate": 3.597883597883598e-06, "loss": 2.8735604286193848, "step": 69 }, { "epoch": 0.11128775834658187, "grad_norm": 6.866225935716483, "learning_rate": 3.6507936507936507e-06, "loss": 2.937927484512329, "step": 70 }, { "epoch": 0.11287758346581876, "grad_norm": 6.5543979903091065, "learning_rate": 3.7037037037037037e-06, "loss": 2.569362163543701, "step": 71 }, { "epoch": 0.11446740858505565, "grad_norm": 10.30064064068897, "learning_rate": 3.7566137566137568e-06, "loss": 3.0775258541107178, "step": 72 }, { "epoch": 0.11605723370429252, "grad_norm": 11.18234696698082, "learning_rate": 3.80952380952381e-06, "loss": 3.062443256378174, "step": 73 }, { "epoch": 0.11764705882352941, "grad_norm": 11.066100259352726, "learning_rate": 3.862433862433863e-06, "loss": 2.7398781776428223, "step": 74 }, { "epoch": 0.1192368839427663, "grad_norm": 12.12909564338818, "learning_rate": 3.9153439153439155e-06, "loss": 3.0419538021087646, "step": 75 }, { "epoch": 0.12082670906200318, "grad_norm": 10.757566880795, "learning_rate": 3.968253968253968e-06, "loss": 2.9976096153259277, "step": 76 }, { "epoch": 0.12241653418124006, "grad_norm": 21.28785396901485, "learning_rate": 4.0211640211640215e-06, "loss": 3.4554760456085205, "step": 77 }, { "epoch": 0.12400635930047695, "grad_norm": 8.800500423676487, "learning_rate": 4.074074074074074e-06, "loss": 3.0005970001220703, "step": 78 }, { "epoch": 0.12559618441971382, "grad_norm": 12.667911903178506, "learning_rate": 4.126984126984127e-06, "loss": 2.7835562229156494, "step": 79 }, { "epoch": 0.1271860095389507, "grad_norm": 5.552759331146878, "learning_rate": 4.17989417989418e-06, "loss": 1.1964036226272583, "step": 80 }, { "epoch": 0.1287758346581876, "grad_norm": 9.482995769072225, "learning_rate": 4.232804232804233e-06, "loss": 3.1558589935302734, "step": 81 }, { "epoch": 0.13036565977742448, "grad_norm": 15.867917019873019, "learning_rate": 4.2857142857142855e-06, "loss": 3.090247631072998, "step": 82 }, { "epoch": 0.13195548489666137, "grad_norm": 15.813469147158665, "learning_rate": 4.338624338624339e-06, "loss": 3.1648030281066895, "step": 83 }, { "epoch": 0.13354531001589826, "grad_norm": 9.823156078564097, "learning_rate": 4.3915343915343915e-06, "loss": 2.5610551834106445, "step": 84 }, { "epoch": 0.13513513513513514, "grad_norm": 13.090071748263863, "learning_rate": 4.444444444444444e-06, "loss": 3.0754504203796387, "step": 85 }, { "epoch": 0.13672496025437203, "grad_norm": 10.149629926167787, "learning_rate": 4.497354497354498e-06, "loss": 2.997760772705078, "step": 86 }, { "epoch": 0.1383147853736089, "grad_norm": 18.58538419182332, "learning_rate": 4.55026455026455e-06, "loss": 3.5637764930725098, "step": 87 }, { "epoch": 0.13990461049284578, "grad_norm": 16.999293585853817, "learning_rate": 4.603174603174604e-06, "loss": 2.937600612640381, "step": 88 }, { "epoch": 0.14149443561208266, "grad_norm": 9.804133729065864, "learning_rate": 4.656084656084656e-06, "loss": 3.095571517944336, "step": 89 }, { "epoch": 0.14308426073131955, "grad_norm": 11.531802089182209, "learning_rate": 4.708994708994709e-06, "loss": 3.0191006660461426, "step": 90 }, { "epoch": 0.14467408585055644, "grad_norm": 15.351908734558066, "learning_rate": 4.761904761904762e-06, "loss": 2.917482852935791, "step": 91 }, { "epoch": 0.14626391096979333, "grad_norm": 11.937099193403586, "learning_rate": 4.814814814814815e-06, "loss": 4.431112289428711, "step": 92 }, { "epoch": 0.1478537360890302, "grad_norm": 21.62832537445158, "learning_rate": 4.867724867724868e-06, "loss": 2.640915870666504, "step": 93 }, { "epoch": 0.1494435612082671, "grad_norm": 7.260841410847577, "learning_rate": 4.920634920634921e-06, "loss": 3.0813612937927246, "step": 94 }, { "epoch": 0.151033386327504, "grad_norm": 10.301768618589072, "learning_rate": 4.973544973544974e-06, "loss": 2.7878708839416504, "step": 95 }, { "epoch": 0.15262321144674085, "grad_norm": 10.6973156598335, "learning_rate": 5.026455026455027e-06, "loss": 3.8078625202178955, "step": 96 }, { "epoch": 0.15421303656597773, "grad_norm": 12.524211447252867, "learning_rate": 5.07936507936508e-06, "loss": 3.1541152000427246, "step": 97 }, { "epoch": 0.15580286168521462, "grad_norm": 8.435308952157827, "learning_rate": 5.132275132275133e-06, "loss": 2.810488700866699, "step": 98 }, { "epoch": 0.1573926868044515, "grad_norm": 9.946665987848993, "learning_rate": 5.185185185185185e-06, "loss": 3.088862895965576, "step": 99 }, { "epoch": 0.1589825119236884, "grad_norm": 16.409093732708502, "learning_rate": 5.2380952380952384e-06, "loss": 2.9376237392425537, "step": 100 }, { "epoch": 0.16057233704292528, "grad_norm": 11.980502505351422, "learning_rate": 5.291005291005291e-06, "loss": 3.024081230163574, "step": 101 }, { "epoch": 0.16216216216216217, "grad_norm": 6.313212264071273, "learning_rate": 5.3439153439153445e-06, "loss": 2.360293388366699, "step": 102 }, { "epoch": 0.16375198728139906, "grad_norm": 14.096128703783076, "learning_rate": 5.396825396825397e-06, "loss": 2.982285976409912, "step": 103 }, { "epoch": 0.16534181240063592, "grad_norm": 10.009478552439436, "learning_rate": 5.449735449735451e-06, "loss": 3.326803684234619, "step": 104 }, { "epoch": 0.1669316375198728, "grad_norm": 7.599874356869111, "learning_rate": 5.502645502645503e-06, "loss": 2.5373692512512207, "step": 105 }, { "epoch": 0.1685214626391097, "grad_norm": 16.7898561209663, "learning_rate": 5.555555555555557e-06, "loss": 3.4259955883026123, "step": 106 }, { "epoch": 0.17011128775834658, "grad_norm": 13.958791391532715, "learning_rate": 5.6084656084656084e-06, "loss": 3.3131277561187744, "step": 107 }, { "epoch": 0.17170111287758347, "grad_norm": 12.009776509727333, "learning_rate": 5.661375661375662e-06, "loss": 2.851423978805542, "step": 108 }, { "epoch": 0.17329093799682035, "grad_norm": 16.62552204724523, "learning_rate": 5.7142857142857145e-06, "loss": 3.5492098331451416, "step": 109 }, { "epoch": 0.17488076311605724, "grad_norm": 7.10122679815253, "learning_rate": 5.767195767195768e-06, "loss": 2.4778614044189453, "step": 110 }, { "epoch": 0.17647058823529413, "grad_norm": 12.461370297402736, "learning_rate": 5.820105820105821e-06, "loss": 2.2983148097991943, "step": 111 }, { "epoch": 0.178060413354531, "grad_norm": 8.373213032362216, "learning_rate": 5.873015873015874e-06, "loss": 3.072960138320923, "step": 112 }, { "epoch": 0.17965023847376788, "grad_norm": 8.151798706622255, "learning_rate": 5.925925925925926e-06, "loss": 2.70046329498291, "step": 113 }, { "epoch": 0.18124006359300476, "grad_norm": 29.73366314903388, "learning_rate": 5.978835978835979e-06, "loss": 4.8392863273620605, "step": 114 }, { "epoch": 0.18282988871224165, "grad_norm": 7.301103721049628, "learning_rate": 6.031746031746032e-06, "loss": 2.8294947147369385, "step": 115 }, { "epoch": 0.18441971383147854, "grad_norm": 16.06044084552397, "learning_rate": 6.084656084656085e-06, "loss": 3.274482250213623, "step": 116 }, { "epoch": 0.18600953895071543, "grad_norm": 7.585391293597456, "learning_rate": 6.137566137566138e-06, "loss": 2.9996328353881836, "step": 117 }, { "epoch": 0.1875993640699523, "grad_norm": 20.191267861289145, "learning_rate": 6.1904761904761914e-06, "loss": 3.5223331451416016, "step": 118 }, { "epoch": 0.1891891891891892, "grad_norm": 16.500509647866277, "learning_rate": 6.243386243386243e-06, "loss": 3.3577396869659424, "step": 119 }, { "epoch": 0.1907790143084261, "grad_norm": 8.060506214524787, "learning_rate": 6.296296296296297e-06, "loss": 2.650815486907959, "step": 120 }, { "epoch": 0.19236883942766295, "grad_norm": 12.673879706396006, "learning_rate": 6.349206349206349e-06, "loss": 2.8354992866516113, "step": 121 }, { "epoch": 0.19395866454689983, "grad_norm": 11.414637618226603, "learning_rate": 6.402116402116403e-06, "loss": 2.889648675918579, "step": 122 }, { "epoch": 0.19554848966613672, "grad_norm": 12.922762478535862, "learning_rate": 6.455026455026455e-06, "loss": 3.0907392501831055, "step": 123 }, { "epoch": 0.1971383147853736, "grad_norm": 61.136138891067894, "learning_rate": 6.507936507936509e-06, "loss": 3.7759904861450195, "step": 124 }, { "epoch": 0.1987281399046105, "grad_norm": 10.356849079188658, "learning_rate": 6.560846560846561e-06, "loss": 3.2002317905426025, "step": 125 }, { "epoch": 0.20031796502384738, "grad_norm": 16.78438797563512, "learning_rate": 6.613756613756615e-06, "loss": 2.974229574203491, "step": 126 }, { "epoch": 0.20190779014308427, "grad_norm": 12.81961909287759, "learning_rate": 6.666666666666667e-06, "loss": 2.952580690383911, "step": 127 }, { "epoch": 0.20349761526232116, "grad_norm": 12.190840843520247, "learning_rate": 6.71957671957672e-06, "loss": 3.3993425369262695, "step": 128 }, { "epoch": 0.20508744038155802, "grad_norm": 7.37093293280535, "learning_rate": 6.772486772486773e-06, "loss": 3.5084962844848633, "step": 129 }, { "epoch": 0.2066772655007949, "grad_norm": 8.766940367947488, "learning_rate": 6.825396825396826e-06, "loss": 2.979722499847412, "step": 130 }, { "epoch": 0.2082670906200318, "grad_norm": 18.195649510942715, "learning_rate": 6.878306878306879e-06, "loss": 2.716529130935669, "step": 131 }, { "epoch": 0.20985691573926868, "grad_norm": 10.849621644780795, "learning_rate": 6.931216931216932e-06, "loss": 3.278958797454834, "step": 132 }, { "epoch": 0.21144674085850557, "grad_norm": 11.035071999700392, "learning_rate": 6.984126984126984e-06, "loss": 3.1757240295410156, "step": 133 }, { "epoch": 0.21303656597774245, "grad_norm": 9.638319374477183, "learning_rate": 7.0370370370370375e-06, "loss": 2.755430221557617, "step": 134 }, { "epoch": 0.21462639109697934, "grad_norm": 13.908631086183993, "learning_rate": 7.08994708994709e-06, "loss": 3.236640453338623, "step": 135 }, { "epoch": 0.21621621621621623, "grad_norm": 21.68147330866866, "learning_rate": 7.1428571428571436e-06, "loss": 3.0127792358398438, "step": 136 }, { "epoch": 0.2178060413354531, "grad_norm": 11.192606660727337, "learning_rate": 7.195767195767196e-06, "loss": 2.746997117996216, "step": 137 }, { "epoch": 0.21939586645468998, "grad_norm": 6.435274969530198, "learning_rate": 7.24867724867725e-06, "loss": 2.208346366882324, "step": 138 }, { "epoch": 0.22098569157392686, "grad_norm": 9.977014012465766, "learning_rate": 7.301587301587301e-06, "loss": 2.976306676864624, "step": 139 }, { "epoch": 0.22257551669316375, "grad_norm": 6.835119442466233, "learning_rate": 7.354497354497355e-06, "loss": 2.2386083602905273, "step": 140 }, { "epoch": 0.22416534181240064, "grad_norm": 27.664987866635204, "learning_rate": 7.4074074074074075e-06, "loss": 3.936030626296997, "step": 141 }, { "epoch": 0.22575516693163752, "grad_norm": 13.281763744066357, "learning_rate": 7.460317460317461e-06, "loss": 3.425809383392334, "step": 142 }, { "epoch": 0.2273449920508744, "grad_norm": 8.883887810388247, "learning_rate": 7.5132275132275136e-06, "loss": 2.791560411453247, "step": 143 }, { "epoch": 0.2289348171701113, "grad_norm": 13.575129037863244, "learning_rate": 7.566137566137567e-06, "loss": 3.240875720977783, "step": 144 }, { "epoch": 0.23052464228934816, "grad_norm": 12.951685526951211, "learning_rate": 7.61904761904762e-06, "loss": 3.083731174468994, "step": 145 }, { "epoch": 0.23211446740858505, "grad_norm": 12.57259309996259, "learning_rate": 7.671957671957672e-06, "loss": 2.3552327156066895, "step": 146 }, { "epoch": 0.23370429252782193, "grad_norm": 14.929160501589186, "learning_rate": 7.724867724867726e-06, "loss": 2.911569595336914, "step": 147 }, { "epoch": 0.23529411764705882, "grad_norm": 9.10971441639092, "learning_rate": 7.77777777777778e-06, "loss": 2.477398633956909, "step": 148 }, { "epoch": 0.2368839427662957, "grad_norm": 13.685768768755157, "learning_rate": 7.830687830687831e-06, "loss": 2.6684117317199707, "step": 149 }, { "epoch": 0.2384737678855326, "grad_norm": 32.20168223485909, "learning_rate": 7.883597883597884e-06, "loss": 3.338864803314209, "step": 150 }, { "epoch": 0.24006359300476948, "grad_norm": 7.915241057603823, "learning_rate": 7.936507936507936e-06, "loss": 2.8785102367401123, "step": 151 }, { "epoch": 0.24165341812400637, "grad_norm": 9.990195796041824, "learning_rate": 7.98941798941799e-06, "loss": 2.784539222717285, "step": 152 }, { "epoch": 0.24324324324324326, "grad_norm": 13.621954030852349, "learning_rate": 8.042328042328043e-06, "loss": 2.820026159286499, "step": 153 }, { "epoch": 0.24483306836248012, "grad_norm": 7.4478722820007865, "learning_rate": 8.095238095238097e-06, "loss": 3.184044361114502, "step": 154 }, { "epoch": 0.246422893481717, "grad_norm": 16.120590939401566, "learning_rate": 8.148148148148148e-06, "loss": 2.2940902709960938, "step": 155 }, { "epoch": 0.2480127186009539, "grad_norm": 11.591244973819766, "learning_rate": 8.201058201058202e-06, "loss": 2.3563649654388428, "step": 156 }, { "epoch": 0.24960254372019078, "grad_norm": 9.161949195292504, "learning_rate": 8.253968253968254e-06, "loss": 3.106217861175537, "step": 157 }, { "epoch": 0.25119236883942764, "grad_norm": 15.37253533460567, "learning_rate": 8.306878306878307e-06, "loss": 3.1488471031188965, "step": 158 }, { "epoch": 0.2527821939586645, "grad_norm": 11.948620374727305, "learning_rate": 8.35978835978836e-06, "loss": 2.8425357341766357, "step": 159 }, { "epoch": 0.2543720190779014, "grad_norm": 11.489434173785702, "learning_rate": 8.412698412698414e-06, "loss": 3.0375239849090576, "step": 160 }, { "epoch": 0.2559618441971383, "grad_norm": 12.43920449942672, "learning_rate": 8.465608465608466e-06, "loss": 3.767285108566284, "step": 161 }, { "epoch": 0.2575516693163752, "grad_norm": 14.656284556147316, "learning_rate": 8.518518518518519e-06, "loss": 3.189174175262451, "step": 162 }, { "epoch": 0.2591414944356121, "grad_norm": 10.107696999004142, "learning_rate": 8.571428571428571e-06, "loss": 2.8151469230651855, "step": 163 }, { "epoch": 0.26073131955484896, "grad_norm": 7.8333910009033065, "learning_rate": 8.624338624338624e-06, "loss": 2.86727237701416, "step": 164 }, { "epoch": 0.26232114467408585, "grad_norm": 18.344054105660557, "learning_rate": 8.677248677248678e-06, "loss": 3.4317922592163086, "step": 165 }, { "epoch": 0.26391096979332274, "grad_norm": 14.109564273449301, "learning_rate": 8.730158730158731e-06, "loss": 3.358736038208008, "step": 166 }, { "epoch": 0.2655007949125596, "grad_norm": 10.64636873319405, "learning_rate": 8.783068783068783e-06, "loss": 2.5848193168640137, "step": 167 }, { "epoch": 0.2670906200317965, "grad_norm": 7.140157930387994, "learning_rate": 8.835978835978837e-06, "loss": 3.1791625022888184, "step": 168 }, { "epoch": 0.2686804451510334, "grad_norm": 17.161706285667417, "learning_rate": 8.888888888888888e-06, "loss": 2.3590657711029053, "step": 169 }, { "epoch": 0.2702702702702703, "grad_norm": 29.446892543609344, "learning_rate": 8.941798941798942e-06, "loss": 3.5621213912963867, "step": 170 }, { "epoch": 0.2718600953895072, "grad_norm": 16.72759384781146, "learning_rate": 8.994708994708995e-06, "loss": 3.151611089706421, "step": 171 }, { "epoch": 0.27344992050874406, "grad_norm": 12.032239707153675, "learning_rate": 9.047619047619049e-06, "loss": 3.3835530281066895, "step": 172 }, { "epoch": 0.27503974562798095, "grad_norm": 6.721126601850909, "learning_rate": 9.1005291005291e-06, "loss": 2.485015869140625, "step": 173 }, { "epoch": 0.2766295707472178, "grad_norm": 13.098556820950769, "learning_rate": 9.153439153439154e-06, "loss": 3.190592050552368, "step": 174 }, { "epoch": 0.27821939586645467, "grad_norm": 7.6498571126734145, "learning_rate": 9.206349206349207e-06, "loss": 1.9906291961669922, "step": 175 }, { "epoch": 0.27980922098569155, "grad_norm": 10.086096084629292, "learning_rate": 9.25925925925926e-06, "loss": 2.9297289848327637, "step": 176 }, { "epoch": 0.28139904610492844, "grad_norm": 10.521555282547693, "learning_rate": 9.312169312169313e-06, "loss": 2.9031777381896973, "step": 177 }, { "epoch": 0.28298887122416533, "grad_norm": 11.275372751518375, "learning_rate": 9.365079365079366e-06, "loss": 3.1964385509490967, "step": 178 }, { "epoch": 0.2845786963434022, "grad_norm": 31.857894626364228, "learning_rate": 9.417989417989418e-06, "loss": 3.562572717666626, "step": 179 }, { "epoch": 0.2861685214626391, "grad_norm": 11.67182979337774, "learning_rate": 9.470899470899471e-06, "loss": 2.9622483253479004, "step": 180 }, { "epoch": 0.287758346581876, "grad_norm": 8.433382292034693, "learning_rate": 9.523809523809525e-06, "loss": 2.673311710357666, "step": 181 }, { "epoch": 0.2893481717011129, "grad_norm": 9.41809032552769, "learning_rate": 9.576719576719578e-06, "loss": 3.147991895675659, "step": 182 }, { "epoch": 0.29093799682034976, "grad_norm": 14.469006681412353, "learning_rate": 9.62962962962963e-06, "loss": 3.427096128463745, "step": 183 }, { "epoch": 0.29252782193958665, "grad_norm": 11.315982466451157, "learning_rate": 9.682539682539683e-06, "loss": 2.644692897796631, "step": 184 }, { "epoch": 0.29411764705882354, "grad_norm": 7.855532445920403, "learning_rate": 9.735449735449735e-06, "loss": 2.748213291168213, "step": 185 }, { "epoch": 0.2957074721780604, "grad_norm": 8.270032489370442, "learning_rate": 9.788359788359789e-06, "loss": 2.9862685203552246, "step": 186 }, { "epoch": 0.2972972972972973, "grad_norm": 14.12339483663101, "learning_rate": 9.841269841269842e-06, "loss": 3.0839715003967285, "step": 187 }, { "epoch": 0.2988871224165342, "grad_norm": 24.634858333440526, "learning_rate": 9.894179894179896e-06, "loss": 3.622962474822998, "step": 188 }, { "epoch": 0.3004769475357711, "grad_norm": 10.995163007585823, "learning_rate": 9.947089947089947e-06, "loss": 3.127063751220703, "step": 189 }, { "epoch": 0.302066772655008, "grad_norm": 12.483636136677996, "learning_rate": 1e-05, "loss": 2.9223406314849854, "step": 190 }, { "epoch": 0.3036565977742448, "grad_norm": 20.178311042194125, "learning_rate": 9.999991442158113e-06, "loss": 3.5398926734924316, "step": 191 }, { "epoch": 0.3052464228934817, "grad_norm": 10.479156797595314, "learning_rate": 9.99996576866174e-06, "loss": 2.693631649017334, "step": 192 }, { "epoch": 0.3068362480127186, "grad_norm": 70.96351629759928, "learning_rate": 9.999922979598773e-06, "loss": 3.0234291553497314, "step": 193 }, { "epoch": 0.30842607313195547, "grad_norm": 14.7334787992814, "learning_rate": 9.999863075115677e-06, "loss": 3.117661952972412, "step": 194 }, { "epoch": 0.31001589825119236, "grad_norm": 10.128979481357636, "learning_rate": 9.999786055417519e-06, "loss": 2.6096296310424805, "step": 195 }, { "epoch": 0.31160572337042924, "grad_norm": 9.41421099953952, "learning_rate": 9.999691920767945e-06, "loss": 2.9272522926330566, "step": 196 }, { "epoch": 0.31319554848966613, "grad_norm": 15.414651367634708, "learning_rate": 9.999580671489191e-06, "loss": 2.8805973529815674, "step": 197 }, { "epoch": 0.314785373608903, "grad_norm": 15.828806976113896, "learning_rate": 9.999452307962079e-06, "loss": 2.961799144744873, "step": 198 }, { "epoch": 0.3163751987281399, "grad_norm": 14.843803608404203, "learning_rate": 9.999306830626015e-06, "loss": 2.878042221069336, "step": 199 }, { "epoch": 0.3179650238473768, "grad_norm": 14.036434182045523, "learning_rate": 9.999144239978987e-06, "loss": 3.0351200103759766, "step": 200 }, { "epoch": 0.3195548489666137, "grad_norm": 10.730468002314861, "learning_rate": 9.998964536577566e-06, "loss": 2.9610581398010254, "step": 201 }, { "epoch": 0.32114467408585057, "grad_norm": 9.587107087736614, "learning_rate": 9.998767721036901e-06, "loss": 3.2652862071990967, "step": 202 }, { "epoch": 0.32273449920508746, "grad_norm": 14.206705864824103, "learning_rate": 9.998553794030719e-06, "loss": 3.029031276702881, "step": 203 }, { "epoch": 0.32432432432432434, "grad_norm": 7.982790434048001, "learning_rate": 9.998322756291321e-06, "loss": 3.0771546363830566, "step": 204 }, { "epoch": 0.32591414944356123, "grad_norm": 15.760513036342372, "learning_rate": 9.998074608609579e-06, "loss": 2.7780063152313232, "step": 205 }, { "epoch": 0.3275039745627981, "grad_norm": 10.059217166278922, "learning_rate": 9.997809351834939e-06, "loss": 2.802194833755493, "step": 206 }, { "epoch": 0.32909379968203495, "grad_norm": 18.29933240314462, "learning_rate": 9.997526986875412e-06, "loss": 3.0691840648651123, "step": 207 }, { "epoch": 0.33068362480127184, "grad_norm": 8.443032752679324, "learning_rate": 9.997227514697568e-06, "loss": 2.6535720825195312, "step": 208 }, { "epoch": 0.3322734499205087, "grad_norm": 9.084135114248914, "learning_rate": 9.996910936326545e-06, "loss": 2.830028772354126, "step": 209 }, { "epoch": 0.3338632750397456, "grad_norm": 11.92996879784718, "learning_rate": 9.996577252846032e-06, "loss": 2.7349863052368164, "step": 210 }, { "epoch": 0.3354531001589825, "grad_norm": 8.51522244619189, "learning_rate": 9.996226465398272e-06, "loss": 2.55774188041687, "step": 211 }, { "epoch": 0.3370429252782194, "grad_norm": 19.349878654802804, "learning_rate": 9.995858575184062e-06, "loss": 2.8570663928985596, "step": 212 }, { "epoch": 0.3386327503974563, "grad_norm": 7.4415089683943085, "learning_rate": 9.995473583462737e-06, "loss": 2.795311450958252, "step": 213 }, { "epoch": 0.34022257551669316, "grad_norm": 19.748222755481127, "learning_rate": 9.99507149155218e-06, "loss": 3.103321075439453, "step": 214 }, { "epoch": 0.34181240063593005, "grad_norm": 13.405923907887225, "learning_rate": 9.994652300828803e-06, "loss": 2.971069574356079, "step": 215 }, { "epoch": 0.34340222575516693, "grad_norm": 15.173562859355869, "learning_rate": 9.994216012727556e-06, "loss": 2.4139862060546875, "step": 216 }, { "epoch": 0.3449920508744038, "grad_norm": 10.818103405590593, "learning_rate": 9.99376262874191e-06, "loss": 2.6961464881896973, "step": 217 }, { "epoch": 0.3465818759936407, "grad_norm": 7.93512381654976, "learning_rate": 9.993292150423862e-06, "loss": 2.362365484237671, "step": 218 }, { "epoch": 0.3481717011128776, "grad_norm": 14.70978474527209, "learning_rate": 9.992804579383924e-06, "loss": 3.3039183616638184, "step": 219 }, { "epoch": 0.3497615262321145, "grad_norm": 18.572829651801086, "learning_rate": 9.992299917291118e-06, "loss": 2.4119772911071777, "step": 220 }, { "epoch": 0.35135135135135137, "grad_norm": 10.133845798143364, "learning_rate": 9.991778165872973e-06, "loss": 3.023960590362549, "step": 221 }, { "epoch": 0.35294117647058826, "grad_norm": 21.576817140456043, "learning_rate": 9.991239326915509e-06, "loss": 2.799973964691162, "step": 222 }, { "epoch": 0.35453100158982515, "grad_norm": 9.74809561137367, "learning_rate": 9.990683402263254e-06, "loss": 2.593146800994873, "step": 223 }, { "epoch": 0.356120826709062, "grad_norm": 11.298489884990596, "learning_rate": 9.990110393819207e-06, "loss": 2.991196632385254, "step": 224 }, { "epoch": 0.35771065182829886, "grad_norm": 7.74310426714735, "learning_rate": 9.989520303544861e-06, "loss": 2.5676424503326416, "step": 225 }, { "epoch": 0.35930047694753575, "grad_norm": 11.040637572487988, "learning_rate": 9.98891313346017e-06, "loss": 2.484053611755371, "step": 226 }, { "epoch": 0.36089030206677264, "grad_norm": 13.73072858076415, "learning_rate": 9.988288885643565e-06, "loss": 2.5456459522247314, "step": 227 }, { "epoch": 0.3624801271860095, "grad_norm": 11.673610223259558, "learning_rate": 9.987647562231926e-06, "loss": 3.305084228515625, "step": 228 }, { "epoch": 0.3640699523052464, "grad_norm": 18.366434893044207, "learning_rate": 9.986989165420596e-06, "loss": 3.1925582885742188, "step": 229 }, { "epoch": 0.3656597774244833, "grad_norm": 12.177300199600618, "learning_rate": 9.986313697463353e-06, "loss": 2.929258346557617, "step": 230 }, { "epoch": 0.3672496025437202, "grad_norm": 6.5814118651427345, "learning_rate": 9.98562116067242e-06, "loss": 2.5541605949401855, "step": 231 }, { "epoch": 0.3688394276629571, "grad_norm": 10.820882864838984, "learning_rate": 9.984911557418444e-06, "loss": 2.5791280269622803, "step": 232 }, { "epoch": 0.37042925278219396, "grad_norm": 15.37113923702593, "learning_rate": 9.984184890130491e-06, "loss": 3.1903953552246094, "step": 233 }, { "epoch": 0.37201907790143085, "grad_norm": 9.045995242642721, "learning_rate": 9.983441161296048e-06, "loss": 2.4408342838287354, "step": 234 }, { "epoch": 0.37360890302066774, "grad_norm": 15.152661135606118, "learning_rate": 9.982680373460996e-06, "loss": 2.5460476875305176, "step": 235 }, { "epoch": 0.3751987281399046, "grad_norm": 8.16353063007233, "learning_rate": 9.981902529229617e-06, "loss": 2.487499713897705, "step": 236 }, { "epoch": 0.3767885532591415, "grad_norm": 11.463426369962924, "learning_rate": 9.981107631264578e-06, "loss": 2.4622912406921387, "step": 237 }, { "epoch": 0.3783783783783784, "grad_norm": 13.891724957278424, "learning_rate": 9.980295682286924e-06, "loss": 3.4187865257263184, "step": 238 }, { "epoch": 0.3799682034976153, "grad_norm": 8.8302666510195, "learning_rate": 9.979466685076069e-06, "loss": 2.393531322479248, "step": 239 }, { "epoch": 0.3815580286168522, "grad_norm": 11.8201828045838, "learning_rate": 9.97862064246978e-06, "loss": 2.8234825134277344, "step": 240 }, { "epoch": 0.383147853736089, "grad_norm": 14.642242808889572, "learning_rate": 9.97775755736418e-06, "loss": 2.8137476444244385, "step": 241 }, { "epoch": 0.3847376788553259, "grad_norm": 10.460666558488356, "learning_rate": 9.976877432713725e-06, "loss": 2.6718130111694336, "step": 242 }, { "epoch": 0.3863275039745628, "grad_norm": 9.41352654863366, "learning_rate": 9.975980271531205e-06, "loss": 2.797076940536499, "step": 243 }, { "epoch": 0.38791732909379967, "grad_norm": 22.912991700027433, "learning_rate": 9.97506607688772e-06, "loss": 4.023824691772461, "step": 244 }, { "epoch": 0.38950715421303655, "grad_norm": 11.547224083305466, "learning_rate": 9.974134851912688e-06, "loss": 3.1276447772979736, "step": 245 }, { "epoch": 0.39109697933227344, "grad_norm": 23.66944749659379, "learning_rate": 9.97318659979382e-06, "loss": 2.84647798538208, "step": 246 }, { "epoch": 0.39268680445151033, "grad_norm": 8.463015832445738, "learning_rate": 9.97222132377711e-06, "loss": 2.8939974308013916, "step": 247 }, { "epoch": 0.3942766295707472, "grad_norm": 8.792225539309547, "learning_rate": 9.971239027166832e-06, "loss": 2.658247947692871, "step": 248 }, { "epoch": 0.3958664546899841, "grad_norm": 46.91904473108387, "learning_rate": 9.970239713325518e-06, "loss": 3.596339464187622, "step": 249 }, { "epoch": 0.397456279809221, "grad_norm": 8.496075126599893, "learning_rate": 9.969223385673958e-06, "loss": 2.798522472381592, "step": 250 }, { "epoch": 0.3990461049284579, "grad_norm": 10.626098168110953, "learning_rate": 9.968190047691184e-06, "loss": 3.0867185592651367, "step": 251 }, { "epoch": 0.40063593004769477, "grad_norm": 8.956578837726095, "learning_rate": 9.967139702914447e-06, "loss": 3.229172706604004, "step": 252 }, { "epoch": 0.40222575516693165, "grad_norm": 12.855914124198701, "learning_rate": 9.966072354939225e-06, "loss": 2.9705493450164795, "step": 253 }, { "epoch": 0.40381558028616854, "grad_norm": 23.688975978884205, "learning_rate": 9.964988007419195e-06, "loss": 2.8320472240448, "step": 254 }, { "epoch": 0.40540540540540543, "grad_norm": 11.823651203027762, "learning_rate": 9.963886664066224e-06, "loss": 2.742363452911377, "step": 255 }, { "epoch": 0.4069952305246423, "grad_norm": 32.55567327428842, "learning_rate": 9.962768328650367e-06, "loss": 2.6898093223571777, "step": 256 }, { "epoch": 0.40858505564387915, "grad_norm": 12.333778411337002, "learning_rate": 9.961633004999835e-06, "loss": 2.416064739227295, "step": 257 }, { "epoch": 0.41017488076311603, "grad_norm": 12.46481852710721, "learning_rate": 9.960480697000996e-06, "loss": 2.5227324962615967, "step": 258 }, { "epoch": 0.4117647058823529, "grad_norm": 14.821827754412858, "learning_rate": 9.95931140859836e-06, "loss": 2.843254566192627, "step": 259 }, { "epoch": 0.4133545310015898, "grad_norm": 13.241902003603611, "learning_rate": 9.95812514379456e-06, "loss": 2.8716843128204346, "step": 260 }, { "epoch": 0.4149443561208267, "grad_norm": 9.97892993021681, "learning_rate": 9.956921906650342e-06, "loss": 2.8791468143463135, "step": 261 }, { "epoch": 0.4165341812400636, "grad_norm": 10.340993757400383, "learning_rate": 9.95570170128455e-06, "loss": 2.94400691986084, "step": 262 }, { "epoch": 0.41812400635930047, "grad_norm": 14.429065449122799, "learning_rate": 9.954464531874118e-06, "loss": 2.703669786453247, "step": 263 }, { "epoch": 0.41971383147853736, "grad_norm": 9.52424184559037, "learning_rate": 9.953210402654043e-06, "loss": 1.8419052362442017, "step": 264 }, { "epoch": 0.42130365659777425, "grad_norm": 10.85229462688143, "learning_rate": 9.951939317917381e-06, "loss": 2.812004327774048, "step": 265 }, { "epoch": 0.42289348171701113, "grad_norm": 10.563617697488839, "learning_rate": 9.95065128201523e-06, "loss": 3.1184329986572266, "step": 266 }, { "epoch": 0.424483306836248, "grad_norm": 15.181152546843654, "learning_rate": 9.949346299356711e-06, "loss": 2.8881607055664062, "step": 267 }, { "epoch": 0.4260731319554849, "grad_norm": 7.730113415937909, "learning_rate": 9.94802437440896e-06, "loss": 2.938474655151367, "step": 268 }, { "epoch": 0.4276629570747218, "grad_norm": 16.011410990937947, "learning_rate": 9.946685511697108e-06, "loss": 2.8643383979797363, "step": 269 }, { "epoch": 0.4292527821939587, "grad_norm": 14.148092529488144, "learning_rate": 9.945329715804261e-06, "loss": 2.6052684783935547, "step": 270 }, { "epoch": 0.43084260731319557, "grad_norm": 13.558262505075879, "learning_rate": 9.9439569913715e-06, "loss": 3.440485954284668, "step": 271 }, { "epoch": 0.43243243243243246, "grad_norm": 9.467937948634649, "learning_rate": 9.942567343097843e-06, "loss": 3.508424997329712, "step": 272 }, { "epoch": 0.43402225755166934, "grad_norm": 10.39498217285857, "learning_rate": 9.941160775740247e-06, "loss": 2.8846826553344727, "step": 273 }, { "epoch": 0.4356120826709062, "grad_norm": 14.176390300187323, "learning_rate": 9.939737294113585e-06, "loss": 2.803740978240967, "step": 274 }, { "epoch": 0.43720190779014306, "grad_norm": 17.785521530494066, "learning_rate": 9.938296903090631e-06, "loss": 4.964699745178223, "step": 275 }, { "epoch": 0.43879173290937995, "grad_norm": 18.588550086313642, "learning_rate": 9.936839607602038e-06, "loss": 2.787569046020508, "step": 276 }, { "epoch": 0.44038155802861684, "grad_norm": 15.38132057951777, "learning_rate": 9.93536541263633e-06, "loss": 3.089069366455078, "step": 277 }, { "epoch": 0.4419713831478537, "grad_norm": 11.531976186480126, "learning_rate": 9.933874323239876e-06, "loss": 2.688262701034546, "step": 278 }, { "epoch": 0.4435612082670906, "grad_norm": 8.767813327048348, "learning_rate": 9.932366344516879e-06, "loss": 2.903958320617676, "step": 279 }, { "epoch": 0.4451510333863275, "grad_norm": 14.186522002107646, "learning_rate": 9.930841481629358e-06, "loss": 3.396346092224121, "step": 280 }, { "epoch": 0.4467408585055644, "grad_norm": 8.4581258239869, "learning_rate": 9.929299739797127e-06, "loss": 3.1296937465667725, "step": 281 }, { "epoch": 0.4483306836248013, "grad_norm": 10.627174140105737, "learning_rate": 9.927741124297776e-06, "loss": 3.211103916168213, "step": 282 }, { "epoch": 0.44992050874403816, "grad_norm": 12.699516974685517, "learning_rate": 9.926165640466664e-06, "loss": 2.7114107608795166, "step": 283 }, { "epoch": 0.45151033386327505, "grad_norm": 18.797540035139892, "learning_rate": 9.924573293696885e-06, "loss": 2.0409135818481445, "step": 284 }, { "epoch": 0.45310015898251194, "grad_norm": 8.667150407323442, "learning_rate": 9.922964089439257e-06, "loss": 2.540942430496216, "step": 285 }, { "epoch": 0.4546899841017488, "grad_norm": 12.965206512595481, "learning_rate": 9.92133803320231e-06, "loss": 2.699608325958252, "step": 286 }, { "epoch": 0.4562798092209857, "grad_norm": 21.082413121018245, "learning_rate": 9.919695130552257e-06, "loss": 3.058361053466797, "step": 287 }, { "epoch": 0.4578696343402226, "grad_norm": 6.991367140145618, "learning_rate": 9.918035387112976e-06, "loss": 2.7070534229278564, "step": 288 }, { "epoch": 0.4594594594594595, "grad_norm": 13.102007296803354, "learning_rate": 9.916358808565999e-06, "loss": 2.6915783882141113, "step": 289 }, { "epoch": 0.4610492845786963, "grad_norm": 10.195274915471822, "learning_rate": 9.91466540065048e-06, "loss": 2.9073867797851562, "step": 290 }, { "epoch": 0.4626391096979332, "grad_norm": 10.689475549111851, "learning_rate": 9.91295516916319e-06, "loss": 2.729437828063965, "step": 291 }, { "epoch": 0.4642289348171701, "grad_norm": 6.740276455448954, "learning_rate": 9.91122811995848e-06, "loss": 2.762054443359375, "step": 292 }, { "epoch": 0.465818759936407, "grad_norm": 8.284711631921079, "learning_rate": 9.90948425894828e-06, "loss": 3.0620100498199463, "step": 293 }, { "epoch": 0.46740858505564387, "grad_norm": 12.179751636155302, "learning_rate": 9.907723592102062e-06, "loss": 2.8368825912475586, "step": 294 }, { "epoch": 0.46899841017488075, "grad_norm": 17.5460433101431, "learning_rate": 9.905946125446832e-06, "loss": 2.9089303016662598, "step": 295 }, { "epoch": 0.47058823529411764, "grad_norm": 10.357264423725997, "learning_rate": 9.9041518650671e-06, "loss": 3.0648887157440186, "step": 296 }, { "epoch": 0.47217806041335453, "grad_norm": 20.25108319774201, "learning_rate": 9.902340817104864e-06, "loss": 3.5885443687438965, "step": 297 }, { "epoch": 0.4737678855325914, "grad_norm": 20.107004352553076, "learning_rate": 9.90051298775959e-06, "loss": 2.6394972801208496, "step": 298 }, { "epoch": 0.4753577106518283, "grad_norm": 23.997090021624807, "learning_rate": 9.898668383288185e-06, "loss": 3.3596107959747314, "step": 299 }, { "epoch": 0.4769475357710652, "grad_norm": 14.980828112071602, "learning_rate": 9.896807010004988e-06, "loss": 3.2323708534240723, "step": 300 }, { "epoch": 0.4785373608903021, "grad_norm": 10.84905018042917, "learning_rate": 9.89492887428173e-06, "loss": 2.7953944206237793, "step": 301 }, { "epoch": 0.48012718600953896, "grad_norm": 16.74258275045779, "learning_rate": 9.893033982547528e-06, "loss": 2.7926273345947266, "step": 302 }, { "epoch": 0.48171701112877585, "grad_norm": 10.11527276573299, "learning_rate": 9.891122341288854e-06, "loss": 2.3538496494293213, "step": 303 }, { "epoch": 0.48330683624801274, "grad_norm": 13.074222431480202, "learning_rate": 9.88919395704952e-06, "loss": 3.081326961517334, "step": 304 }, { "epoch": 0.4848966613672496, "grad_norm": 12.083919152113767, "learning_rate": 9.887248836430645e-06, "loss": 3.050244092941284, "step": 305 }, { "epoch": 0.4864864864864865, "grad_norm": 8.75019250675372, "learning_rate": 9.885286986090646e-06, "loss": 2.987945556640625, "step": 306 }, { "epoch": 0.48807631160572335, "grad_norm": 18.32254235621283, "learning_rate": 9.883308412745206e-06, "loss": 3.4225993156433105, "step": 307 }, { "epoch": 0.48966613672496023, "grad_norm": 8.832288826661868, "learning_rate": 9.88131312316725e-06, "loss": 2.4250919818878174, "step": 308 }, { "epoch": 0.4912559618441971, "grad_norm": 14.295320282866411, "learning_rate": 9.879301124186926e-06, "loss": 2.9305214881896973, "step": 309 }, { "epoch": 0.492845786963434, "grad_norm": 9.963541379890719, "learning_rate": 9.877272422691583e-06, "loss": 2.68511962890625, "step": 310 }, { "epoch": 0.4944356120826709, "grad_norm": 18.504257818010892, "learning_rate": 9.875227025625744e-06, "loss": 2.9232048988342285, "step": 311 }, { "epoch": 0.4960254372019078, "grad_norm": 6.865199788520621, "learning_rate": 9.873164939991085e-06, "loss": 2.5240325927734375, "step": 312 }, { "epoch": 0.49761526232114467, "grad_norm": 10.196649184912834, "learning_rate": 9.871086172846403e-06, "loss": 2.937847137451172, "step": 313 }, { "epoch": 0.49920508744038156, "grad_norm": 10.807236690204771, "learning_rate": 9.868990731307604e-06, "loss": 2.607318162918091, "step": 314 }, { "epoch": 0.5007949125596184, "grad_norm": 10.04817027309746, "learning_rate": 9.866878622547671e-06, "loss": 3.0139381885528564, "step": 315 }, { "epoch": 0.5023847376788553, "grad_norm": 14.04657834855741, "learning_rate": 9.864749853796642e-06, "loss": 2.713085412979126, "step": 316 }, { "epoch": 0.5039745627980922, "grad_norm": 10.580139193512768, "learning_rate": 9.862604432341583e-06, "loss": 3.2179996967315674, "step": 317 }, { "epoch": 0.505564387917329, "grad_norm": 14.060855679931999, "learning_rate": 9.860442365526565e-06, "loss": 2.7278504371643066, "step": 318 }, { "epoch": 0.5071542130365659, "grad_norm": 9.70708934527147, "learning_rate": 9.858263660752637e-06, "loss": 3.0756285190582275, "step": 319 }, { "epoch": 0.5087440381558028, "grad_norm": 13.293623447658439, "learning_rate": 9.856068325477805e-06, "loss": 3.088465690612793, "step": 320 }, { "epoch": 0.5103338632750397, "grad_norm": 7.794568252567064, "learning_rate": 9.853856367217001e-06, "loss": 2.9818029403686523, "step": 321 }, { "epoch": 0.5119236883942766, "grad_norm": 7.851326897921035, "learning_rate": 9.85162779354206e-06, "loss": 2.2186264991760254, "step": 322 }, { "epoch": 0.5135135135135135, "grad_norm": 27.73475205218493, "learning_rate": 9.849382612081698e-06, "loss": 3.252265453338623, "step": 323 }, { "epoch": 0.5151033386327504, "grad_norm": 12.4240113753297, "learning_rate": 9.847120830521476e-06, "loss": 2.715832233428955, "step": 324 }, { "epoch": 0.5166931637519873, "grad_norm": 11.173745279264194, "learning_rate": 9.844842456603779e-06, "loss": 3.116093635559082, "step": 325 }, { "epoch": 0.5182829888712241, "grad_norm": 12.86649071600495, "learning_rate": 9.842547498127794e-06, "loss": 2.7198853492736816, "step": 326 }, { "epoch": 0.519872813990461, "grad_norm": 9.750789020946055, "learning_rate": 9.84023596294948e-06, "loss": 2.7712936401367188, "step": 327 }, { "epoch": 0.5214626391096979, "grad_norm": 5.8993949186997146, "learning_rate": 9.837907858981536e-06, "loss": 2.7403852939605713, "step": 328 }, { "epoch": 0.5230524642289348, "grad_norm": 8.210808345252927, "learning_rate": 9.835563194193382e-06, "loss": 3.234954357147217, "step": 329 }, { "epoch": 0.5246422893481717, "grad_norm": 17.58386529096575, "learning_rate": 9.833201976611125e-06, "loss": 2.921865463256836, "step": 330 }, { "epoch": 0.5262321144674086, "grad_norm": 17.745817173384197, "learning_rate": 9.830824214317533e-06, "loss": 2.9681968688964844, "step": 331 }, { "epoch": 0.5278219395866455, "grad_norm": 8.88335299895624, "learning_rate": 9.828429915452018e-06, "loss": 2.7954001426696777, "step": 332 }, { "epoch": 0.5294117647058824, "grad_norm": 14.866460548402223, "learning_rate": 9.826019088210586e-06, "loss": 2.6031131744384766, "step": 333 }, { "epoch": 0.5310015898251192, "grad_norm": 15.725264129746899, "learning_rate": 9.823591740845831e-06, "loss": 3.0973379611968994, "step": 334 }, { "epoch": 0.5325914149443561, "grad_norm": 14.628458921610125, "learning_rate": 9.821147881666896e-06, "loss": 2.6520161628723145, "step": 335 }, { "epoch": 0.534181240063593, "grad_norm": 16.54064563714969, "learning_rate": 9.818687519039444e-06, "loss": 2.7431864738464355, "step": 336 }, { "epoch": 0.5357710651828299, "grad_norm": 10.962393061580121, "learning_rate": 9.816210661385633e-06, "loss": 2.5551836490631104, "step": 337 }, { "epoch": 0.5373608903020668, "grad_norm": 18.832152884024985, "learning_rate": 9.813717317184085e-06, "loss": 3.045194625854492, "step": 338 }, { "epoch": 0.5389507154213037, "grad_norm": 17.38158539650151, "learning_rate": 9.811207494969857e-06, "loss": 3.4276580810546875, "step": 339 }, { "epoch": 0.5405405405405406, "grad_norm": 10.622363155005946, "learning_rate": 9.808681203334416e-06, "loss": 3.2487316131591797, "step": 340 }, { "epoch": 0.5421303656597775, "grad_norm": 9.260816023702622, "learning_rate": 9.806138450925604e-06, "loss": 2.612975597381592, "step": 341 }, { "epoch": 0.5437201907790143, "grad_norm": 11.74463313569475, "learning_rate": 9.803579246447609e-06, "loss": 2.7874436378479004, "step": 342 }, { "epoch": 0.5453100158982512, "grad_norm": 17.837043198698524, "learning_rate": 9.801003598660937e-06, "loss": 2.9916462898254395, "step": 343 }, { "epoch": 0.5468998410174881, "grad_norm": 24.438251717009827, "learning_rate": 9.798411516382385e-06, "loss": 2.434546947479248, "step": 344 }, { "epoch": 0.548489666136725, "grad_norm": 14.013153276669572, "learning_rate": 9.795803008485004e-06, "loss": 3.085341453552246, "step": 345 }, { "epoch": 0.5500794912559619, "grad_norm": 13.557057584862173, "learning_rate": 9.793178083898073e-06, "loss": 3.117433547973633, "step": 346 }, { "epoch": 0.5516693163751988, "grad_norm": 10.637125299139159, "learning_rate": 9.790536751607065e-06, "loss": 2.892432689666748, "step": 347 }, { "epoch": 0.5532591414944356, "grad_norm": 10.550865957146144, "learning_rate": 9.787879020653627e-06, "loss": 2.861921787261963, "step": 348 }, { "epoch": 0.5548489666136724, "grad_norm": 22.137580282732337, "learning_rate": 9.785204900135533e-06, "loss": 2.463737726211548, "step": 349 }, { "epoch": 0.5564387917329093, "grad_norm": 21.77300397400076, "learning_rate": 9.782514399206664e-06, "loss": 3.39715838432312, "step": 350 }, { "epoch": 0.5580286168521462, "grad_norm": 26.488755598164676, "learning_rate": 9.77980752707697e-06, "loss": 2.856335163116455, "step": 351 }, { "epoch": 0.5596184419713831, "grad_norm": 15.482056115230122, "learning_rate": 9.777084293012448e-06, "loss": 3.082500457763672, "step": 352 }, { "epoch": 0.56120826709062, "grad_norm": 17.940874790430644, "learning_rate": 9.774344706335097e-06, "loss": 2.5417776107788086, "step": 353 }, { "epoch": 0.5627980922098569, "grad_norm": 10.606810121460613, "learning_rate": 9.7715887764229e-06, "loss": 2.677812099456787, "step": 354 }, { "epoch": 0.5643879173290938, "grad_norm": 18.117698123278753, "learning_rate": 9.768816512709782e-06, "loss": 2.576479911804199, "step": 355 }, { "epoch": 0.5659777424483307, "grad_norm": 11.765350214656465, "learning_rate": 9.766027924685579e-06, "loss": 2.7888994216918945, "step": 356 }, { "epoch": 0.5675675675675675, "grad_norm": 14.785341483147235, "learning_rate": 9.76322302189601e-06, "loss": 2.476093292236328, "step": 357 }, { "epoch": 0.5691573926868044, "grad_norm": 51.843503178912506, "learning_rate": 9.760401813942641e-06, "loss": 2.794877290725708, "step": 358 }, { "epoch": 0.5707472178060413, "grad_norm": 13.440897755315447, "learning_rate": 9.75756431048285e-06, "loss": 2.6585850715637207, "step": 359 }, { "epoch": 0.5723370429252782, "grad_norm": 14.7711393223718, "learning_rate": 9.754710521229804e-06, "loss": 3.022064447402954, "step": 360 }, { "epoch": 0.5739268680445151, "grad_norm": 11.102369711091598, "learning_rate": 9.751840455952411e-06, "loss": 2.669562339782715, "step": 361 }, { "epoch": 0.575516693163752, "grad_norm": 11.41916142117208, "learning_rate": 9.748954124475297e-06, "loss": 2.7201461791992188, "step": 362 }, { "epoch": 0.5771065182829889, "grad_norm": 10.443214334920143, "learning_rate": 9.74605153667877e-06, "loss": 2.6275901794433594, "step": 363 }, { "epoch": 0.5786963434022258, "grad_norm": 20.437613024405156, "learning_rate": 9.743132702498785e-06, "loss": 2.591904401779175, "step": 364 }, { "epoch": 0.5802861685214626, "grad_norm": 11.389555234258582, "learning_rate": 9.740197631926911e-06, "loss": 2.7886199951171875, "step": 365 }, { "epoch": 0.5818759936406995, "grad_norm": 8.461581392383087, "learning_rate": 9.737246335010295e-06, "loss": 2.6747968196868896, "step": 366 }, { "epoch": 0.5834658187599364, "grad_norm": 33.08363519117193, "learning_rate": 9.734278821851631e-06, "loss": 2.841123342514038, "step": 367 }, { "epoch": 0.5850556438791733, "grad_norm": 17.003662815071053, "learning_rate": 9.73129510260912e-06, "loss": 3.1505885124206543, "step": 368 }, { "epoch": 0.5866454689984102, "grad_norm": 22.111399140586197, "learning_rate": 9.728295187496444e-06, "loss": 2.7856974601745605, "step": 369 }, { "epoch": 0.5882352941176471, "grad_norm": 19.583461060784327, "learning_rate": 9.725279086782719e-06, "loss": 3.3989617824554443, "step": 370 }, { "epoch": 0.589825119236884, "grad_norm": 11.456541765804014, "learning_rate": 9.722246810792476e-06, "loss": 2.9938831329345703, "step": 371 }, { "epoch": 0.5914149443561209, "grad_norm": 8.169487886232476, "learning_rate": 9.719198369905605e-06, "loss": 2.4466989040374756, "step": 372 }, { "epoch": 0.5930047694753577, "grad_norm": 7.971907696739063, "learning_rate": 9.716133774557337e-06, "loss": 3.164093494415283, "step": 373 }, { "epoch": 0.5945945945945946, "grad_norm": 10.363321904123197, "learning_rate": 9.713053035238205e-06, "loss": 2.953866958618164, "step": 374 }, { "epoch": 0.5961844197138315, "grad_norm": 9.777251544649713, "learning_rate": 9.709956162493996e-06, "loss": 2.710660457611084, "step": 375 }, { "epoch": 0.5977742448330684, "grad_norm": 14.487810223583852, "learning_rate": 9.706843166925733e-06, "loss": 2.712660312652588, "step": 376 }, { "epoch": 0.5993640699523053, "grad_norm": 12.25933759921934, "learning_rate": 9.70371405918962e-06, "loss": 2.8972253799438477, "step": 377 }, { "epoch": 0.6009538950715422, "grad_norm": 18.03088885129158, "learning_rate": 9.700568849997026e-06, "loss": 3.1258721351623535, "step": 378 }, { "epoch": 0.6025437201907791, "grad_norm": 8.835107319947879, "learning_rate": 9.69740755011443e-06, "loss": 2.955259084701538, "step": 379 }, { "epoch": 0.604133545310016, "grad_norm": 9.600351639857102, "learning_rate": 9.694230170363396e-06, "loss": 2.7996139526367188, "step": 380 }, { "epoch": 0.6057233704292527, "grad_norm": 8.374042344290787, "learning_rate": 9.691036721620525e-06, "loss": 2.9617061614990234, "step": 381 }, { "epoch": 0.6073131955484896, "grad_norm": 16.200759577206647, "learning_rate": 9.687827214817433e-06, "loss": 4.232911586761475, "step": 382 }, { "epoch": 0.6089030206677265, "grad_norm": 9.221291164598371, "learning_rate": 9.6846016609407e-06, "loss": 2.861079692840576, "step": 383 }, { "epoch": 0.6104928457869634, "grad_norm": 7.14794391477156, "learning_rate": 9.681360071031835e-06, "loss": 3.0849013328552246, "step": 384 }, { "epoch": 0.6120826709062003, "grad_norm": 21.29708798650396, "learning_rate": 9.678102456187246e-06, "loss": 3.0311594009399414, "step": 385 }, { "epoch": 0.6136724960254372, "grad_norm": 20.074075644381143, "learning_rate": 9.674828827558194e-06, "loss": 2.7004942893981934, "step": 386 }, { "epoch": 0.615262321144674, "grad_norm": 14.83050652679666, "learning_rate": 9.671539196350757e-06, "loss": 2.589656114578247, "step": 387 }, { "epoch": 0.6168521462639109, "grad_norm": 8.87067945584946, "learning_rate": 9.668233573825794e-06, "loss": 2.9575343132019043, "step": 388 }, { "epoch": 0.6184419713831478, "grad_norm": 10.545476536739276, "learning_rate": 9.664911971298901e-06, "loss": 2.8987927436828613, "step": 389 }, { "epoch": 0.6200317965023847, "grad_norm": 10.16073064262438, "learning_rate": 9.661574400140378e-06, "loss": 2.5674970149993896, "step": 390 }, { "epoch": 0.6216216216216216, "grad_norm": 27.051068878155856, "learning_rate": 9.658220871775188e-06, "loss": 3.2474257946014404, "step": 391 }, { "epoch": 0.6232114467408585, "grad_norm": 9.452922834229314, "learning_rate": 9.654851397682918e-06, "loss": 2.8457717895507812, "step": 392 }, { "epoch": 0.6248012718600954, "grad_norm": 11.005579432042241, "learning_rate": 9.651465989397735e-06, "loss": 2.455747365951538, "step": 393 }, { "epoch": 0.6263910969793323, "grad_norm": 7.215356608742707, "learning_rate": 9.64806465850836e-06, "loss": 2.7574803829193115, "step": 394 }, { "epoch": 0.6279809220985691, "grad_norm": 14.962227882431128, "learning_rate": 9.64464741665801e-06, "loss": 2.429494857788086, "step": 395 }, { "epoch": 0.629570747217806, "grad_norm": 8.013197856622966, "learning_rate": 9.641214275544373e-06, "loss": 2.9387574195861816, "step": 396 }, { "epoch": 0.6311605723370429, "grad_norm": 24.94726816321973, "learning_rate": 9.637765246919559e-06, "loss": 2.8494510650634766, "step": 397 }, { "epoch": 0.6327503974562798, "grad_norm": 10.457210193453564, "learning_rate": 9.634300342590067e-06, "loss": 2.627678871154785, "step": 398 }, { "epoch": 0.6343402225755167, "grad_norm": 14.716507833574038, "learning_rate": 9.630819574416735e-06, "loss": 3.5401620864868164, "step": 399 }, { "epoch": 0.6359300476947536, "grad_norm": 18.887530242887852, "learning_rate": 9.62732295431471e-06, "loss": 3.0188817977905273, "step": 400 }, { "epoch": 0.6375198728139905, "grad_norm": 18.229834565392796, "learning_rate": 9.623810494253403e-06, "loss": 3.1571972370147705, "step": 401 }, { "epoch": 0.6391096979332274, "grad_norm": 13.56039900606889, "learning_rate": 9.620282206256442e-06, "loss": 3.1719672679901123, "step": 402 }, { "epoch": 0.6406995230524642, "grad_norm": 7.963613224338305, "learning_rate": 9.616738102401641e-06, "loss": 2.8952155113220215, "step": 403 }, { "epoch": 0.6422893481717011, "grad_norm": 6.335654799572909, "learning_rate": 9.613178194820952e-06, "loss": 1.8771438598632812, "step": 404 }, { "epoch": 0.643879173290938, "grad_norm": 15.581044704241915, "learning_rate": 9.609602495700422e-06, "loss": 3.0524277687072754, "step": 405 }, { "epoch": 0.6454689984101749, "grad_norm": 12.433857715011595, "learning_rate": 9.606011017280166e-06, "loss": 2.399130344390869, "step": 406 }, { "epoch": 0.6470588235294118, "grad_norm": 6.87894749964456, "learning_rate": 9.602403771854299e-06, "loss": 2.580353021621704, "step": 407 }, { "epoch": 0.6486486486486487, "grad_norm": 6.444440504558877, "learning_rate": 9.598780771770916e-06, "loss": 2.5789973735809326, "step": 408 }, { "epoch": 0.6502384737678856, "grad_norm": 9.629062198154633, "learning_rate": 9.595142029432044e-06, "loss": 2.6498067378997803, "step": 409 }, { "epoch": 0.6518282988871225, "grad_norm": 7.644410796929077, "learning_rate": 9.591487557293595e-06, "loss": 2.4748358726501465, "step": 410 }, { "epoch": 0.6534181240063593, "grad_norm": 9.511237801081927, "learning_rate": 9.587817367865328e-06, "loss": 2.9340078830718994, "step": 411 }, { "epoch": 0.6550079491255962, "grad_norm": 7.5549355398077545, "learning_rate": 9.5841314737108e-06, "loss": 3.104971408843994, "step": 412 }, { "epoch": 0.6565977742448331, "grad_norm": 26.222964600786014, "learning_rate": 9.580429887447334e-06, "loss": 3.083625316619873, "step": 413 }, { "epoch": 0.6581875993640699, "grad_norm": 19.623632426290964, "learning_rate": 9.576712621745965e-06, "loss": 2.4024219512939453, "step": 414 }, { "epoch": 0.6597774244833068, "grad_norm": 10.650849632134157, "learning_rate": 9.572979689331402e-06, "loss": 3.272728443145752, "step": 415 }, { "epoch": 0.6613672496025437, "grad_norm": 9.688717423425123, "learning_rate": 9.569231102981982e-06, "loss": 2.8303894996643066, "step": 416 }, { "epoch": 0.6629570747217806, "grad_norm": 16.254612607461073, "learning_rate": 9.56546687552963e-06, "loss": 3.3961193561553955, "step": 417 }, { "epoch": 0.6645468998410174, "grad_norm": 28.318241565491313, "learning_rate": 9.56168701985981e-06, "loss": 3.2787365913391113, "step": 418 }, { "epoch": 0.6661367249602543, "grad_norm": 9.984301715361173, "learning_rate": 9.557891548911486e-06, "loss": 2.706429958343506, "step": 419 }, { "epoch": 0.6677265500794912, "grad_norm": 52.69149354170844, "learning_rate": 9.554080475677075e-06, "loss": 2.649432897567749, "step": 420 }, { "epoch": 0.6693163751987281, "grad_norm": 10.82457584221695, "learning_rate": 9.5502538132024e-06, "loss": 3.203946828842163, "step": 421 }, { "epoch": 0.670906200317965, "grad_norm": 9.824887603835084, "learning_rate": 9.546411574586649e-06, "loss": 2.792487859725952, "step": 422 }, { "epoch": 0.6724960254372019, "grad_norm": 10.790171066484273, "learning_rate": 9.542553772982334e-06, "loss": 2.542821168899536, "step": 423 }, { "epoch": 0.6740858505564388, "grad_norm": 9.834899956988384, "learning_rate": 9.538680421595236e-06, "loss": 3.0764918327331543, "step": 424 }, { "epoch": 0.6756756756756757, "grad_norm": 9.056903123069523, "learning_rate": 9.534791533684365e-06, "loss": 2.803356170654297, "step": 425 }, { "epoch": 0.6772655007949125, "grad_norm": 10.761225528100839, "learning_rate": 9.530887122561917e-06, "loss": 3.1509580612182617, "step": 426 }, { "epoch": 0.6788553259141494, "grad_norm": 11.131539654369165, "learning_rate": 9.526967201593225e-06, "loss": 3.372119903564453, "step": 427 }, { "epoch": 0.6804451510333863, "grad_norm": 33.68779997608416, "learning_rate": 9.523031784196714e-06, "loss": 2.6376187801361084, "step": 428 }, { "epoch": 0.6820349761526232, "grad_norm": 22.943648324118026, "learning_rate": 9.51908088384386e-06, "loss": 2.1887574195861816, "step": 429 }, { "epoch": 0.6836248012718601, "grad_norm": 20.916906264947617, "learning_rate": 9.515114514059127e-06, "loss": 2.9147121906280518, "step": 430 }, { "epoch": 0.685214626391097, "grad_norm": 12.377959096426785, "learning_rate": 9.51113268841995e-06, "loss": 2.66879940032959, "step": 431 }, { "epoch": 0.6868044515103339, "grad_norm": 8.735671059314264, "learning_rate": 9.507135420556658e-06, "loss": 2.8298702239990234, "step": 432 }, { "epoch": 0.6883942766295708, "grad_norm": 9.704810477955409, "learning_rate": 9.503122724152445e-06, "loss": 2.8676247596740723, "step": 433 }, { "epoch": 0.6899841017488076, "grad_norm": 11.241082482696205, "learning_rate": 9.499094612943323e-06, "loss": 2.931668758392334, "step": 434 }, { "epoch": 0.6915739268680445, "grad_norm": 9.173049928921404, "learning_rate": 9.495051100718063e-06, "loss": 2.5799193382263184, "step": 435 }, { "epoch": 0.6931637519872814, "grad_norm": 18.007659676402852, "learning_rate": 9.490992201318165e-06, "loss": 3.0089612007141113, "step": 436 }, { "epoch": 0.6947535771065183, "grad_norm": 11.362416256495003, "learning_rate": 9.486917928637793e-06, "loss": 2.896777629852295, "step": 437 }, { "epoch": 0.6963434022257552, "grad_norm": 8.416093561210923, "learning_rate": 9.482828296623743e-06, "loss": 2.038195848464966, "step": 438 }, { "epoch": 0.6979332273449921, "grad_norm": 8.48123035061315, "learning_rate": 9.47872331927538e-06, "loss": 2.0925214290618896, "step": 439 }, { "epoch": 0.699523052464229, "grad_norm": 13.309219294696476, "learning_rate": 9.474603010644608e-06, "loss": 3.1267426013946533, "step": 440 }, { "epoch": 0.7011128775834659, "grad_norm": 19.576731189877943, "learning_rate": 9.470467384835804e-06, "loss": 2.386526107788086, "step": 441 }, { "epoch": 0.7027027027027027, "grad_norm": 11.84793161548726, "learning_rate": 9.466316456005783e-06, "loss": 2.735654592514038, "step": 442 }, { "epoch": 0.7042925278219396, "grad_norm": 14.370017088694938, "learning_rate": 9.462150238363737e-06, "loss": 3.2645516395568848, "step": 443 }, { "epoch": 0.7058823529411765, "grad_norm": 17.806748620572648, "learning_rate": 9.457968746171202e-06, "loss": 2.775618076324463, "step": 444 }, { "epoch": 0.7074721780604134, "grad_norm": 15.470507917573313, "learning_rate": 9.453771993742e-06, "loss": 3.137962579727173, "step": 445 }, { "epoch": 0.7090620031796503, "grad_norm": 10.518008388501737, "learning_rate": 9.449559995442184e-06, "loss": 3.061692237854004, "step": 446 }, { "epoch": 0.7106518282988871, "grad_norm": 10.581699627893139, "learning_rate": 9.445332765690003e-06, "loss": 3.165436029434204, "step": 447 }, { "epoch": 0.712241653418124, "grad_norm": 8.91004376713061, "learning_rate": 9.441090318955843e-06, "loss": 2.745981216430664, "step": 448 }, { "epoch": 0.7138314785373608, "grad_norm": 30.01216048271335, "learning_rate": 9.436832669762177e-06, "loss": 2.914241313934326, "step": 449 }, { "epoch": 0.7154213036565977, "grad_norm": 42.15439355029616, "learning_rate": 9.432559832683523e-06, "loss": 2.9794774055480957, "step": 450 }, { "epoch": 0.7170111287758346, "grad_norm": 13.71971441245524, "learning_rate": 9.428271822346384e-06, "loss": 2.801947832107544, "step": 451 }, { "epoch": 0.7186009538950715, "grad_norm": 14.134554485370607, "learning_rate": 9.423968653429207e-06, "loss": 2.9650607109069824, "step": 452 }, { "epoch": 0.7201907790143084, "grad_norm": 8.604798420906718, "learning_rate": 9.419650340662329e-06, "loss": 2.595290184020996, "step": 453 }, { "epoch": 0.7217806041335453, "grad_norm": 11.249268948449823, "learning_rate": 9.415316898827923e-06, "loss": 2.633866310119629, "step": 454 }, { "epoch": 0.7233704292527822, "grad_norm": 11.52964004514907, "learning_rate": 9.410968342759954e-06, "loss": 3.424924850463867, "step": 455 }, { "epoch": 0.724960254372019, "grad_norm": 17.639877653458488, "learning_rate": 9.406604687344123e-06, "loss": 2.369297504425049, "step": 456 }, { "epoch": 0.7265500794912559, "grad_norm": 22.81396466382483, "learning_rate": 9.402225947517822e-06, "loss": 2.883362293243408, "step": 457 }, { "epoch": 0.7281399046104928, "grad_norm": 16.222209071079284, "learning_rate": 9.397832138270073e-06, "loss": 2.8191261291503906, "step": 458 }, { "epoch": 0.7297297297297297, "grad_norm": 7.895306649079384, "learning_rate": 9.393423274641489e-06, "loss": 2.7146449089050293, "step": 459 }, { "epoch": 0.7313195548489666, "grad_norm": 10.89587170199451, "learning_rate": 9.388999371724212e-06, "loss": 3.090642213821411, "step": 460 }, { "epoch": 0.7329093799682035, "grad_norm": 23.9918511541304, "learning_rate": 9.384560444661866e-06, "loss": 2.4599502086639404, "step": 461 }, { "epoch": 0.7344992050874404, "grad_norm": 17.30770085496429, "learning_rate": 9.380106508649504e-06, "loss": 3.157010555267334, "step": 462 }, { "epoch": 0.7360890302066773, "grad_norm": 7.37592450124233, "learning_rate": 9.37563757893356e-06, "loss": 3.021430730819702, "step": 463 }, { "epoch": 0.7376788553259142, "grad_norm": 17.999614114821366, "learning_rate": 9.371153670811792e-06, "loss": 2.3801822662353516, "step": 464 }, { "epoch": 0.739268680445151, "grad_norm": 18.329872618090533, "learning_rate": 9.36665479963323e-06, "loss": 3.0229134559631348, "step": 465 }, { "epoch": 0.7408585055643879, "grad_norm": 16.29800538004355, "learning_rate": 9.362140980798127e-06, "loss": 2.883070945739746, "step": 466 }, { "epoch": 0.7424483306836248, "grad_norm": 6.700420005013404, "learning_rate": 9.357612229757898e-06, "loss": 2.8372249603271484, "step": 467 }, { "epoch": 0.7440381558028617, "grad_norm": 8.49441227773889, "learning_rate": 9.353068562015081e-06, "loss": 2.289818525314331, "step": 468 }, { "epoch": 0.7456279809220986, "grad_norm": 27.79324687080065, "learning_rate": 9.34850999312327e-06, "loss": 2.7569055557250977, "step": 469 }, { "epoch": 0.7472178060413355, "grad_norm": 10.233973203234271, "learning_rate": 9.343936538687071e-06, "loss": 2.8224129676818848, "step": 470 }, { "epoch": 0.7488076311605724, "grad_norm": 9.536916310154247, "learning_rate": 9.339348214362042e-06, "loss": 2.7565484046936035, "step": 471 }, { "epoch": 0.7503974562798092, "grad_norm": 12.660265416201005, "learning_rate": 9.334745035854646e-06, "loss": 2.609936475753784, "step": 472 }, { "epoch": 0.7519872813990461, "grad_norm": 6.609631626506061, "learning_rate": 9.330127018922195e-06, "loss": 1.537891149520874, "step": 473 }, { "epoch": 0.753577106518283, "grad_norm": 11.839925280210922, "learning_rate": 9.325494179372787e-06, "loss": 2.920321464538574, "step": 474 }, { "epoch": 0.7551669316375199, "grad_norm": 8.033768579527674, "learning_rate": 9.32084653306527e-06, "loss": 2.322841167449951, "step": 475 }, { "epoch": 0.7567567567567568, "grad_norm": 18.74724817038534, "learning_rate": 9.316184095909172e-06, "loss": 3.282191276550293, "step": 476 }, { "epoch": 0.7583465818759937, "grad_norm": 9.591961467690806, "learning_rate": 9.311506883864652e-06, "loss": 3.099551200866699, "step": 477 }, { "epoch": 0.7599364069952306, "grad_norm": 6.813041303534929, "learning_rate": 9.306814912942445e-06, "loss": 2.680548667907715, "step": 478 }, { "epoch": 0.7615262321144675, "grad_norm": 7.440877828862766, "learning_rate": 9.302108199203811e-06, "loss": 3.059520721435547, "step": 479 }, { "epoch": 0.7631160572337043, "grad_norm": 11.063755034328096, "learning_rate": 9.297386758760476e-06, "loss": 2.7226760387420654, "step": 480 }, { "epoch": 0.7647058823529411, "grad_norm": 16.03801166420759, "learning_rate": 9.292650607774576e-06, "loss": 3.021273374557495, "step": 481 }, { "epoch": 0.766295707472178, "grad_norm": 17.476643714670104, "learning_rate": 9.287899762458602e-06, "loss": 3.0549211502075195, "step": 482 }, { "epoch": 0.7678855325914149, "grad_norm": 15.837190093947614, "learning_rate": 9.283134239075345e-06, "loss": 2.7466187477111816, "step": 483 }, { "epoch": 0.7694753577106518, "grad_norm": 7.252402922157257, "learning_rate": 9.278354053937848e-06, "loss": 3.251795768737793, "step": 484 }, { "epoch": 0.7710651828298887, "grad_norm": 13.96476074258806, "learning_rate": 9.273559223409336e-06, "loss": 2.9985158443450928, "step": 485 }, { "epoch": 0.7726550079491256, "grad_norm": 13.605199897553412, "learning_rate": 9.268749763903171e-06, "loss": 3.1657190322875977, "step": 486 }, { "epoch": 0.7742448330683624, "grad_norm": 23.870950335808047, "learning_rate": 9.26392569188279e-06, "loss": 2.4962430000305176, "step": 487 }, { "epoch": 0.7758346581875993, "grad_norm": 36.442408380505434, "learning_rate": 9.259087023861649e-06, "loss": 2.98346209526062, "step": 488 }, { "epoch": 0.7774244833068362, "grad_norm": 19.137833562524634, "learning_rate": 9.254233776403172e-06, "loss": 3.3266477584838867, "step": 489 }, { "epoch": 0.7790143084260731, "grad_norm": 8.57517970453567, "learning_rate": 9.249365966120692e-06, "loss": 2.872415542602539, "step": 490 }, { "epoch": 0.78060413354531, "grad_norm": 20.351472280664066, "learning_rate": 9.244483609677384e-06, "loss": 2.7851204872131348, "step": 491 }, { "epoch": 0.7821939586645469, "grad_norm": 18.738667613531746, "learning_rate": 9.239586723786223e-06, "loss": 3.367607593536377, "step": 492 }, { "epoch": 0.7837837837837838, "grad_norm": 11.655735916018001, "learning_rate": 9.234675325209923e-06, "loss": 2.86293625831604, "step": 493 }, { "epoch": 0.7853736089030207, "grad_norm": 15.195275789671278, "learning_rate": 9.229749430760868e-06, "loss": 3.1182608604431152, "step": 494 }, { "epoch": 0.7869634340222575, "grad_norm": 12.571602562023449, "learning_rate": 9.224809057301072e-06, "loss": 3.185694694519043, "step": 495 }, { "epoch": 0.7885532591414944, "grad_norm": 9.37913029848071, "learning_rate": 9.219854221742106e-06, "loss": 3.187572956085205, "step": 496 }, { "epoch": 0.7901430842607313, "grad_norm": 10.256423074234739, "learning_rate": 9.214884941045053e-06, "loss": 2.5662600994110107, "step": 497 }, { "epoch": 0.7917329093799682, "grad_norm": 18.076215929843727, "learning_rate": 9.209901232220436e-06, "loss": 3.0311079025268555, "step": 498 }, { "epoch": 0.7933227344992051, "grad_norm": 16.631781115308666, "learning_rate": 9.204903112328177e-06, "loss": 1.7491254806518555, "step": 499 }, { "epoch": 0.794912559618442, "grad_norm": 23.267256685202685, "learning_rate": 9.19989059847752e-06, "loss": 2.581984043121338, "step": 500 }, { "epoch": 0.7965023847376789, "grad_norm": 12.378630702599818, "learning_rate": 9.194863707826987e-06, "loss": 3.037818193435669, "step": 501 }, { "epoch": 0.7980922098569158, "grad_norm": 8.336093692737641, "learning_rate": 9.189822457584311e-06, "loss": 2.6411571502685547, "step": 502 }, { "epoch": 0.7996820349761526, "grad_norm": 8.867434380285168, "learning_rate": 9.184766865006384e-06, "loss": 2.9949069023132324, "step": 503 }, { "epoch": 0.8012718600953895, "grad_norm": 11.193965664653925, "learning_rate": 9.179696947399188e-06, "loss": 3.144390821456909, "step": 504 }, { "epoch": 0.8028616852146264, "grad_norm": 16.057125696326533, "learning_rate": 9.174612722117744e-06, "loss": 2.9249026775360107, "step": 505 }, { "epoch": 0.8044515103338633, "grad_norm": 20.94564798661583, "learning_rate": 9.169514206566053e-06, "loss": 2.9030885696411133, "step": 506 }, { "epoch": 0.8060413354531002, "grad_norm": 9.485465282445661, "learning_rate": 9.164401418197028e-06, "loss": 2.723435878753662, "step": 507 }, { "epoch": 0.8076311605723371, "grad_norm": 7.97901333149408, "learning_rate": 9.159274374512444e-06, "loss": 2.268899917602539, "step": 508 }, { "epoch": 0.809220985691574, "grad_norm": 11.56378755396159, "learning_rate": 9.154133093062874e-06, "loss": 2.7658634185791016, "step": 509 }, { "epoch": 0.8108108108108109, "grad_norm": 16.318599971485703, "learning_rate": 9.148977591447625e-06, "loss": 2.3817219734191895, "step": 510 }, { "epoch": 0.8124006359300477, "grad_norm": 14.944743792365866, "learning_rate": 9.143807887314686e-06, "loss": 2.5911664962768555, "step": 511 }, { "epoch": 0.8139904610492846, "grad_norm": 16.811534487344034, "learning_rate": 9.138623998360662e-06, "loss": 3.377835988998413, "step": 512 }, { "epoch": 0.8155802861685215, "grad_norm": 7.7776791479934895, "learning_rate": 9.133425942330711e-06, "loss": 2.6951489448547363, "step": 513 }, { "epoch": 0.8171701112877583, "grad_norm": 11.502170896327272, "learning_rate": 9.128213737018493e-06, "loss": 3.042034149169922, "step": 514 }, { "epoch": 0.8187599364069952, "grad_norm": 10.755740854329733, "learning_rate": 9.122987400266095e-06, "loss": 3.1462788581848145, "step": 515 }, { "epoch": 0.8203497615262321, "grad_norm": 8.557466859234387, "learning_rate": 9.117746949963986e-06, "loss": 3.2351651191711426, "step": 516 }, { "epoch": 0.821939586645469, "grad_norm": 6.6138108427559, "learning_rate": 9.112492404050944e-06, "loss": 2.52327036857605, "step": 517 }, { "epoch": 0.8235294117647058, "grad_norm": 7.739658405948868, "learning_rate": 9.107223780513997e-06, "loss": 3.155184030532837, "step": 518 }, { "epoch": 0.8251192368839427, "grad_norm": 14.958547409490388, "learning_rate": 9.101941097388364e-06, "loss": 3.060459613800049, "step": 519 }, { "epoch": 0.8267090620031796, "grad_norm": 13.544711692721917, "learning_rate": 9.096644372757393e-06, "loss": 2.502777338027954, "step": 520 }, { "epoch": 0.8282988871224165, "grad_norm": 16.67047370206468, "learning_rate": 9.091333624752497e-06, "loss": 2.7691304683685303, "step": 521 }, { "epoch": 0.8298887122416534, "grad_norm": 42.77834127326481, "learning_rate": 9.086008871553088e-06, "loss": 2.007439136505127, "step": 522 }, { "epoch": 0.8314785373608903, "grad_norm": 10.81903026213424, "learning_rate": 9.08067013138653e-06, "loss": 2.60162353515625, "step": 523 }, { "epoch": 0.8330683624801272, "grad_norm": 12.050402588569701, "learning_rate": 9.07531742252806e-06, "loss": 3.2098569869995117, "step": 524 }, { "epoch": 0.834658187599364, "grad_norm": 14.320246766204002, "learning_rate": 9.06995076330073e-06, "loss": 2.8991613388061523, "step": 525 }, { "epoch": 0.8362480127186009, "grad_norm": 17.41210724230328, "learning_rate": 9.064570172075349e-06, "loss": 2.1841237545013428, "step": 526 }, { "epoch": 0.8378378378378378, "grad_norm": 10.548683513908175, "learning_rate": 9.059175667270417e-06, "loss": 2.322880744934082, "step": 527 }, { "epoch": 0.8394276629570747, "grad_norm": 12.137179668874367, "learning_rate": 9.053767267352063e-06, "loss": 2.756648540496826, "step": 528 }, { "epoch": 0.8410174880763116, "grad_norm": 15.333393821308167, "learning_rate": 9.048344990833978e-06, "loss": 2.9139137268066406, "step": 529 }, { "epoch": 0.8426073131955485, "grad_norm": 15.676264866891273, "learning_rate": 9.042908856277354e-06, "loss": 1.6564269065856934, "step": 530 }, { "epoch": 0.8441971383147854, "grad_norm": 10.23989787328245, "learning_rate": 9.037458882290829e-06, "loss": 2.8947908878326416, "step": 531 }, { "epoch": 0.8457869634340223, "grad_norm": 13.077387466685536, "learning_rate": 9.031995087530403e-06, "loss": 2.4343180656433105, "step": 532 }, { "epoch": 0.8473767885532592, "grad_norm": 9.515333984171859, "learning_rate": 9.026517490699397e-06, "loss": 2.7388577461242676, "step": 533 }, { "epoch": 0.848966613672496, "grad_norm": 31.050205309879313, "learning_rate": 9.021026110548372e-06, "loss": 2.9309582710266113, "step": 534 }, { "epoch": 0.8505564387917329, "grad_norm": 19.203823669695584, "learning_rate": 9.015520965875073e-06, "loss": 2.706590175628662, "step": 535 }, { "epoch": 0.8521462639109698, "grad_norm": 14.252051747524709, "learning_rate": 9.010002075524365e-06, "loss": 2.7433180809020996, "step": 536 }, { "epoch": 0.8537360890302067, "grad_norm": 22.594564192159826, "learning_rate": 9.004469458388161e-06, "loss": 2.817378044128418, "step": 537 }, { "epoch": 0.8553259141494436, "grad_norm": 67.57887294043289, "learning_rate": 8.99892313340537e-06, "loss": 2.8166146278381348, "step": 538 }, { "epoch": 0.8569157392686805, "grad_norm": 16.373029641709035, "learning_rate": 8.993363119561819e-06, "loss": 2.914787530899048, "step": 539 }, { "epoch": 0.8585055643879174, "grad_norm": 9.294623127371112, "learning_rate": 8.987789435890196e-06, "loss": 2.9436442852020264, "step": 540 }, { "epoch": 0.8600953895071543, "grad_norm": 12.96701127912914, "learning_rate": 8.98220210146998e-06, "loss": 3.34321928024292, "step": 541 }, { "epoch": 0.8616852146263911, "grad_norm": 8.138125083528765, "learning_rate": 8.976601135427386e-06, "loss": 2.543393611907959, "step": 542 }, { "epoch": 0.863275039745628, "grad_norm": 21.988109277338108, "learning_rate": 8.970986556935282e-06, "loss": 2.928457021713257, "step": 543 }, { "epoch": 0.8648648648648649, "grad_norm": 8.033779110302003, "learning_rate": 8.96535838521314e-06, "loss": 2.7310256958007812, "step": 544 }, { "epoch": 0.8664546899841018, "grad_norm": 7.304588914684417, "learning_rate": 8.959716639526962e-06, "loss": 3.030553102493286, "step": 545 }, { "epoch": 0.8680445151033387, "grad_norm": 10.380113781227514, "learning_rate": 8.954061339189214e-06, "loss": 2.711671829223633, "step": 546 }, { "epoch": 0.8696343402225755, "grad_norm": 18.269075985818574, "learning_rate": 8.948392503558763e-06, "loss": 2.4586758613586426, "step": 547 }, { "epoch": 0.8712241653418124, "grad_norm": 17.86796455198677, "learning_rate": 8.942710152040807e-06, "loss": 2.281625270843506, "step": 548 }, { "epoch": 0.8728139904610492, "grad_norm": 11.659312416575938, "learning_rate": 8.937014304086814e-06, "loss": 2.8658084869384766, "step": 549 }, { "epoch": 0.8744038155802861, "grad_norm": 9.678259451150254, "learning_rate": 8.931304979194452e-06, "loss": 2.6468729972839355, "step": 550 }, { "epoch": 0.875993640699523, "grad_norm": 11.13451090154638, "learning_rate": 8.925582196907519e-06, "loss": 2.5170133113861084, "step": 551 }, { "epoch": 0.8775834658187599, "grad_norm": 7.04164151428309, "learning_rate": 8.91984597681588e-06, "loss": 2.8820958137512207, "step": 552 }, { "epoch": 0.8791732909379968, "grad_norm": 12.357390772893178, "learning_rate": 8.914096338555402e-06, "loss": 3.473822593688965, "step": 553 }, { "epoch": 0.8807631160572337, "grad_norm": 28.33219274401018, "learning_rate": 8.908333301807886e-06, "loss": 2.5123298168182373, "step": 554 }, { "epoch": 0.8823529411764706, "grad_norm": 11.030402564865922, "learning_rate": 8.90255688630099e-06, "loss": 2.7539072036743164, "step": 555 }, { "epoch": 0.8839427662957074, "grad_norm": 10.922187259016807, "learning_rate": 8.896767111808177e-06, "loss": 2.488431453704834, "step": 556 }, { "epoch": 0.8855325914149443, "grad_norm": 15.435384223393465, "learning_rate": 8.890963998148637e-06, "loss": 2.2676663398742676, "step": 557 }, { "epoch": 0.8871224165341812, "grad_norm": 11.885270047863468, "learning_rate": 8.88514756518722e-06, "loss": 2.365499496459961, "step": 558 }, { "epoch": 0.8887122416534181, "grad_norm": 21.43582746324342, "learning_rate": 8.879317832834372e-06, "loss": 3.2253689765930176, "step": 559 }, { "epoch": 0.890302066772655, "grad_norm": 9.087125791490214, "learning_rate": 8.873474821046066e-06, "loss": 2.479543685913086, "step": 560 }, { "epoch": 0.8918918918918919, "grad_norm": 14.37236834034769, "learning_rate": 8.867618549823728e-06, "loss": 2.6958513259887695, "step": 561 }, { "epoch": 0.8934817170111288, "grad_norm": 26.795955747432917, "learning_rate": 8.861749039214177e-06, "loss": 3.0564427375793457, "step": 562 }, { "epoch": 0.8950715421303657, "grad_norm": 14.008735885861535, "learning_rate": 8.85586630930955e-06, "loss": 3.0955772399902344, "step": 563 }, { "epoch": 0.8966613672496025, "grad_norm": 17.499999308226226, "learning_rate": 8.849970380247237e-06, "loss": 2.753736972808838, "step": 564 }, { "epoch": 0.8982511923688394, "grad_norm": 16.86991628103478, "learning_rate": 8.844061272209807e-06, "loss": 3.1091933250427246, "step": 565 }, { "epoch": 0.8998410174880763, "grad_norm": 7.292520353862746, "learning_rate": 8.838139005424945e-06, "loss": 2.73673152923584, "step": 566 }, { "epoch": 0.9014308426073132, "grad_norm": 26.10050227785304, "learning_rate": 8.832203600165383e-06, "loss": 2.820924758911133, "step": 567 }, { "epoch": 0.9030206677265501, "grad_norm": 10.983586406500114, "learning_rate": 8.826255076748823e-06, "loss": 2.9828057289123535, "step": 568 }, { "epoch": 0.904610492845787, "grad_norm": 9.500150629192056, "learning_rate": 8.820293455537872e-06, "loss": 2.7929773330688477, "step": 569 }, { "epoch": 0.9062003179650239, "grad_norm": 18.630927541392282, "learning_rate": 8.814318756939979e-06, "loss": 2.3121395111083984, "step": 570 }, { "epoch": 0.9077901430842608, "grad_norm": 19.23342906269139, "learning_rate": 8.808331001407352e-06, "loss": 2.8814163208007812, "step": 571 }, { "epoch": 0.9093799682034976, "grad_norm": 14.377934501655712, "learning_rate": 8.802330209436898e-06, "loss": 3.316739559173584, "step": 572 }, { "epoch": 0.9109697933227345, "grad_norm": 11.871126508660163, "learning_rate": 8.796316401570146e-06, "loss": 2.679964780807495, "step": 573 }, { "epoch": 0.9125596184419714, "grad_norm": 13.85556629537282, "learning_rate": 8.790289598393186e-06, "loss": 2.9453659057617188, "step": 574 }, { "epoch": 0.9141494435612083, "grad_norm": 8.232252631291905, "learning_rate": 8.784249820536588e-06, "loss": 2.6362810134887695, "step": 575 }, { "epoch": 0.9157392686804452, "grad_norm": 15.867140433488505, "learning_rate": 8.778197088675339e-06, "loss": 2.6648402214050293, "step": 576 }, { "epoch": 0.9173290937996821, "grad_norm": 13.237751697039622, "learning_rate": 8.772131423528766e-06, "loss": 2.9705429077148438, "step": 577 }, { "epoch": 0.918918918918919, "grad_norm": 18.41414811767829, "learning_rate": 8.766052845860472e-06, "loss": 1.8093316555023193, "step": 578 }, { "epoch": 0.9205087440381559, "grad_norm": 13.413765564452232, "learning_rate": 8.759961376478256e-06, "loss": 3.0572826862335205, "step": 579 }, { "epoch": 0.9220985691573926, "grad_norm": 15.16419866353513, "learning_rate": 8.753857036234055e-06, "loss": 3.2078309059143066, "step": 580 }, { "epoch": 0.9236883942766295, "grad_norm": 10.520075066302136, "learning_rate": 8.747739846023858e-06, "loss": 2.571777105331421, "step": 581 }, { "epoch": 0.9252782193958664, "grad_norm": 17.393520422365786, "learning_rate": 8.741609826787644e-06, "loss": 2.815624713897705, "step": 582 }, { "epoch": 0.9268680445151033, "grad_norm": 12.114943627687087, "learning_rate": 8.73546699950931e-06, "loss": 2.674105644226074, "step": 583 }, { "epoch": 0.9284578696343402, "grad_norm": 20.829494890927517, "learning_rate": 8.72931138521659e-06, "loss": 2.616847515106201, "step": 584 }, { "epoch": 0.9300476947535771, "grad_norm": 18.7974227472542, "learning_rate": 8.723143004980995e-06, "loss": 3.3333654403686523, "step": 585 }, { "epoch": 0.931637519872814, "grad_norm": 9.621624789248896, "learning_rate": 8.716961879917734e-06, "loss": 2.8845057487487793, "step": 586 }, { "epoch": 0.9332273449920508, "grad_norm": 7.583388841207975, "learning_rate": 8.710768031185643e-06, "loss": 2.532384157180786, "step": 587 }, { "epoch": 0.9348171701112877, "grad_norm": 7.226870112533399, "learning_rate": 8.704561479987115e-06, "loss": 2.9145328998565674, "step": 588 }, { "epoch": 0.9364069952305246, "grad_norm": 10.814912634036599, "learning_rate": 8.698342247568021e-06, "loss": 3.091761827468872, "step": 589 }, { "epoch": 0.9379968203497615, "grad_norm": 15.997550555584759, "learning_rate": 8.692110355217646e-06, "loss": 2.7953693866729736, "step": 590 }, { "epoch": 0.9395866454689984, "grad_norm": 11.411767007121696, "learning_rate": 8.685865824268608e-06, "loss": 3.1209115982055664, "step": 591 }, { "epoch": 0.9411764705882353, "grad_norm": 9.582592328840802, "learning_rate": 8.679608676096793e-06, "loss": 2.7394025325775146, "step": 592 }, { "epoch": 0.9427662957074722, "grad_norm": 18.39337660729296, "learning_rate": 8.673338932121274e-06, "loss": 3.261842966079712, "step": 593 }, { "epoch": 0.9443561208267091, "grad_norm": 10.601794336067574, "learning_rate": 8.66705661380424e-06, "loss": 3.0146102905273438, "step": 594 }, { "epoch": 0.9459459459459459, "grad_norm": 12.563607294258158, "learning_rate": 8.660761742650928e-06, "loss": 2.659600019454956, "step": 595 }, { "epoch": 0.9475357710651828, "grad_norm": 39.18151668287377, "learning_rate": 8.654454340209542e-06, "loss": 2.043147087097168, "step": 596 }, { "epoch": 0.9491255961844197, "grad_norm": 12.18798117002197, "learning_rate": 8.648134428071182e-06, "loss": 2.62393856048584, "step": 597 }, { "epoch": 0.9507154213036566, "grad_norm": 25.381543731028497, "learning_rate": 8.641802027869774e-06, "loss": 2.586343288421631, "step": 598 }, { "epoch": 0.9523052464228935, "grad_norm": 10.863871219074202, "learning_rate": 8.635457161281988e-06, "loss": 2.907933235168457, "step": 599 }, { "epoch": 0.9538950715421304, "grad_norm": 17.560668770317324, "learning_rate": 8.629099850027172e-06, "loss": 2.894634962081909, "step": 600 }, { "epoch": 0.9554848966613673, "grad_norm": 15.359181392618892, "learning_rate": 8.622730115867268e-06, "loss": 3.1808290481567383, "step": 601 }, { "epoch": 0.9570747217806042, "grad_norm": 19.61490599139797, "learning_rate": 8.616347980606749e-06, "loss": 2.564119338989258, "step": 602 }, { "epoch": 0.958664546899841, "grad_norm": 11.460288433511876, "learning_rate": 8.60995346609254e-06, "loss": 2.3505280017852783, "step": 603 }, { "epoch": 0.9602543720190779, "grad_norm": 8.178876009589425, "learning_rate": 8.603546594213935e-06, "loss": 2.901543617248535, "step": 604 }, { "epoch": 0.9618441971383148, "grad_norm": 12.305038523339514, "learning_rate": 8.597127386902536e-06, "loss": 2.8978724479675293, "step": 605 }, { "epoch": 0.9634340222575517, "grad_norm": 8.450718398735082, "learning_rate": 8.590695866132162e-06, "loss": 2.6897552013397217, "step": 606 }, { "epoch": 0.9650238473767886, "grad_norm": 28.815336253830008, "learning_rate": 8.58425205391879e-06, "loss": 2.6910252571105957, "step": 607 }, { "epoch": 0.9666136724960255, "grad_norm": 26.111368898008163, "learning_rate": 8.577795972320475e-06, "loss": 2.798401355743408, "step": 608 }, { "epoch": 0.9682034976152624, "grad_norm": 8.913376825484798, "learning_rate": 8.571327643437261e-06, "loss": 2.1879935264587402, "step": 609 }, { "epoch": 0.9697933227344993, "grad_norm": 12.695120776563611, "learning_rate": 8.564847089411128e-06, "loss": 2.797454357147217, "step": 610 }, { "epoch": 0.9713831478537361, "grad_norm": 10.59170000095689, "learning_rate": 8.558354332425893e-06, "loss": 2.911411762237549, "step": 611 }, { "epoch": 0.972972972972973, "grad_norm": 11.516078755335915, "learning_rate": 8.551849394707158e-06, "loss": 3.4041268825531006, "step": 612 }, { "epoch": 0.9745627980922098, "grad_norm": 15.186668948081552, "learning_rate": 8.545332298522207e-06, "loss": 2.0779900550842285, "step": 613 }, { "epoch": 0.9761526232114467, "grad_norm": 10.329332615910896, "learning_rate": 8.538803066179955e-06, "loss": 2.844508647918701, "step": 614 }, { "epoch": 0.9777424483306836, "grad_norm": 7.134532897405484, "learning_rate": 8.53226172003086e-06, "loss": 3.226003646850586, "step": 615 }, { "epoch": 0.9793322734499205, "grad_norm": 7.92503508430859, "learning_rate": 8.525708282466839e-06, "loss": 2.8174638748168945, "step": 616 }, { "epoch": 0.9809220985691574, "grad_norm": 13.882957144040324, "learning_rate": 8.519142775921207e-06, "loss": 3.629255533218384, "step": 617 }, { "epoch": 0.9825119236883942, "grad_norm": 17.89036699856641, "learning_rate": 8.512565222868592e-06, "loss": 2.345249652862549, "step": 618 }, { "epoch": 0.9841017488076311, "grad_norm": 15.385106008821687, "learning_rate": 8.505975645824858e-06, "loss": 2.824721097946167, "step": 619 }, { "epoch": 0.985691573926868, "grad_norm": 8.342182862592603, "learning_rate": 8.499374067347026e-06, "loss": 2.341355562210083, "step": 620 }, { "epoch": 0.9872813990461049, "grad_norm": 9.78424313321291, "learning_rate": 8.492760510033203e-06, "loss": 2.6399459838867188, "step": 621 }, { "epoch": 0.9888712241653418, "grad_norm": 18.50621272333162, "learning_rate": 8.486134996522502e-06, "loss": 2.872849941253662, "step": 622 }, { "epoch": 0.9904610492845787, "grad_norm": 16.893372126409265, "learning_rate": 8.47949754949496e-06, "loss": 2.854398727416992, "step": 623 }, { "epoch": 0.9920508744038156, "grad_norm": 33.989665060746646, "learning_rate": 8.472848191671465e-06, "loss": 2.569676160812378, "step": 624 }, { "epoch": 0.9936406995230525, "grad_norm": 10.364248356846579, "learning_rate": 8.46618694581368e-06, "loss": 2.520169734954834, "step": 625 }, { "epoch": 0.9952305246422893, "grad_norm": 9.047743501472581, "learning_rate": 8.459513834723957e-06, "loss": 2.9824767112731934, "step": 626 }, { "epoch": 0.9968203497615262, "grad_norm": 15.714759225038321, "learning_rate": 8.452828881245273e-06, "loss": 2.8227317333221436, "step": 627 }, { "epoch": 0.9984101748807631, "grad_norm": 8.567173852792775, "learning_rate": 8.446132108261136e-06, "loss": 2.5381555557250977, "step": 628 }, { "epoch": 1.0, "grad_norm": 19.503916867140724, "learning_rate": 8.439423538695515e-06, "loss": 2.3427681922912598, "step": 629 }, { "epoch": 1.0015898251192368, "grad_norm": 10.202895702857752, "learning_rate": 8.432703195512761e-06, "loss": 2.6940202713012695, "step": 630 }, { "epoch": 1.0031796502384738, "grad_norm": 9.200573955983792, "learning_rate": 8.425971101717528e-06, "loss": 1.772001028060913, "step": 631 }, { "epoch": 1.0047694753577106, "grad_norm": 10.123855095623277, "learning_rate": 8.419227280354693e-06, "loss": 2.2012226581573486, "step": 632 }, { "epoch": 1.0063593004769475, "grad_norm": 13.960221710056468, "learning_rate": 8.412471754509282e-06, "loss": 1.5737675428390503, "step": 633 }, { "epoch": 1.0079491255961843, "grad_norm": 12.284994971813214, "learning_rate": 8.405704547306379e-06, "loss": 1.8023271560668945, "step": 634 }, { "epoch": 1.0095389507154213, "grad_norm": 7.548097649993748, "learning_rate": 8.398925681911064e-06, "loss": 1.4765472412109375, "step": 635 }, { "epoch": 1.011128775834658, "grad_norm": 16.46778155673934, "learning_rate": 8.392135181528318e-06, "loss": 1.8113789558410645, "step": 636 }, { "epoch": 1.012718600953895, "grad_norm": 14.205221993999034, "learning_rate": 8.385333069402952e-06, "loss": 2.0972166061401367, "step": 637 }, { "epoch": 1.0143084260731319, "grad_norm": 20.2955824590524, "learning_rate": 8.378519368819528e-06, "loss": 1.1075962781906128, "step": 638 }, { "epoch": 1.0158982511923689, "grad_norm": 13.78054218173786, "learning_rate": 8.371694103102272e-06, "loss": 2.185720205307007, "step": 639 }, { "epoch": 1.0174880763116056, "grad_norm": 11.86981515345048, "learning_rate": 8.364857295615006e-06, "loss": 1.1814801692962646, "step": 640 }, { "epoch": 1.0190779014308426, "grad_norm": 9.618387923402087, "learning_rate": 8.358008969761054e-06, "loss": 1.6825406551361084, "step": 641 }, { "epoch": 1.0206677265500794, "grad_norm": 9.906258760224635, "learning_rate": 8.351149148983173e-06, "loss": 1.643816351890564, "step": 642 }, { "epoch": 1.0222575516693164, "grad_norm": 19.68985006500769, "learning_rate": 8.344277856763465e-06, "loss": 1.3807225227355957, "step": 643 }, { "epoch": 1.0238473767885532, "grad_norm": 28.597373473820486, "learning_rate": 8.337395116623308e-06, "loss": 0.641170084476471, "step": 644 }, { "epoch": 1.0254372019077902, "grad_norm": 7.082787066403454, "learning_rate": 8.330500952123259e-06, "loss": 1.495134711265564, "step": 645 }, { "epoch": 1.027027027027027, "grad_norm": 15.416921380517037, "learning_rate": 8.323595386862985e-06, "loss": 2.411254644393921, "step": 646 }, { "epoch": 1.028616852146264, "grad_norm": 14.334933490816235, "learning_rate": 8.316678444481186e-06, "loss": 1.6603529453277588, "step": 647 }, { "epoch": 1.0302066772655007, "grad_norm": 7.775666873968019, "learning_rate": 8.309750148655496e-06, "loss": 1.343907356262207, "step": 648 }, { "epoch": 1.0317965023847377, "grad_norm": 9.43239062990287, "learning_rate": 8.302810523102422e-06, "loss": 1.6101237535476685, "step": 649 }, { "epoch": 1.0333863275039745, "grad_norm": 14.172496355509377, "learning_rate": 8.295859591577249e-06, "loss": 1.3219900131225586, "step": 650 }, { "epoch": 1.0349761526232115, "grad_norm": 15.249734205542557, "learning_rate": 8.288897377873967e-06, "loss": 1.5491715669631958, "step": 651 }, { "epoch": 1.0365659777424483, "grad_norm": 15.005853149685576, "learning_rate": 8.281923905825188e-06, "loss": 1.1344152688980103, "step": 652 }, { "epoch": 1.0381558028616853, "grad_norm": 11.299005381994808, "learning_rate": 8.274939199302058e-06, "loss": 0.9863616228103638, "step": 653 }, { "epoch": 1.039745627980922, "grad_norm": 19.033875793735564, "learning_rate": 8.267943282214182e-06, "loss": 1.5967910289764404, "step": 654 }, { "epoch": 1.041335453100159, "grad_norm": 12.368037602013091, "learning_rate": 8.260936178509543e-06, "loss": 1.2763534784317017, "step": 655 }, { "epoch": 1.0429252782193958, "grad_norm": 11.113205357262565, "learning_rate": 8.253917912174415e-06, "loss": 1.4309293031692505, "step": 656 }, { "epoch": 1.0445151033386328, "grad_norm": 19.420228317811137, "learning_rate": 8.246888507233281e-06, "loss": 1.7432823181152344, "step": 657 }, { "epoch": 1.0461049284578696, "grad_norm": 12.251793126929448, "learning_rate": 8.23984798774876e-06, "loss": 1.3506265878677368, "step": 658 }, { "epoch": 1.0476947535771066, "grad_norm": 14.720427025655871, "learning_rate": 8.232796377821509e-06, "loss": 1.5710445642471313, "step": 659 }, { "epoch": 1.0492845786963434, "grad_norm": 12.7661392760613, "learning_rate": 8.225733701590153e-06, "loss": 2.116056203842163, "step": 660 }, { "epoch": 1.0508744038155804, "grad_norm": 16.26962811578906, "learning_rate": 8.218659983231203e-06, "loss": 1.7300777435302734, "step": 661 }, { "epoch": 1.0524642289348172, "grad_norm": 9.013874583793882, "learning_rate": 8.211575246958959e-06, "loss": 1.5254652500152588, "step": 662 }, { "epoch": 1.054054054054054, "grad_norm": 12.516975753036407, "learning_rate": 8.204479517025445e-06, "loss": 1.6050835847854614, "step": 663 }, { "epoch": 1.055643879173291, "grad_norm": 8.674526511907334, "learning_rate": 8.197372817720314e-06, "loss": 1.4675190448760986, "step": 664 }, { "epoch": 1.0572337042925277, "grad_norm": 14.429611267241105, "learning_rate": 8.190255173370768e-06, "loss": 1.2936460971832275, "step": 665 }, { "epoch": 1.0588235294117647, "grad_norm": 10.802671282364987, "learning_rate": 8.183126608341483e-06, "loss": 1.7229145765304565, "step": 666 }, { "epoch": 1.0604133545310015, "grad_norm": 13.845554587035924, "learning_rate": 8.175987147034505e-06, "loss": 1.276991367340088, "step": 667 }, { "epoch": 1.0620031796502385, "grad_norm": 8.591410251790123, "learning_rate": 8.168836813889192e-06, "loss": 1.0878384113311768, "step": 668 }, { "epoch": 1.0635930047694753, "grad_norm": 8.17770282098833, "learning_rate": 8.161675633382109e-06, "loss": 1.4470587968826294, "step": 669 }, { "epoch": 1.0651828298887123, "grad_norm": 7.708900221646167, "learning_rate": 8.154503630026955e-06, "loss": 2.3226277828216553, "step": 670 }, { "epoch": 1.066772655007949, "grad_norm": 12.815998930989513, "learning_rate": 8.14732082837448e-06, "loss": 1.5032761096954346, "step": 671 }, { "epoch": 1.068362480127186, "grad_norm": 13.292465683699978, "learning_rate": 8.140127253012398e-06, "loss": 1.2072701454162598, "step": 672 }, { "epoch": 1.0699523052464228, "grad_norm": 14.920491770207613, "learning_rate": 8.1329229285653e-06, "loss": 1.2873613834381104, "step": 673 }, { "epoch": 1.0715421303656598, "grad_norm": 12.48917920314591, "learning_rate": 8.125707879694572e-06, "loss": 1.3614212274551392, "step": 674 }, { "epoch": 1.0731319554848966, "grad_norm": 13.758213612800427, "learning_rate": 8.118482131098316e-06, "loss": 0.9290915727615356, "step": 675 }, { "epoch": 1.0747217806041336, "grad_norm": 9.095753898258069, "learning_rate": 8.111245707511253e-06, "loss": 2.0878541469573975, "step": 676 }, { "epoch": 1.0763116057233704, "grad_norm": 15.04706722861245, "learning_rate": 8.103998633704657e-06, "loss": 0.9775704145431519, "step": 677 }, { "epoch": 1.0779014308426074, "grad_norm": 11.071354494668453, "learning_rate": 8.096740934486247e-06, "loss": 2.289834499359131, "step": 678 }, { "epoch": 1.0794912559618441, "grad_norm": 9.251271380712895, "learning_rate": 8.089472634700123e-06, "loss": 1.649209976196289, "step": 679 }, { "epoch": 1.0810810810810811, "grad_norm": 24.42903087444763, "learning_rate": 8.082193759226669e-06, "loss": 1.4703314304351807, "step": 680 }, { "epoch": 1.082670906200318, "grad_norm": 45.72760949916053, "learning_rate": 8.074904332982469e-06, "loss": 1.6743850708007812, "step": 681 }, { "epoch": 1.084260731319555, "grad_norm": 16.323601623107166, "learning_rate": 8.067604380920228e-06, "loss": 1.056239128112793, "step": 682 }, { "epoch": 1.0858505564387917, "grad_norm": 11.65057660638623, "learning_rate": 8.060293928028681e-06, "loss": 1.9537746906280518, "step": 683 }, { "epoch": 1.0874403815580287, "grad_norm": 8.664990565760272, "learning_rate": 8.052972999332506e-06, "loss": 1.6714719533920288, "step": 684 }, { "epoch": 1.0890302066772655, "grad_norm": 12.294197165646066, "learning_rate": 8.045641619892243e-06, "loss": 2.0577895641326904, "step": 685 }, { "epoch": 1.0906200317965025, "grad_norm": 9.4942774330208, "learning_rate": 8.038299814804209e-06, "loss": 1.5982561111450195, "step": 686 }, { "epoch": 1.0922098569157392, "grad_norm": 8.929620325013477, "learning_rate": 8.030947609200404e-06, "loss": 1.3098976612091064, "step": 687 }, { "epoch": 1.0937996820349762, "grad_norm": 9.428379383957022, "learning_rate": 8.023585028248435e-06, "loss": 1.7451062202453613, "step": 688 }, { "epoch": 1.095389507154213, "grad_norm": 8.148617459246555, "learning_rate": 8.01621209715142e-06, "loss": 1.3702692985534668, "step": 689 }, { "epoch": 1.09697933227345, "grad_norm": 12.424080461489897, "learning_rate": 8.008828841147915e-06, "loss": 1.6049578189849854, "step": 690 }, { "epoch": 1.0985691573926868, "grad_norm": 11.336668812432293, "learning_rate": 8.001435285511815e-06, "loss": 1.5943506956100464, "step": 691 }, { "epoch": 1.1001589825119238, "grad_norm": 9.734477712731705, "learning_rate": 7.994031455552267e-06, "loss": 1.1714757680892944, "step": 692 }, { "epoch": 1.1017488076311606, "grad_norm": 8.456057076789037, "learning_rate": 7.986617376613599e-06, "loss": 1.6149002313613892, "step": 693 }, { "epoch": 1.1033386327503973, "grad_norm": 10.887262581227894, "learning_rate": 7.979193074075216e-06, "loss": 1.3291692733764648, "step": 694 }, { "epoch": 1.1049284578696343, "grad_norm": 16.529055036565882, "learning_rate": 7.971758573351517e-06, "loss": 1.4333473443984985, "step": 695 }, { "epoch": 1.1065182829888713, "grad_norm": 16.027442702620533, "learning_rate": 7.964313899891818e-06, "loss": 1.6329424381256104, "step": 696 }, { "epoch": 1.1081081081081081, "grad_norm": 11.167845097822756, "learning_rate": 7.956859079180255e-06, "loss": 1.4067692756652832, "step": 697 }, { "epoch": 1.109697933227345, "grad_norm": 20.0858437953387, "learning_rate": 7.949394136735696e-06, "loss": 1.185004472732544, "step": 698 }, { "epoch": 1.1112877583465819, "grad_norm": 10.101135491074523, "learning_rate": 7.941919098111662e-06, "loss": 1.6585707664489746, "step": 699 }, { "epoch": 1.1128775834658187, "grad_norm": 17.35505415719115, "learning_rate": 7.934433988896233e-06, "loss": 1.474552869796753, "step": 700 }, { "epoch": 1.1144674085850557, "grad_norm": 10.23106161583393, "learning_rate": 7.92693883471196e-06, "loss": 1.6253128051757812, "step": 701 }, { "epoch": 1.1160572337042924, "grad_norm": 16.105820321363836, "learning_rate": 7.91943366121578e-06, "loss": 1.8239731788635254, "step": 702 }, { "epoch": 1.1176470588235294, "grad_norm": 13.76306602324918, "learning_rate": 7.911918494098928e-06, "loss": 2.7172493934631348, "step": 703 }, { "epoch": 1.1192368839427662, "grad_norm": 11.332518828743096, "learning_rate": 7.904393359086854e-06, "loss": 1.7896854877471924, "step": 704 }, { "epoch": 1.1208267090620032, "grad_norm": 21.736127737439453, "learning_rate": 7.896858281939118e-06, "loss": 1.4515012502670288, "step": 705 }, { "epoch": 1.12241653418124, "grad_norm": 13.403604508681815, "learning_rate": 7.889313288449323e-06, "loss": 0.8405922651290894, "step": 706 }, { "epoch": 1.124006359300477, "grad_norm": 9.592906016715583, "learning_rate": 7.881758404445012e-06, "loss": 2.1267611980438232, "step": 707 }, { "epoch": 1.1255961844197138, "grad_norm": 10.32928485222235, "learning_rate": 7.874193655787586e-06, "loss": 2.1224472522735596, "step": 708 }, { "epoch": 1.1271860095389508, "grad_norm": 14.678514253644037, "learning_rate": 7.866619068372217e-06, "loss": 1.2487913370132446, "step": 709 }, { "epoch": 1.1287758346581875, "grad_norm": 7.59899885047442, "learning_rate": 7.859034668127749e-06, "loss": 1.7427008152008057, "step": 710 }, { "epoch": 1.1303656597774245, "grad_norm": 9.848850083387825, "learning_rate": 7.851440481016623e-06, "loss": 1.2126924991607666, "step": 711 }, { "epoch": 1.1319554848966613, "grad_norm": 15.671482915827829, "learning_rate": 7.843836533034784e-06, "loss": 1.1827189922332764, "step": 712 }, { "epoch": 1.1335453100158983, "grad_norm": 10.250019023250589, "learning_rate": 7.836222850211579e-06, "loss": 0.995161771774292, "step": 713 }, { "epoch": 1.135135135135135, "grad_norm": 14.836541179215907, "learning_rate": 7.828599458609691e-06, "loss": 1.2809135913848877, "step": 714 }, { "epoch": 1.136724960254372, "grad_norm": 7.69267269624143, "learning_rate": 7.82096638432503e-06, "loss": 1.3715252876281738, "step": 715 }, { "epoch": 1.1383147853736089, "grad_norm": 12.400395047939194, "learning_rate": 7.813323653486654e-06, "loss": 1.2230970859527588, "step": 716 }, { "epoch": 1.1399046104928459, "grad_norm": 10.468062369029404, "learning_rate": 7.805671292256671e-06, "loss": 1.3827756643295288, "step": 717 }, { "epoch": 1.1414944356120826, "grad_norm": 18.099237128900125, "learning_rate": 7.798009326830167e-06, "loss": 3.2046289443969727, "step": 718 }, { "epoch": 1.1430842607313196, "grad_norm": 16.498804467781206, "learning_rate": 7.790337783435093e-06, "loss": 1.0387102365493774, "step": 719 }, { "epoch": 1.1446740858505564, "grad_norm": 46.94419002015813, "learning_rate": 7.782656688332194e-06, "loss": 1.9753658771514893, "step": 720 }, { "epoch": 1.1462639109697934, "grad_norm": 9.385769778995229, "learning_rate": 7.774966067814906e-06, "loss": 1.6574186086654663, "step": 721 }, { "epoch": 1.1478537360890302, "grad_norm": 13.56562472812322, "learning_rate": 7.767265948209278e-06, "loss": 1.7107985019683838, "step": 722 }, { "epoch": 1.1494435612082672, "grad_norm": 12.854159815575816, "learning_rate": 7.75955635587387e-06, "loss": 1.8311526775360107, "step": 723 }, { "epoch": 1.151033386327504, "grad_norm": 11.318847288794403, "learning_rate": 7.751837317199673e-06, "loss": 2.2952828407287598, "step": 724 }, { "epoch": 1.1526232114467407, "grad_norm": 8.927949871396361, "learning_rate": 7.744108858610008e-06, "loss": 1.077453374862671, "step": 725 }, { "epoch": 1.1542130365659777, "grad_norm": 15.25654710884335, "learning_rate": 7.73637100656045e-06, "loss": 1.7947218418121338, "step": 726 }, { "epoch": 1.1558028616852147, "grad_norm": 14.775370524705814, "learning_rate": 7.728623787538722e-06, "loss": 1.9251363277435303, "step": 727 }, { "epoch": 1.1573926868044515, "grad_norm": 12.83647833543155, "learning_rate": 7.720867228064616e-06, "loss": 1.8598628044128418, "step": 728 }, { "epoch": 1.1589825119236883, "grad_norm": 10.830550994338182, "learning_rate": 7.713101354689897e-06, "loss": 1.4215333461761475, "step": 729 }, { "epoch": 1.1605723370429253, "grad_norm": 13.323716181839108, "learning_rate": 7.705326193998207e-06, "loss": 3.242117404937744, "step": 730 }, { "epoch": 1.1621621621621623, "grad_norm": 7.33997375680322, "learning_rate": 7.697541772604988e-06, "loss": 1.8210642337799072, "step": 731 }, { "epoch": 1.163751987281399, "grad_norm": 14.779318326563873, "learning_rate": 7.689748117157379e-06, "loss": 1.1805927753448486, "step": 732 }, { "epoch": 1.1653418124006358, "grad_norm": 11.600067060820573, "learning_rate": 7.681945254334126e-06, "loss": 1.0212841033935547, "step": 733 }, { "epoch": 1.1669316375198728, "grad_norm": 11.25816159523179, "learning_rate": 7.674133210845496e-06, "loss": 1.844172477722168, "step": 734 }, { "epoch": 1.1685214626391096, "grad_norm": 19.155279561674575, "learning_rate": 7.666312013433183e-06, "loss": 1.9130163192749023, "step": 735 }, { "epoch": 1.1701112877583466, "grad_norm": 19.710624892947475, "learning_rate": 7.658481688870218e-06, "loss": 1.312086582183838, "step": 736 }, { "epoch": 1.1717011128775834, "grad_norm": 11.4303087593856, "learning_rate": 7.65064226396087e-06, "loss": 1.8819104433059692, "step": 737 }, { "epoch": 1.1732909379968204, "grad_norm": 12.468213355805375, "learning_rate": 7.642793765540561e-06, "loss": 1.4843418598175049, "step": 738 }, { "epoch": 1.1748807631160572, "grad_norm": 11.022423957178553, "learning_rate": 7.634936220475777e-06, "loss": 1.4506335258483887, "step": 739 }, { "epoch": 1.1764705882352942, "grad_norm": 12.175893084008573, "learning_rate": 7.62706965566397e-06, "loss": 1.5481715202331543, "step": 740 }, { "epoch": 1.178060413354531, "grad_norm": 16.694869997171303, "learning_rate": 7.619194098033466e-06, "loss": 1.3884726762771606, "step": 741 }, { "epoch": 1.179650238473768, "grad_norm": 13.212581507544572, "learning_rate": 7.611309574543373e-06, "loss": 1.5078057050704956, "step": 742 }, { "epoch": 1.1812400635930047, "grad_norm": 27.58219967229084, "learning_rate": 7.603416112183497e-06, "loss": 3.193087100982666, "step": 743 }, { "epoch": 1.1828298887122417, "grad_norm": 41.878024638548794, "learning_rate": 7.595513737974238e-06, "loss": 1.6256263256072998, "step": 744 }, { "epoch": 1.1844197138314785, "grad_norm": 10.48182151817921, "learning_rate": 7.587602478966503e-06, "loss": 1.0705622434616089, "step": 745 }, { "epoch": 1.1860095389507155, "grad_norm": 13.769941816011189, "learning_rate": 7.579682362241613e-06, "loss": 2.1637659072875977, "step": 746 }, { "epoch": 1.1875993640699523, "grad_norm": 9.491111225073396, "learning_rate": 7.571753414911213e-06, "loss": 2.2312355041503906, "step": 747 }, { "epoch": 1.1891891891891893, "grad_norm": 8.897617093166733, "learning_rate": 7.563815664117173e-06, "loss": 2.0733022689819336, "step": 748 }, { "epoch": 1.190779014308426, "grad_norm": 13.523802090806276, "learning_rate": 7.555869137031497e-06, "loss": 1.4615492820739746, "step": 749 }, { "epoch": 1.192368839427663, "grad_norm": 13.737519147436364, "learning_rate": 7.547913860856239e-06, "loss": 1.8079819679260254, "step": 750 }, { "epoch": 1.1939586645468998, "grad_norm": 12.315997979642338, "learning_rate": 7.5399498628233925e-06, "loss": 1.159532070159912, "step": 751 }, { "epoch": 1.1955484896661368, "grad_norm": 8.999780532264547, "learning_rate": 7.531977170194813e-06, "loss": 0.9958317279815674, "step": 752 }, { "epoch": 1.1971383147853736, "grad_norm": 10.867977410197962, "learning_rate": 7.52399581026212e-06, "loss": 1.3158916234970093, "step": 753 }, { "epoch": 1.1987281399046106, "grad_norm": 13.816531992054164, "learning_rate": 7.5160058103465985e-06, "loss": 2.40507173538208, "step": 754 }, { "epoch": 1.2003179650238474, "grad_norm": 21.31941893078477, "learning_rate": 7.508007197799111e-06, "loss": 1.0883036851882935, "step": 755 }, { "epoch": 1.2019077901430844, "grad_norm": 7.2796164125970515, "learning_rate": 7.500000000000001e-06, "loss": 1.3112430572509766, "step": 756 }, { "epoch": 1.2034976152623211, "grad_norm": 7.6642364598545605, "learning_rate": 7.491984244359003e-06, "loss": 1.5843225717544556, "step": 757 }, { "epoch": 1.2050874403815581, "grad_norm": 10.412610985057054, "learning_rate": 7.483959958315143e-06, "loss": 1.3042569160461426, "step": 758 }, { "epoch": 1.206677265500795, "grad_norm": 12.386068998781987, "learning_rate": 7.475927169336653e-06, "loss": 1.1159758567810059, "step": 759 }, { "epoch": 1.2082670906200317, "grad_norm": 10.560876113350165, "learning_rate": 7.467885904920864e-06, "loss": 1.8457821607589722, "step": 760 }, { "epoch": 1.2098569157392687, "grad_norm": 6.7816933066214, "learning_rate": 7.459836192594127e-06, "loss": 1.314563274383545, "step": 761 }, { "epoch": 1.2114467408585057, "grad_norm": 15.265650006770516, "learning_rate": 7.451778059911706e-06, "loss": 1.4867005348205566, "step": 762 }, { "epoch": 1.2130365659777425, "grad_norm": 10.657337325730579, "learning_rate": 7.4437115344576935e-06, "loss": 1.0135457515716553, "step": 763 }, { "epoch": 1.2146263910969792, "grad_norm": 14.7213218030471, "learning_rate": 7.4356366438449065e-06, "loss": 1.6390702724456787, "step": 764 }, { "epoch": 1.2162162162162162, "grad_norm": 11.865827724018164, "learning_rate": 7.427553415714801e-06, "loss": 1.6365562677383423, "step": 765 }, { "epoch": 1.217806041335453, "grad_norm": 16.70042228025528, "learning_rate": 7.419461877737373e-06, "loss": 1.411786437034607, "step": 766 }, { "epoch": 1.21939586645469, "grad_norm": 9.721633410028563, "learning_rate": 7.411362057611065e-06, "loss": 0.9351043105125427, "step": 767 }, { "epoch": 1.2209856915739268, "grad_norm": 13.790912646142422, "learning_rate": 7.403253983062665e-06, "loss": 0.7709986567497253, "step": 768 }, { "epoch": 1.2225755166931638, "grad_norm": 17.60584845779764, "learning_rate": 7.395137681847223e-06, "loss": 1.7028567790985107, "step": 769 }, { "epoch": 1.2241653418124006, "grad_norm": 12.669423900706361, "learning_rate": 7.387013181747949e-06, "loss": 1.0321797132492065, "step": 770 }, { "epoch": 1.2257551669316376, "grad_norm": 12.734948593506429, "learning_rate": 7.378880510576115e-06, "loss": 1.5205578804016113, "step": 771 }, { "epoch": 1.2273449920508743, "grad_norm": 16.313619320103047, "learning_rate": 7.370739696170971e-06, "loss": 2.0671231746673584, "step": 772 }, { "epoch": 1.2289348171701113, "grad_norm": 13.651887497636158, "learning_rate": 7.362590766399635e-06, "loss": 1.5689630508422852, "step": 773 }, { "epoch": 1.230524642289348, "grad_norm": 12.77935664125304, "learning_rate": 7.3544337491570075e-06, "loss": 1.2613396644592285, "step": 774 }, { "epoch": 1.232114467408585, "grad_norm": 6.431511096471851, "learning_rate": 7.346268672365675e-06, "loss": 1.390768051147461, "step": 775 }, { "epoch": 1.2337042925278219, "grad_norm": 11.04117437186309, "learning_rate": 7.338095563975813e-06, "loss": 1.3204916715621948, "step": 776 }, { "epoch": 1.2352941176470589, "grad_norm": 8.025556102160234, "learning_rate": 7.329914451965089e-06, "loss": 1.8049380779266357, "step": 777 }, { "epoch": 1.2368839427662957, "grad_norm": 12.664895874014816, "learning_rate": 7.321725364338566e-06, "loss": 2.279134511947632, "step": 778 }, { "epoch": 1.2384737678855327, "grad_norm": 10.40779875172629, "learning_rate": 7.313528329128613e-06, "loss": 1.6769804954528809, "step": 779 }, { "epoch": 1.2400635930047694, "grad_norm": 16.92840882244054, "learning_rate": 7.305323374394802e-06, "loss": 1.8052300214767456, "step": 780 }, { "epoch": 1.2416534181240064, "grad_norm": 8.142736484313943, "learning_rate": 7.297110528223817e-06, "loss": 1.4213504791259766, "step": 781 }, { "epoch": 1.2432432432432432, "grad_norm": 10.747768055674868, "learning_rate": 7.28888981872935e-06, "loss": 1.988961100578308, "step": 782 }, { "epoch": 1.2448330683624802, "grad_norm": 18.15875563346072, "learning_rate": 7.280661274052014e-06, "loss": 1.5727958679199219, "step": 783 }, { "epoch": 1.246422893481717, "grad_norm": 8.433529741351467, "learning_rate": 7.272424922359246e-06, "loss": 1.3556486368179321, "step": 784 }, { "epoch": 1.248012718600954, "grad_norm": 14.194850048972247, "learning_rate": 7.264180791845201e-06, "loss": 1.7819693088531494, "step": 785 }, { "epoch": 1.2496025437201908, "grad_norm": 8.397921807638253, "learning_rate": 7.255928910730669e-06, "loss": 1.6179646253585815, "step": 786 }, { "epoch": 1.2511923688394275, "grad_norm": 14.386921536746254, "learning_rate": 7.247669307262964e-06, "loss": 1.554338812828064, "step": 787 }, { "epoch": 1.2527821939586645, "grad_norm": 12.800105634391487, "learning_rate": 7.239402009715838e-06, "loss": 1.8695118427276611, "step": 788 }, { "epoch": 1.2543720190779015, "grad_norm": 15.512989476889842, "learning_rate": 7.231127046389384e-06, "loss": 1.8640936613082886, "step": 789 }, { "epoch": 1.2559618441971383, "grad_norm": 14.578922134309607, "learning_rate": 7.222844445609931e-06, "loss": 1.0992615222930908, "step": 790 }, { "epoch": 1.257551669316375, "grad_norm": 8.724455013991323, "learning_rate": 7.214554235729955e-06, "loss": 1.2543790340423584, "step": 791 }, { "epoch": 1.259141494435612, "grad_norm": 7.692029468393919, "learning_rate": 7.206256445127977e-06, "loss": 1.2529809474945068, "step": 792 }, { "epoch": 1.260731319554849, "grad_norm": 14.13800376092366, "learning_rate": 7.19795110220847e-06, "loss": 1.3441779613494873, "step": 793 }, { "epoch": 1.2623211446740858, "grad_norm": 9.49380607849554, "learning_rate": 7.18963823540176e-06, "loss": 1.3629730939865112, "step": 794 }, { "epoch": 1.2639109697933226, "grad_norm": 12.44600649205267, "learning_rate": 7.1813178731639255e-06, "loss": 1.345304012298584, "step": 795 }, { "epoch": 1.2655007949125596, "grad_norm": 10.304580436383407, "learning_rate": 7.172990043976703e-06, "loss": 1.1120240688323975, "step": 796 }, { "epoch": 1.2670906200317966, "grad_norm": 13.784064682241143, "learning_rate": 7.1646547763473916e-06, "loss": 1.0464750528335571, "step": 797 }, { "epoch": 1.2686804451510334, "grad_norm": 16.03338491525074, "learning_rate": 7.156312098808753e-06, "loss": 1.034813404083252, "step": 798 }, { "epoch": 1.2702702702702702, "grad_norm": 16.52768239877676, "learning_rate": 7.147962039918913e-06, "loss": 1.8651677370071411, "step": 799 }, { "epoch": 1.2718600953895072, "grad_norm": 7.8097708732231625, "learning_rate": 7.139604628261265e-06, "loss": 1.138526201248169, "step": 800 }, { "epoch": 1.2734499205087442, "grad_norm": 9.153076753451778, "learning_rate": 7.131239892444371e-06, "loss": 1.4918463230133057, "step": 801 }, { "epoch": 1.275039745627981, "grad_norm": 10.804677171421506, "learning_rate": 7.122867861101868e-06, "loss": 1.0829172134399414, "step": 802 }, { "epoch": 1.2766295707472177, "grad_norm": 10.229751180792467, "learning_rate": 7.114488562892363e-06, "loss": 1.1312910318374634, "step": 803 }, { "epoch": 1.2782193958664547, "grad_norm": 9.417078438717807, "learning_rate": 7.106102026499339e-06, "loss": 1.0001945495605469, "step": 804 }, { "epoch": 1.2798092209856915, "grad_norm": 17.074149708506145, "learning_rate": 7.097708280631057e-06, "loss": 1.3354151248931885, "step": 805 }, { "epoch": 1.2813990461049285, "grad_norm": 12.118419172188265, "learning_rate": 7.089307354020459e-06, "loss": 1.0924017429351807, "step": 806 }, { "epoch": 1.2829888712241653, "grad_norm": 11.901441327586127, "learning_rate": 7.080899275425063e-06, "loss": 1.7671406269073486, "step": 807 }, { "epoch": 1.2845786963434023, "grad_norm": 22.741979314506512, "learning_rate": 7.072484073626872e-06, "loss": 1.15092134475708, "step": 808 }, { "epoch": 1.286168521462639, "grad_norm": 12.17848493630658, "learning_rate": 7.064061777432276e-06, "loss": 1.0537457466125488, "step": 809 }, { "epoch": 1.287758346581876, "grad_norm": 14.484284581597736, "learning_rate": 7.055632415671942e-06, "loss": 2.0027740001678467, "step": 810 }, { "epoch": 1.2893481717011128, "grad_norm": 12.812643289560867, "learning_rate": 7.047196017200731e-06, "loss": 1.8905892372131348, "step": 811 }, { "epoch": 1.2909379968203498, "grad_norm": 10.311201489126852, "learning_rate": 7.038752610897589e-06, "loss": 2.1269192695617676, "step": 812 }, { "epoch": 1.2925278219395866, "grad_norm": 17.571625875332888, "learning_rate": 7.03030222566545e-06, "loss": 1.0327365398406982, "step": 813 }, { "epoch": 1.2941176470588236, "grad_norm": 7.576322051647097, "learning_rate": 7.021844890431136e-06, "loss": 1.481746792793274, "step": 814 }, { "epoch": 1.2957074721780604, "grad_norm": 9.000745253510628, "learning_rate": 7.013380634145264e-06, "loss": 2.112708330154419, "step": 815 }, { "epoch": 1.2972972972972974, "grad_norm": 8.205491038831578, "learning_rate": 7.004909485782141e-06, "loss": 1.0585367679595947, "step": 816 }, { "epoch": 1.2988871224165341, "grad_norm": 15.117454396605899, "learning_rate": 6.996431474339666e-06, "loss": 2.16007924079895, "step": 817 }, { "epoch": 1.3004769475357711, "grad_norm": 8.864090810986143, "learning_rate": 6.987946628839232e-06, "loss": 1.8724396228790283, "step": 818 }, { "epoch": 1.302066772655008, "grad_norm": 8.531503498684259, "learning_rate": 6.979454978325625e-06, "loss": 1.6091532707214355, "step": 819 }, { "epoch": 1.303656597774245, "grad_norm": 7.992191856625111, "learning_rate": 6.970956551866925e-06, "loss": 1.0313612222671509, "step": 820 }, { "epoch": 1.3052464228934817, "grad_norm": 6.984013339692948, "learning_rate": 6.962451378554411e-06, "loss": 0.973236083984375, "step": 821 }, { "epoch": 1.3068362480127185, "grad_norm": 8.439028091626657, "learning_rate": 6.9539394875024525e-06, "loss": 1.5291297435760498, "step": 822 }, { "epoch": 1.3084260731319555, "grad_norm": 11.687167308435836, "learning_rate": 6.945420907848415e-06, "loss": 1.6111561059951782, "step": 823 }, { "epoch": 1.3100158982511925, "grad_norm": 17.086783862630924, "learning_rate": 6.936895668752564e-06, "loss": 1.6212303638458252, "step": 824 }, { "epoch": 1.3116057233704292, "grad_norm": 16.37346021342094, "learning_rate": 6.9283637993979565e-06, "loss": 1.2969826459884644, "step": 825 }, { "epoch": 1.313195548489666, "grad_norm": 10.311683648355972, "learning_rate": 6.9198253289903515e-06, "loss": 1.4965565204620361, "step": 826 }, { "epoch": 1.314785373608903, "grad_norm": 15.199989391439724, "learning_rate": 6.911280286758097e-06, "loss": 1.3168373107910156, "step": 827 }, { "epoch": 1.31637519872814, "grad_norm": 16.279163654631084, "learning_rate": 6.902728701952045e-06, "loss": 2.215139389038086, "step": 828 }, { "epoch": 1.3179650238473768, "grad_norm": 11.490345488333293, "learning_rate": 6.894170603845436e-06, "loss": 1.1593304872512817, "step": 829 }, { "epoch": 1.3195548489666136, "grad_norm": 10.651962301231425, "learning_rate": 6.885606021733814e-06, "loss": 1.9360640048980713, "step": 830 }, { "epoch": 1.3211446740858506, "grad_norm": 24.69552648567043, "learning_rate": 6.877034984934912e-06, "loss": 1.5336499214172363, "step": 831 }, { "epoch": 1.3227344992050876, "grad_norm": 10.99837687708425, "learning_rate": 6.868457522788561e-06, "loss": 1.7535721063613892, "step": 832 }, { "epoch": 1.3243243243243243, "grad_norm": 9.662947464026942, "learning_rate": 6.859873664656588e-06, "loss": 1.508925437927246, "step": 833 }, { "epoch": 1.3259141494435611, "grad_norm": 17.79955371157225, "learning_rate": 6.851283439922714e-06, "loss": 1.1767382621765137, "step": 834 }, { "epoch": 1.3275039745627981, "grad_norm": 18.972019834967206, "learning_rate": 6.842686877992453e-06, "loss": 2.279311418533325, "step": 835 }, { "epoch": 1.329093799682035, "grad_norm": 9.823083495354286, "learning_rate": 6.834084008293009e-06, "loss": 1.5969994068145752, "step": 836 }, { "epoch": 1.330683624801272, "grad_norm": 17.311712777539558, "learning_rate": 6.825474860273186e-06, "loss": 1.2723362445831299, "step": 837 }, { "epoch": 1.3322734499205087, "grad_norm": 18.227029489838436, "learning_rate": 6.816859463403271e-06, "loss": 1.6115031242370605, "step": 838 }, { "epoch": 1.3338632750397457, "grad_norm": 11.055264990980145, "learning_rate": 6.808237847174948e-06, "loss": 1.4109325408935547, "step": 839 }, { "epoch": 1.3354531001589824, "grad_norm": 11.959325782123942, "learning_rate": 6.799610041101188e-06, "loss": 1.4117895364761353, "step": 840 }, { "epoch": 1.3370429252782194, "grad_norm": 9.938907954624465, "learning_rate": 6.790976074716151e-06, "loss": 1.3602039813995361, "step": 841 }, { "epoch": 1.3386327503974562, "grad_norm": 13.427330990539836, "learning_rate": 6.782335977575084e-06, "loss": 1.29445219039917, "step": 842 }, { "epoch": 1.3402225755166932, "grad_norm": 10.882630644745714, "learning_rate": 6.773689779254222e-06, "loss": 3.1862294673919678, "step": 843 }, { "epoch": 1.34181240063593, "grad_norm": 9.840468197614767, "learning_rate": 6.765037509350685e-06, "loss": 1.4964901208877563, "step": 844 }, { "epoch": 1.343402225755167, "grad_norm": 8.65407373608341, "learning_rate": 6.756379197482374e-06, "loss": 1.8972535133361816, "step": 845 }, { "epoch": 1.3449920508744038, "grad_norm": 10.226098512099883, "learning_rate": 6.747714873287876e-06, "loss": 1.2278270721435547, "step": 846 }, { "epoch": 1.3465818759936408, "grad_norm": 14.157256838775108, "learning_rate": 6.7390445664263586e-06, "loss": 1.52341628074646, "step": 847 }, { "epoch": 1.3481717011128775, "grad_norm": 16.97191198333951, "learning_rate": 6.730368306577464e-06, "loss": 1.5410349369049072, "step": 848 }, { "epoch": 1.3497615262321145, "grad_norm": 14.05811570041505, "learning_rate": 6.721686123441221e-06, "loss": 1.2722220420837402, "step": 849 }, { "epoch": 1.3513513513513513, "grad_norm": 14.11019835180354, "learning_rate": 6.7129980467379265e-06, "loss": 2.0446019172668457, "step": 850 }, { "epoch": 1.3529411764705883, "grad_norm": 10.718116025458626, "learning_rate": 6.704304106208056e-06, "loss": 1.768629789352417, "step": 851 }, { "epoch": 1.354531001589825, "grad_norm": 14.34390925267869, "learning_rate": 6.695604331612158e-06, "loss": 1.5219838619232178, "step": 852 }, { "epoch": 1.3561208267090619, "grad_norm": 15.353615186067385, "learning_rate": 6.686898752730751e-06, "loss": 2.1381354331970215, "step": 853 }, { "epoch": 1.3577106518282989, "grad_norm": 8.976124092423499, "learning_rate": 6.678187399364219e-06, "loss": 1.3002848625183105, "step": 854 }, { "epoch": 1.3593004769475359, "grad_norm": 9.815958640374769, "learning_rate": 6.669470301332718e-06, "loss": 1.3151880502700806, "step": 855 }, { "epoch": 1.3608903020667726, "grad_norm": 11.547257227718601, "learning_rate": 6.660747488476066e-06, "loss": 1.8249077796936035, "step": 856 }, { "epoch": 1.3624801271860094, "grad_norm": 9.516915628583785, "learning_rate": 6.652018990653646e-06, "loss": 1.3392479419708252, "step": 857 }, { "epoch": 1.3640699523052464, "grad_norm": 17.36011863152519, "learning_rate": 6.643284837744298e-06, "loss": 2.1219942569732666, "step": 858 }, { "epoch": 1.3656597774244834, "grad_norm": 15.704349365528364, "learning_rate": 6.6345450596462224e-06, "loss": 1.3290646076202393, "step": 859 }, { "epoch": 1.3672496025437202, "grad_norm": 18.10725760531691, "learning_rate": 6.625799686276876e-06, "loss": 1.8200846910476685, "step": 860 }, { "epoch": 1.368839427662957, "grad_norm": 12.506844604116118, "learning_rate": 6.617048747572865e-06, "loss": 2.4371092319488525, "step": 861 }, { "epoch": 1.370429252782194, "grad_norm": 7.80995257903257, "learning_rate": 6.608292273489851e-06, "loss": 2.0654964447021484, "step": 862 }, { "epoch": 1.372019077901431, "grad_norm": 12.966837058086185, "learning_rate": 6.599530294002443e-06, "loss": 1.5136079788208008, "step": 863 }, { "epoch": 1.3736089030206677, "grad_norm": 13.843991854836343, "learning_rate": 6.5907628391040945e-06, "loss": 1.7351160049438477, "step": 864 }, { "epoch": 1.3751987281399045, "grad_norm": 9.5849956241888, "learning_rate": 6.581989938807001e-06, "loss": 1.0918192863464355, "step": 865 }, { "epoch": 1.3767885532591415, "grad_norm": 13.699873383622664, "learning_rate": 6.573211623142002e-06, "loss": 1.272527813911438, "step": 866 }, { "epoch": 1.3783783783783785, "grad_norm": 20.5345031546365, "learning_rate": 6.564427922158472e-06, "loss": 1.753305196762085, "step": 867 }, { "epoch": 1.3799682034976153, "grad_norm": 16.200604524764035, "learning_rate": 6.555638865924221e-06, "loss": 1.4228838682174683, "step": 868 }, { "epoch": 1.381558028616852, "grad_norm": 11.07654229906319, "learning_rate": 6.546844484525389e-06, "loss": 1.5971556901931763, "step": 869 }, { "epoch": 1.383147853736089, "grad_norm": 8.07355505884821, "learning_rate": 6.538044808066346e-06, "loss": 1.8155808448791504, "step": 870 }, { "epoch": 1.3847376788553258, "grad_norm": 16.04438512619071, "learning_rate": 6.529239866669592e-06, "loss": 1.3249969482421875, "step": 871 }, { "epoch": 1.3863275039745628, "grad_norm": 6.72430675838066, "learning_rate": 6.5204296904756405e-06, "loss": 1.5359678268432617, "step": 872 }, { "epoch": 1.3879173290937996, "grad_norm": 19.07489525371284, "learning_rate": 6.511614309642933e-06, "loss": 0.7233240008354187, "step": 873 }, { "epoch": 1.3895071542130366, "grad_norm": 12.401419042352451, "learning_rate": 6.502793754347721e-06, "loss": 1.3856028318405151, "step": 874 }, { "epoch": 1.3910969793322734, "grad_norm": 14.058889831537517, "learning_rate": 6.493968054783973e-06, "loss": 1.1357369422912598, "step": 875 }, { "epoch": 1.3926868044515104, "grad_norm": 11.845628341686973, "learning_rate": 6.485137241163266e-06, "loss": 1.6570309400558472, "step": 876 }, { "epoch": 1.3942766295707472, "grad_norm": 8.769841987385185, "learning_rate": 6.476301343714682e-06, "loss": 1.8941020965576172, "step": 877 }, { "epoch": 1.3958664546899842, "grad_norm": 11.197728580579021, "learning_rate": 6.467460392684706e-06, "loss": 1.2110662460327148, "step": 878 }, { "epoch": 1.397456279809221, "grad_norm": 22.348478293445435, "learning_rate": 6.4586144183371215e-06, "loss": 1.6229068040847778, "step": 879 }, { "epoch": 1.399046104928458, "grad_norm": 17.601335745522142, "learning_rate": 6.449763450952912e-06, "loss": 1.4073424339294434, "step": 880 }, { "epoch": 1.4006359300476947, "grad_norm": 9.69254485007978, "learning_rate": 6.4409075208301454e-06, "loss": 2.3579235076904297, "step": 881 }, { "epoch": 1.4022257551669317, "grad_norm": 63.767907422838185, "learning_rate": 6.432046658283882e-06, "loss": 1.7618337869644165, "step": 882 }, { "epoch": 1.4038155802861685, "grad_norm": 12.355457680018553, "learning_rate": 6.423180893646068e-06, "loss": 1.5357401371002197, "step": 883 }, { "epoch": 1.4054054054054055, "grad_norm": 12.449348927552991, "learning_rate": 6.41431025726543e-06, "loss": 1.253293514251709, "step": 884 }, { "epoch": 1.4069952305246423, "grad_norm": 10.283883999418105, "learning_rate": 6.405434779507363e-06, "loss": 0.9170820713043213, "step": 885 }, { "epoch": 1.4085850556438793, "grad_norm": 8.815585806232317, "learning_rate": 6.396554490753848e-06, "loss": 1.651247262954712, "step": 886 }, { "epoch": 1.410174880763116, "grad_norm": 12.925043436380768, "learning_rate": 6.387669421403324e-06, "loss": 1.3426076173782349, "step": 887 }, { "epoch": 1.4117647058823528, "grad_norm": 4.816050020353849, "learning_rate": 6.378779601870598e-06, "loss": 0.7309417724609375, "step": 888 }, { "epoch": 1.4133545310015898, "grad_norm": 18.174019009330134, "learning_rate": 6.369885062586741e-06, "loss": 1.451963186264038, "step": 889 }, { "epoch": 1.4149443561208268, "grad_norm": 21.93877974703329, "learning_rate": 6.360985833998974e-06, "loss": 1.6305956840515137, "step": 890 }, { "epoch": 1.4165341812400636, "grad_norm": 9.31424518285767, "learning_rate": 6.352081946570577e-06, "loss": 1.859921932220459, "step": 891 }, { "epoch": 1.4181240063593004, "grad_norm": 10.563231176463914, "learning_rate": 6.343173430780769e-06, "loss": 1.586233377456665, "step": 892 }, { "epoch": 1.4197138314785374, "grad_norm": 11.50204395935882, "learning_rate": 6.334260317124623e-06, "loss": 1.5006359815597534, "step": 893 }, { "epoch": 1.4213036565977744, "grad_norm": 14.898950229103232, "learning_rate": 6.325342636112945e-06, "loss": 1.1938085556030273, "step": 894 }, { "epoch": 1.4228934817170111, "grad_norm": 9.523797786916452, "learning_rate": 6.316420418272176e-06, "loss": 1.42879056930542, "step": 895 }, { "epoch": 1.424483306836248, "grad_norm": 10.87010950961569, "learning_rate": 6.3074936941442865e-06, "loss": 1.358415126800537, "step": 896 }, { "epoch": 1.426073131955485, "grad_norm": 13.930262341030167, "learning_rate": 6.2985624942866764e-06, "loss": 1.1303586959838867, "step": 897 }, { "epoch": 1.427662957074722, "grad_norm": 18.54295613105915, "learning_rate": 6.289626849272062e-06, "loss": 1.4245703220367432, "step": 898 }, { "epoch": 1.4292527821939587, "grad_norm": 8.732229316644414, "learning_rate": 6.2806867896883795e-06, "loss": 1.403051495552063, "step": 899 }, { "epoch": 1.4308426073131955, "grad_norm": 11.823310719194478, "learning_rate": 6.271742346138676e-06, "loss": 1.0180282592773438, "step": 900 }, { "epoch": 1.4324324324324325, "grad_norm": 11.29613478139605, "learning_rate": 6.262793549241003e-06, "loss": 1.287471055984497, "step": 901 }, { "epoch": 1.4340222575516695, "grad_norm": 13.77247104303683, "learning_rate": 6.253840429628317e-06, "loss": 1.5134391784667969, "step": 902 }, { "epoch": 1.4356120826709062, "grad_norm": 14.298318991675389, "learning_rate": 6.244883017948371e-06, "loss": 1.1660302877426147, "step": 903 }, { "epoch": 1.437201907790143, "grad_norm": 14.375040695355453, "learning_rate": 6.2359213448636104e-06, "loss": 1.808586835861206, "step": 904 }, { "epoch": 1.43879173290938, "grad_norm": 8.861984330110152, "learning_rate": 6.226955441051067e-06, "loss": 1.7996587753295898, "step": 905 }, { "epoch": 1.4403815580286168, "grad_norm": 12.94910466867558, "learning_rate": 6.2179853372022555e-06, "loss": 1.6222466230392456, "step": 906 }, { "epoch": 1.4419713831478538, "grad_norm": 10.618344021283788, "learning_rate": 6.209011064023072e-06, "loss": 1.047587513923645, "step": 907 }, { "epoch": 1.4435612082670906, "grad_norm": 9.516494660750666, "learning_rate": 6.200032652233674e-06, "loss": 1.5758092403411865, "step": 908 }, { "epoch": 1.4451510333863276, "grad_norm": 16.19352518570402, "learning_rate": 6.191050132568397e-06, "loss": 1.4394742250442505, "step": 909 }, { "epoch": 1.4467408585055643, "grad_norm": 7.677253578283328, "learning_rate": 6.182063535775634e-06, "loss": 1.2608493566513062, "step": 910 }, { "epoch": 1.4483306836248013, "grad_norm": 13.912530870059598, "learning_rate": 6.173072892617737e-06, "loss": 1.3285424709320068, "step": 911 }, { "epoch": 1.449920508744038, "grad_norm": 11.724953848878124, "learning_rate": 6.164078233870902e-06, "loss": 1.33537757396698, "step": 912 }, { "epoch": 1.451510333863275, "grad_norm": 22.12907802469097, "learning_rate": 6.155079590325079e-06, "loss": 1.7496384382247925, "step": 913 }, { "epoch": 1.4531001589825119, "grad_norm": 8.76725712308349, "learning_rate": 6.1460769927838535e-06, "loss": 1.0802656412124634, "step": 914 }, { "epoch": 1.4546899841017489, "grad_norm": 11.521977294381971, "learning_rate": 6.137070472064351e-06, "loss": 1.8613876104354858, "step": 915 }, { "epoch": 1.4562798092209857, "grad_norm": 9.847645207715075, "learning_rate": 6.1280600589971225e-06, "loss": 1.8818397521972656, "step": 916 }, { "epoch": 1.4578696343402227, "grad_norm": 9.180918128743544, "learning_rate": 6.1190457844260434e-06, "loss": 1.3407704830169678, "step": 917 }, { "epoch": 1.4594594594594594, "grad_norm": 8.21057896116912, "learning_rate": 6.110027679208208e-06, "loss": 1.2275454998016357, "step": 918 }, { "epoch": 1.4610492845786962, "grad_norm": 11.421076884146746, "learning_rate": 6.1010057742138255e-06, "loss": 0.33396875858306885, "step": 919 }, { "epoch": 1.4626391096979332, "grad_norm": 16.782553590361438, "learning_rate": 6.091980100326109e-06, "loss": 1.9670312404632568, "step": 920 }, { "epoch": 1.4642289348171702, "grad_norm": 13.30887232068492, "learning_rate": 6.082950688441174e-06, "loss": 1.1424121856689453, "step": 921 }, { "epoch": 1.465818759936407, "grad_norm": 9.503687911554648, "learning_rate": 6.073917569467934e-06, "loss": 2.301384925842285, "step": 922 }, { "epoch": 1.4674085850556438, "grad_norm": 9.930986204824418, "learning_rate": 6.064880774327989e-06, "loss": 1.5081627368927002, "step": 923 }, { "epoch": 1.4689984101748808, "grad_norm": 14.51109800380038, "learning_rate": 6.055840333955526e-06, "loss": 1.5140700340270996, "step": 924 }, { "epoch": 1.4705882352941178, "grad_norm": 14.537439872109701, "learning_rate": 6.046796279297208e-06, "loss": 1.4806504249572754, "step": 925 }, { "epoch": 1.4721780604133545, "grad_norm": 6.594814975273433, "learning_rate": 6.037748641312071e-06, "loss": 1.6404941082000732, "step": 926 }, { "epoch": 1.4737678855325913, "grad_norm": 11.604362595785672, "learning_rate": 6.028697450971417e-06, "loss": 1.1991019248962402, "step": 927 }, { "epoch": 1.4753577106518283, "grad_norm": 10.249079970643205, "learning_rate": 6.0196427392587085e-06, "loss": 1.0598170757293701, "step": 928 }, { "epoch": 1.4769475357710653, "grad_norm": 23.586165274015066, "learning_rate": 6.0105845371694615e-06, "loss": 2.617990016937256, "step": 929 }, { "epoch": 1.478537360890302, "grad_norm": 10.652102340118171, "learning_rate": 6.001522875711142e-06, "loss": 1.792860746383667, "step": 930 }, { "epoch": 1.4801271860095389, "grad_norm": 11.329630653040569, "learning_rate": 5.992457785903054e-06, "loss": 1.4832801818847656, "step": 931 }, { "epoch": 1.4817170111287759, "grad_norm": 8.972410054989348, "learning_rate": 5.983389298776241e-06, "loss": 1.5356411933898926, "step": 932 }, { "epoch": 1.4833068362480128, "grad_norm": 10.268881109477642, "learning_rate": 5.974317445373374e-06, "loss": 1.5654367208480835, "step": 933 }, { "epoch": 1.4848966613672496, "grad_norm": 11.714234165487198, "learning_rate": 5.96524225674865e-06, "loss": 1.6659619808197021, "step": 934 }, { "epoch": 1.4864864864864864, "grad_norm": 10.228252789232146, "learning_rate": 5.956163763967678e-06, "loss": 1.9939287900924683, "step": 935 }, { "epoch": 1.4880763116057234, "grad_norm": 16.7890095794601, "learning_rate": 5.947081998107381e-06, "loss": 1.8214223384857178, "step": 936 }, { "epoch": 1.4896661367249602, "grad_norm": 7.800897781524228, "learning_rate": 5.937996990255886e-06, "loss": 1.4987486600875854, "step": 937 }, { "epoch": 1.4912559618441972, "grad_norm": 15.394073679043718, "learning_rate": 5.928908771512418e-06, "loss": 1.1124498844146729, "step": 938 }, { "epoch": 1.492845786963434, "grad_norm": 16.106188196353283, "learning_rate": 5.919817372987192e-06, "loss": 2.0639588832855225, "step": 939 }, { "epoch": 1.494435612082671, "grad_norm": 11.880407283343887, "learning_rate": 5.9107228258013085e-06, "loss": 2.061716079711914, "step": 940 }, { "epoch": 1.4960254372019077, "grad_norm": 8.204725650078993, "learning_rate": 5.901625161086645e-06, "loss": 2.1551291942596436, "step": 941 }, { "epoch": 1.4976152623211447, "grad_norm": 17.715945196929077, "learning_rate": 5.892524409985754e-06, "loss": 1.2744243144989014, "step": 942 }, { "epoch": 1.4992050874403815, "grad_norm": 11.613878065057113, "learning_rate": 5.883420603651749e-06, "loss": 1.4785696268081665, "step": 943 }, { "epoch": 1.5007949125596185, "grad_norm": 27.10249353739021, "learning_rate": 5.874313773248206e-06, "loss": 1.4471817016601562, "step": 944 }, { "epoch": 1.5023847376788553, "grad_norm": 21.66926685817701, "learning_rate": 5.86520394994905e-06, "loss": 1.6950613260269165, "step": 945 }, { "epoch": 1.503974562798092, "grad_norm": 13.085644815567496, "learning_rate": 5.856091164938451e-06, "loss": 1.689048171043396, "step": 946 }, { "epoch": 1.505564387917329, "grad_norm": 8.749648322881661, "learning_rate": 5.8469754494107215e-06, "loss": 1.3710697889328003, "step": 947 }, { "epoch": 1.507154213036566, "grad_norm": 10.92476604200737, "learning_rate": 5.837856834570197e-06, "loss": 1.0909825563430786, "step": 948 }, { "epoch": 1.5087440381558028, "grad_norm": 10.261657935970968, "learning_rate": 5.828735351631149e-06, "loss": 1.0412880182266235, "step": 949 }, { "epoch": 1.5103338632750396, "grad_norm": 8.37623419784984, "learning_rate": 5.819611031817657e-06, "loss": 1.4581108093261719, "step": 950 }, { "epoch": 1.5119236883942766, "grad_norm": 8.534526301543458, "learning_rate": 5.8104839063635164e-06, "loss": 1.3928742408752441, "step": 951 }, { "epoch": 1.5135135135135136, "grad_norm": 17.1404773700483, "learning_rate": 5.801354006512127e-06, "loss": 1.7017488479614258, "step": 952 }, { "epoch": 1.5151033386327504, "grad_norm": 10.78331640625184, "learning_rate": 5.792221363516386e-06, "loss": 1.185091495513916, "step": 953 }, { "epoch": 1.5166931637519872, "grad_norm": 13.293730796736146, "learning_rate": 5.7830860086385746e-06, "loss": 1.9326300621032715, "step": 954 }, { "epoch": 1.5182829888712241, "grad_norm": 8.801862772509093, "learning_rate": 5.773947973150265e-06, "loss": 1.367699384689331, "step": 955 }, { "epoch": 1.5198728139904611, "grad_norm": 9.794675722959532, "learning_rate": 5.764807288332202e-06, "loss": 1.971652626991272, "step": 956 }, { "epoch": 1.521462639109698, "grad_norm": 10.324089171093677, "learning_rate": 5.7556639854741995e-06, "loss": 1.718085765838623, "step": 957 }, { "epoch": 1.5230524642289347, "grad_norm": 14.973860462918022, "learning_rate": 5.746518095875033e-06, "loss": 1.8058040142059326, "step": 958 }, { "epoch": 1.5246422893481717, "grad_norm": 10.33878097613058, "learning_rate": 5.737369650842334e-06, "loss": 2.024052143096924, "step": 959 }, { "epoch": 1.5262321144674087, "grad_norm": 10.877213574316285, "learning_rate": 5.728218681692482e-06, "loss": 1.4764926433563232, "step": 960 }, { "epoch": 1.5278219395866455, "grad_norm": 12.69209007016564, "learning_rate": 5.719065219750493e-06, "loss": 1.2469747066497803, "step": 961 }, { "epoch": 1.5294117647058822, "grad_norm": 61.643149474300564, "learning_rate": 5.709909296349921e-06, "loss": 1.5892256498336792, "step": 962 }, { "epoch": 1.5310015898251192, "grad_norm": 10.179597580056537, "learning_rate": 5.700750942832744e-06, "loss": 1.0561248064041138, "step": 963 }, { "epoch": 1.5325914149443562, "grad_norm": 11.321189364453208, "learning_rate": 5.6915901905492586e-06, "loss": 1.670686960220337, "step": 964 }, { "epoch": 1.534181240063593, "grad_norm": 11.261606261134911, "learning_rate": 5.682427070857973e-06, "loss": 1.058276653289795, "step": 965 }, { "epoch": 1.5357710651828298, "grad_norm": 15.864794483406374, "learning_rate": 5.673261615125498e-06, "loss": 0.6892185807228088, "step": 966 }, { "epoch": 1.5373608903020668, "grad_norm": 18.230716361815034, "learning_rate": 5.664093854726442e-06, "loss": 1.2923883199691772, "step": 967 }, { "epoch": 1.5389507154213038, "grad_norm": 8.837189988255762, "learning_rate": 5.6549238210433035e-06, "loss": 1.2818342447280884, "step": 968 }, { "epoch": 1.5405405405405406, "grad_norm": 13.745290482460153, "learning_rate": 5.6457515454663595e-06, "loss": 0.8728968501091003, "step": 969 }, { "epoch": 1.5421303656597773, "grad_norm": 14.575907914126436, "learning_rate": 5.6365770593935665e-06, "loss": 1.9964404106140137, "step": 970 }, { "epoch": 1.5437201907790143, "grad_norm": 11.633894434963024, "learning_rate": 5.627400394230443e-06, "loss": 1.6522562503814697, "step": 971 }, { "epoch": 1.5453100158982513, "grad_norm": 10.625355051973981, "learning_rate": 5.618221581389971e-06, "loss": 1.1675052642822266, "step": 972 }, { "epoch": 1.5468998410174881, "grad_norm": 16.143556764927435, "learning_rate": 5.609040652292479e-06, "loss": 1.5812228918075562, "step": 973 }, { "epoch": 1.548489666136725, "grad_norm": 22.11039303511628, "learning_rate": 5.599857638365547e-06, "loss": 1.923478364944458, "step": 974 }, { "epoch": 1.550079491255962, "grad_norm": 11.631721410256578, "learning_rate": 5.590672571043883e-06, "loss": 2.511260986328125, "step": 975 }, { "epoch": 1.551669316375199, "grad_norm": 10.028949234619766, "learning_rate": 5.581485481769231e-06, "loss": 2.299067258834839, "step": 976 }, { "epoch": 1.5532591414944354, "grad_norm": 23.468699681971557, "learning_rate": 5.5722964019902535e-06, "loss": 1.86583411693573, "step": 977 }, { "epoch": 1.5548489666136724, "grad_norm": 12.304920816212974, "learning_rate": 5.56310536316243e-06, "loss": 2.1012837886810303, "step": 978 }, { "epoch": 1.5564387917329094, "grad_norm": 17.576061655061082, "learning_rate": 5.553912396747938e-06, "loss": 1.7390809059143066, "step": 979 }, { "epoch": 1.5580286168521462, "grad_norm": 9.199264804010845, "learning_rate": 5.544717534215562e-06, "loss": 1.872962474822998, "step": 980 }, { "epoch": 1.559618441971383, "grad_norm": 14.006346639764674, "learning_rate": 5.535520807040574e-06, "loss": 1.6516368389129639, "step": 981 }, { "epoch": 1.56120826709062, "grad_norm": 8.25412090051608, "learning_rate": 5.526322246704628e-06, "loss": 1.301893711090088, "step": 982 }, { "epoch": 1.562798092209857, "grad_norm": 7.93205855182012, "learning_rate": 5.517121884695652e-06, "loss": 1.6840949058532715, "step": 983 }, { "epoch": 1.5643879173290938, "grad_norm": 8.670962811174403, "learning_rate": 5.507919752507749e-06, "loss": 1.8640323877334595, "step": 984 }, { "epoch": 1.5659777424483305, "grad_norm": 8.520452058979373, "learning_rate": 5.498715881641069e-06, "loss": 1.0217180252075195, "step": 985 }, { "epoch": 1.5675675675675675, "grad_norm": 9.740115136069019, "learning_rate": 5.489510303601726e-06, "loss": 1.6914985179901123, "step": 986 }, { "epoch": 1.5691573926868045, "grad_norm": 11.47073775556394, "learning_rate": 5.480303049901669e-06, "loss": 1.4903960227966309, "step": 987 }, { "epoch": 1.5707472178060413, "grad_norm": 10.408355607354645, "learning_rate": 5.471094152058592e-06, "loss": 1.6876851320266724, "step": 988 }, { "epoch": 1.572337042925278, "grad_norm": 10.029839032638279, "learning_rate": 5.461883641595804e-06, "loss": 1.29941987991333, "step": 989 }, { "epoch": 1.573926868044515, "grad_norm": 20.084310299738938, "learning_rate": 5.4526715500421465e-06, "loss": 2.3986659049987793, "step": 990 }, { "epoch": 1.575516693163752, "grad_norm": 18.092918310438236, "learning_rate": 5.443457908931868e-06, "loss": 1.7800534963607788, "step": 991 }, { "epoch": 1.5771065182829889, "grad_norm": 13.684640461036262, "learning_rate": 5.434242749804523e-06, "loss": 1.9280859231948853, "step": 992 }, { "epoch": 1.5786963434022256, "grad_norm": 11.004874677450085, "learning_rate": 5.42502610420486e-06, "loss": 1.2366968393325806, "step": 993 }, { "epoch": 1.5802861685214626, "grad_norm": 7.500201196806951, "learning_rate": 5.415808003682717e-06, "loss": 1.7288457155227661, "step": 994 }, { "epoch": 1.5818759936406996, "grad_norm": 15.307405528491252, "learning_rate": 5.406588479792915e-06, "loss": 1.1540336608886719, "step": 995 }, { "epoch": 1.5834658187599364, "grad_norm": 8.924512960021882, "learning_rate": 5.397367564095142e-06, "loss": 1.3142873048782349, "step": 996 }, { "epoch": 1.5850556438791732, "grad_norm": 13.789877615094275, "learning_rate": 5.388145288153855e-06, "loss": 0.8072051405906677, "step": 997 }, { "epoch": 1.5866454689984102, "grad_norm": 11.004905111807771, "learning_rate": 5.378921683538166e-06, "loss": 1.4714614152908325, "step": 998 }, { "epoch": 1.5882352941176472, "grad_norm": 8.752774902984761, "learning_rate": 5.369696781821735e-06, "loss": 2.083068370819092, "step": 999 }, { "epoch": 1.589825119236884, "grad_norm": 8.877980237727733, "learning_rate": 5.360470614582661e-06, "loss": 1.6199175119400024, "step": 1000 }, { "epoch": 1.5914149443561207, "grad_norm": 7.5027659446752555, "learning_rate": 5.351243213403378e-06, "loss": 0.8739040493965149, "step": 1001 }, { "epoch": 1.5930047694753577, "grad_norm": 15.849688654140593, "learning_rate": 5.3420146098705404e-06, "loss": 1.8732268810272217, "step": 1002 }, { "epoch": 1.5945945945945947, "grad_norm": 13.424960671306968, "learning_rate": 5.33278483557492e-06, "loss": 1.1949975490570068, "step": 1003 }, { "epoch": 1.5961844197138315, "grad_norm": 8.314079314375906, "learning_rate": 5.323553922111299e-06, "loss": 1.6262646913528442, "step": 1004 }, { "epoch": 1.5977742448330683, "grad_norm": 7.52713288008192, "learning_rate": 5.314321901078355e-06, "loss": 2.4691824913024902, "step": 1005 }, { "epoch": 1.5993640699523053, "grad_norm": 11.37720619082473, "learning_rate": 5.305088804078559e-06, "loss": 1.672831416130066, "step": 1006 }, { "epoch": 1.6009538950715423, "grad_norm": 9.716827384821913, "learning_rate": 5.295854662718062e-06, "loss": 1.9149669408798218, "step": 1007 }, { "epoch": 1.602543720190779, "grad_norm": 16.708710476919038, "learning_rate": 5.286619508606595e-06, "loss": 1.4306385517120361, "step": 1008 }, { "epoch": 1.6041335453100158, "grad_norm": 9.407323555091757, "learning_rate": 5.277383373357353e-06, "loss": 1.8381001949310303, "step": 1009 }, { "epoch": 1.6057233704292528, "grad_norm": 13.934447524856429, "learning_rate": 5.268146288586893e-06, "loss": 1.8029513359069824, "step": 1010 }, { "epoch": 1.6073131955484896, "grad_norm": 9.50195517935946, "learning_rate": 5.258908285915014e-06, "loss": 1.353687047958374, "step": 1011 }, { "epoch": 1.6089030206677264, "grad_norm": 11.305832651115942, "learning_rate": 5.249669396964665e-06, "loss": 1.2117490768432617, "step": 1012 }, { "epoch": 1.6104928457869634, "grad_norm": 13.13493812642368, "learning_rate": 5.2404296533618285e-06, "loss": 1.570952296257019, "step": 1013 }, { "epoch": 1.6120826709062004, "grad_norm": 8.042406666850942, "learning_rate": 5.231189086735406e-06, "loss": 1.1835918426513672, "step": 1014 }, { "epoch": 1.6136724960254372, "grad_norm": 7.452052581285824, "learning_rate": 5.221947728717126e-06, "loss": 1.1483402252197266, "step": 1015 }, { "epoch": 1.615262321144674, "grad_norm": 10.17335158903229, "learning_rate": 5.212705610941417e-06, "loss": 1.547670602798462, "step": 1016 }, { "epoch": 1.616852146263911, "grad_norm": 17.268602211235677, "learning_rate": 5.203462765045313e-06, "loss": 2.911768913269043, "step": 1017 }, { "epoch": 1.618441971383148, "grad_norm": 10.827239036290603, "learning_rate": 5.1942192226683385e-06, "loss": 1.5376920700073242, "step": 1018 }, { "epoch": 1.6200317965023847, "grad_norm": 12.49070377010686, "learning_rate": 5.184975015452407e-06, "loss": 2.1733736991882324, "step": 1019 }, { "epoch": 1.6216216216216215, "grad_norm": 9.51294833642187, "learning_rate": 5.1757301750416996e-06, "loss": 1.2068171501159668, "step": 1020 }, { "epoch": 1.6232114467408585, "grad_norm": 17.13733775730942, "learning_rate": 5.166484733082572e-06, "loss": 2.067399501800537, "step": 1021 }, { "epoch": 1.6248012718600955, "grad_norm": 9.397305167537892, "learning_rate": 5.157238721223433e-06, "loss": 1.0969213247299194, "step": 1022 }, { "epoch": 1.6263910969793323, "grad_norm": 14.102433470211162, "learning_rate": 5.1479921711146495e-06, "loss": 2.003542184829712, "step": 1023 }, { "epoch": 1.627980922098569, "grad_norm": 11.795355928597715, "learning_rate": 5.138745114408427e-06, "loss": 1.537621021270752, "step": 1024 }, { "epoch": 1.629570747217806, "grad_norm": 8.8711281512968, "learning_rate": 5.1294975827587015e-06, "loss": 1.231778621673584, "step": 1025 }, { "epoch": 1.631160572337043, "grad_norm": 11.85504205931933, "learning_rate": 5.1202496078210415e-06, "loss": 1.3609113693237305, "step": 1026 }, { "epoch": 1.6327503974562798, "grad_norm": 11.946327249887682, "learning_rate": 5.111001221252528e-06, "loss": 1.6268502473831177, "step": 1027 }, { "epoch": 1.6343402225755166, "grad_norm": 12.232703013961357, "learning_rate": 5.101752454711657e-06, "loss": 1.505003571510315, "step": 1028 }, { "epoch": 1.6359300476947536, "grad_norm": 19.083352218371367, "learning_rate": 5.092503339858216e-06, "loss": 1.8680495023727417, "step": 1029 }, { "epoch": 1.6375198728139906, "grad_norm": 14.054082187442837, "learning_rate": 5.083253908353193e-06, "loss": 1.8762643337249756, "step": 1030 }, { "epoch": 1.6391096979332274, "grad_norm": 7.699927162964524, "learning_rate": 5.074004191858656e-06, "loss": 1.466231346130371, "step": 1031 }, { "epoch": 1.6406995230524641, "grad_norm": 12.776351055451174, "learning_rate": 5.06475422203765e-06, "loss": 2.1224594116210938, "step": 1032 }, { "epoch": 1.6422893481717011, "grad_norm": 9.167891892890259, "learning_rate": 5.055504030554088e-06, "loss": 0.92369544506073, "step": 1033 }, { "epoch": 1.6438791732909381, "grad_norm": 9.219248419523527, "learning_rate": 5.046253649072637e-06, "loss": 1.374680995941162, "step": 1034 }, { "epoch": 1.645468998410175, "grad_norm": 13.856228172472624, "learning_rate": 5.037003109258619e-06, "loss": 1.4022250175476074, "step": 1035 }, { "epoch": 1.6470588235294117, "grad_norm": 18.383371823664145, "learning_rate": 5.0277524427778986e-06, "loss": 1.4787659645080566, "step": 1036 }, { "epoch": 1.6486486486486487, "grad_norm": 16.679763723284253, "learning_rate": 5.018501681296772e-06, "loss": 1.5815346240997314, "step": 1037 }, { "epoch": 1.6502384737678857, "grad_norm": 14.783063826660744, "learning_rate": 5.00925085648186e-06, "loss": 1.5328896045684814, "step": 1038 }, { "epoch": 1.6518282988871225, "grad_norm": 17.505178205736698, "learning_rate": 5e-06, "loss": 1.2714117765426636, "step": 1039 }, { "epoch": 1.6534181240063592, "grad_norm": 9.665591959431389, "learning_rate": 4.990749143518141e-06, "loss": 1.1989299058914185, "step": 1040 }, { "epoch": 1.6550079491255962, "grad_norm": 8.381579498477992, "learning_rate": 4.9814983187032285e-06, "loss": 1.792160987854004, "step": 1041 }, { "epoch": 1.6565977742448332, "grad_norm": 12.647260516370203, "learning_rate": 4.972247557222102e-06, "loss": 1.248565435409546, "step": 1042 }, { "epoch": 1.6581875993640698, "grad_norm": 11.615346352985599, "learning_rate": 4.962996890741382e-06, "loss": 2.1052873134613037, "step": 1043 }, { "epoch": 1.6597774244833068, "grad_norm": 9.539901754318029, "learning_rate": 4.953746350927365e-06, "loss": 1.4773461818695068, "step": 1044 }, { "epoch": 1.6613672496025438, "grad_norm": 12.189800981021298, "learning_rate": 4.944495969445914e-06, "loss": 1.362640619277954, "step": 1045 }, { "epoch": 1.6629570747217806, "grad_norm": 13.405337529322672, "learning_rate": 4.9352457779623515e-06, "loss": 1.1369311809539795, "step": 1046 }, { "epoch": 1.6645468998410173, "grad_norm": 12.750578128848364, "learning_rate": 4.925995808141345e-06, "loss": 1.287116289138794, "step": 1047 }, { "epoch": 1.6661367249602543, "grad_norm": 12.715618535880669, "learning_rate": 4.916746091646808e-06, "loss": 1.0644608736038208, "step": 1048 }, { "epoch": 1.6677265500794913, "grad_norm": 10.084441280599995, "learning_rate": 4.907496660141784e-06, "loss": 1.4485750198364258, "step": 1049 }, { "epoch": 1.669316375198728, "grad_norm": 9.612770128377981, "learning_rate": 4.898247545288345e-06, "loss": 1.5343468189239502, "step": 1050 }, { "epoch": 1.6709062003179649, "grad_norm": 15.81650026520017, "learning_rate": 4.8889987787474716e-06, "loss": 1.8680295944213867, "step": 1051 }, { "epoch": 1.6724960254372019, "grad_norm": 9.857358251920665, "learning_rate": 4.879750392178959e-06, "loss": 1.6718649864196777, "step": 1052 }, { "epoch": 1.6740858505564389, "grad_norm": 8.410780391145309, "learning_rate": 4.870502417241301e-06, "loss": 1.4237810373306274, "step": 1053 }, { "epoch": 1.6756756756756757, "grad_norm": 12.84467315414432, "learning_rate": 4.8612548855915755e-06, "loss": 1.3827040195465088, "step": 1054 }, { "epoch": 1.6772655007949124, "grad_norm": 9.523992781457146, "learning_rate": 4.852007828885351e-06, "loss": 1.0847723484039307, "step": 1055 }, { "epoch": 1.6788553259141494, "grad_norm": 21.357651931004337, "learning_rate": 4.842761278776569e-06, "loss": 1.4171757698059082, "step": 1056 }, { "epoch": 1.6804451510333864, "grad_norm": 11.891847367476064, "learning_rate": 4.833515266917431e-06, "loss": 1.6752054691314697, "step": 1057 }, { "epoch": 1.6820349761526232, "grad_norm": 13.400296577285918, "learning_rate": 4.824269824958303e-06, "loss": 1.6439988613128662, "step": 1058 }, { "epoch": 1.68362480127186, "grad_norm": 12.5119030672635, "learning_rate": 4.815024984547595e-06, "loss": 1.913288950920105, "step": 1059 }, { "epoch": 1.685214626391097, "grad_norm": 20.74654892530919, "learning_rate": 4.805780777331662e-06, "loss": 1.6886498928070068, "step": 1060 }, { "epoch": 1.686804451510334, "grad_norm": 7.865464674765795, "learning_rate": 4.796537234954689e-06, "loss": 1.3677489757537842, "step": 1061 }, { "epoch": 1.6883942766295708, "grad_norm": 10.073894502078147, "learning_rate": 4.787294389058584e-06, "loss": 1.2702281475067139, "step": 1062 }, { "epoch": 1.6899841017488075, "grad_norm": 30.369206738847247, "learning_rate": 4.778052271282875e-06, "loss": 1.4017125368118286, "step": 1063 }, { "epoch": 1.6915739268680445, "grad_norm": 9.499711488415217, "learning_rate": 4.7688109132645945e-06, "loss": 1.7769944667816162, "step": 1064 }, { "epoch": 1.6931637519872815, "grad_norm": 9.145985663104037, "learning_rate": 4.759570346638174e-06, "loss": 1.2236988544464111, "step": 1065 }, { "epoch": 1.6947535771065183, "grad_norm": 10.142690351521992, "learning_rate": 4.750330603035336e-06, "loss": 1.8145155906677246, "step": 1066 }, { "epoch": 1.696343402225755, "grad_norm": 8.110942874236551, "learning_rate": 4.7410917140849875e-06, "loss": 1.3042653799057007, "step": 1067 }, { "epoch": 1.697933227344992, "grad_norm": 15.99942668926325, "learning_rate": 4.731853711413109e-06, "loss": 3.3661022186279297, "step": 1068 }, { "epoch": 1.699523052464229, "grad_norm": 17.349050742085467, "learning_rate": 4.722616626642648e-06, "loss": 1.630997896194458, "step": 1069 }, { "epoch": 1.7011128775834659, "grad_norm": 9.241213734802546, "learning_rate": 4.713380491393407e-06, "loss": 2.056382179260254, "step": 1070 }, { "epoch": 1.7027027027027026, "grad_norm": 7.777110027923069, "learning_rate": 4.704145337281939e-06, "loss": 1.5880179405212402, "step": 1071 }, { "epoch": 1.7042925278219396, "grad_norm": 10.501792077584147, "learning_rate": 4.694911195921443e-06, "loss": 1.3245116472244263, "step": 1072 }, { "epoch": 1.7058823529411766, "grad_norm": 14.581112130568533, "learning_rate": 4.685678098921646e-06, "loss": 1.5311322212219238, "step": 1073 }, { "epoch": 1.7074721780604134, "grad_norm": 23.662013399713306, "learning_rate": 4.676446077888702e-06, "loss": 1.4951614141464233, "step": 1074 }, { "epoch": 1.7090620031796502, "grad_norm": 12.80668956016228, "learning_rate": 4.66721516442508e-06, "loss": 1.113204002380371, "step": 1075 }, { "epoch": 1.7106518282988872, "grad_norm": 8.974898814582193, "learning_rate": 4.65798539012946e-06, "loss": 1.6649549007415771, "step": 1076 }, { "epoch": 1.712241653418124, "grad_norm": 16.18611041927037, "learning_rate": 4.648756786596623e-06, "loss": 1.1735410690307617, "step": 1077 }, { "epoch": 1.7138314785373607, "grad_norm": 13.935037443806461, "learning_rate": 4.6395293854173395e-06, "loss": 1.265052080154419, "step": 1078 }, { "epoch": 1.7154213036565977, "grad_norm": 7.812844399134349, "learning_rate": 4.630303218178268e-06, "loss": 1.7603724002838135, "step": 1079 }, { "epoch": 1.7170111287758347, "grad_norm": 21.589524221730095, "learning_rate": 4.6210783164618365e-06, "loss": 1.5719702243804932, "step": 1080 }, { "epoch": 1.7186009538950715, "grad_norm": 10.395595369165518, "learning_rate": 4.611854711846147e-06, "loss": 0.8241512775421143, "step": 1081 }, { "epoch": 1.7201907790143083, "grad_norm": 9.214004268952669, "learning_rate": 4.6026324359048605e-06, "loss": 0.8531097173690796, "step": 1082 }, { "epoch": 1.7217806041335453, "grad_norm": 28.760024359718773, "learning_rate": 4.593411520207089e-06, "loss": 1.7780548334121704, "step": 1083 }, { "epoch": 1.7233704292527823, "grad_norm": 12.82561855274378, "learning_rate": 4.584191996317285e-06, "loss": 1.3491740226745605, "step": 1084 }, { "epoch": 1.724960254372019, "grad_norm": 11.42159575316779, "learning_rate": 4.574973895795142e-06, "loss": 1.5338444709777832, "step": 1085 }, { "epoch": 1.7265500794912558, "grad_norm": 13.967603062524692, "learning_rate": 4.565757250195478e-06, "loss": 1.6991304159164429, "step": 1086 }, { "epoch": 1.7281399046104928, "grad_norm": 8.779947538783102, "learning_rate": 4.5565420910681334e-06, "loss": 1.460195779800415, "step": 1087 }, { "epoch": 1.7297297297297298, "grad_norm": 13.016026044250268, "learning_rate": 4.547328449957855e-06, "loss": 1.5150680541992188, "step": 1088 }, { "epoch": 1.7313195548489666, "grad_norm": 13.351283400681424, "learning_rate": 4.538116358404197e-06, "loss": 1.753143548965454, "step": 1089 }, { "epoch": 1.7329093799682034, "grad_norm": 19.367068157556663, "learning_rate": 4.528905847941411e-06, "loss": 2.2308993339538574, "step": 1090 }, { "epoch": 1.7344992050874404, "grad_norm": 44.34625747692627, "learning_rate": 4.5196969500983315e-06, "loss": 2.8182756900787354, "step": 1091 }, { "epoch": 1.7360890302066774, "grad_norm": 25.01461465314549, "learning_rate": 4.510489696398276e-06, "loss": 0.9819879531860352, "step": 1092 }, { "epoch": 1.7376788553259142, "grad_norm": 10.034790142202231, "learning_rate": 4.501284118358932e-06, "loss": 2.123917818069458, "step": 1093 }, { "epoch": 1.739268680445151, "grad_norm": 10.237222093871212, "learning_rate": 4.492080247492253e-06, "loss": 1.394527554512024, "step": 1094 }, { "epoch": 1.740858505564388, "grad_norm": 8.342748762012173, "learning_rate": 4.482878115304349e-06, "loss": 1.162081003189087, "step": 1095 }, { "epoch": 1.742448330683625, "grad_norm": 17.246089483390268, "learning_rate": 4.473677753295375e-06, "loss": 1.4420222043991089, "step": 1096 }, { "epoch": 1.7440381558028617, "grad_norm": 6.643472749032102, "learning_rate": 4.4644791929594275e-06, "loss": 1.4935743808746338, "step": 1097 }, { "epoch": 1.7456279809220985, "grad_norm": 14.660507382859036, "learning_rate": 4.455282465784439e-06, "loss": 1.5564453601837158, "step": 1098 }, { "epoch": 1.7472178060413355, "grad_norm": 12.368786222061885, "learning_rate": 4.446087603252063e-06, "loss": 1.4903934001922607, "step": 1099 }, { "epoch": 1.7488076311605725, "grad_norm": 11.650791064204592, "learning_rate": 4.436894636837572e-06, "loss": 1.6506072282791138, "step": 1100 }, { "epoch": 1.7503974562798092, "grad_norm": 8.847823276469398, "learning_rate": 4.427703598009746e-06, "loss": 2.0559582710266113, "step": 1101 }, { "epoch": 1.751987281399046, "grad_norm": 17.273679884242704, "learning_rate": 4.418514518230769e-06, "loss": 1.9226901531219482, "step": 1102 }, { "epoch": 1.753577106518283, "grad_norm": 11.410574357531535, "learning_rate": 4.4093274289561175e-06, "loss": 1.615866780281067, "step": 1103 }, { "epoch": 1.75516693163752, "grad_norm": 7.140395814782854, "learning_rate": 4.400142361634455e-06, "loss": 1.7190699577331543, "step": 1104 }, { "epoch": 1.7567567567567568, "grad_norm": 8.70500340975263, "learning_rate": 4.390959347707521e-06, "loss": 1.3968921899795532, "step": 1105 }, { "epoch": 1.7583465818759936, "grad_norm": 10.63964214517544, "learning_rate": 4.381778418610032e-06, "loss": 1.8509771823883057, "step": 1106 }, { "epoch": 1.7599364069952306, "grad_norm": 12.689878068855194, "learning_rate": 4.372599605769559e-06, "loss": 1.6396044492721558, "step": 1107 }, { "epoch": 1.7615262321144676, "grad_norm": 9.633114460073033, "learning_rate": 4.363422940606435e-06, "loss": 2.012916326522827, "step": 1108 }, { "epoch": 1.7631160572337043, "grad_norm": 10.88535400055693, "learning_rate": 4.354248454533642e-06, "loss": 1.6241707801818848, "step": 1109 }, { "epoch": 1.7647058823529411, "grad_norm": 14.496830646494981, "learning_rate": 4.3450761789567e-06, "loss": 1.930153250694275, "step": 1110 }, { "epoch": 1.7662957074721781, "grad_norm": 8.198111506627123, "learning_rate": 4.33590614527356e-06, "loss": 1.298691987991333, "step": 1111 }, { "epoch": 1.767885532591415, "grad_norm": 17.898130255991628, "learning_rate": 4.326738384874504e-06, "loss": 1.3378024101257324, "step": 1112 }, { "epoch": 1.7694753577106517, "grad_norm": 17.628047594457833, "learning_rate": 4.3175729291420274e-06, "loss": 1.0957921743392944, "step": 1113 }, { "epoch": 1.7710651828298887, "grad_norm": 19.940812720592632, "learning_rate": 4.308409809450742e-06, "loss": 1.656261682510376, "step": 1114 }, { "epoch": 1.7726550079491257, "grad_norm": 13.828440615939675, "learning_rate": 4.299249057167257e-06, "loss": 1.5342800617218018, "step": 1115 }, { "epoch": 1.7742448330683624, "grad_norm": 20.91069601345002, "learning_rate": 4.29009070365008e-06, "loss": 2.8556060791015625, "step": 1116 }, { "epoch": 1.7758346581875992, "grad_norm": 11.843670910836927, "learning_rate": 4.280934780249508e-06, "loss": 1.4000861644744873, "step": 1117 }, { "epoch": 1.7774244833068362, "grad_norm": 9.403771542463636, "learning_rate": 4.271781318307521e-06, "loss": 1.664405345916748, "step": 1118 }, { "epoch": 1.7790143084260732, "grad_norm": 9.385108394505194, "learning_rate": 4.262630349157668e-06, "loss": 1.1378430128097534, "step": 1119 }, { "epoch": 1.78060413354531, "grad_norm": 11.862856460689603, "learning_rate": 4.253481904124968e-06, "loss": 1.1843236684799194, "step": 1120 }, { "epoch": 1.7821939586645468, "grad_norm": 10.593527889540056, "learning_rate": 4.244336014525802e-06, "loss": 1.4108467102050781, "step": 1121 }, { "epoch": 1.7837837837837838, "grad_norm": 9.984790464911894, "learning_rate": 4.235192711667801e-06, "loss": 1.5492326021194458, "step": 1122 }, { "epoch": 1.7853736089030208, "grad_norm": 17.193473504259163, "learning_rate": 4.226052026849737e-06, "loss": 2.0661959648132324, "step": 1123 }, { "epoch": 1.7869634340222575, "grad_norm": 10.56957621143626, "learning_rate": 4.216913991361426e-06, "loss": 1.0835058689117432, "step": 1124 }, { "epoch": 1.7885532591414943, "grad_norm": 13.32367793440441, "learning_rate": 4.207778636483616e-06, "loss": 1.1431982517242432, "step": 1125 }, { "epoch": 1.7901430842607313, "grad_norm": 12.912193647240832, "learning_rate": 4.198645993487872e-06, "loss": 1.5615761280059814, "step": 1126 }, { "epoch": 1.7917329093799683, "grad_norm": 14.856664790888582, "learning_rate": 4.1895160936364835e-06, "loss": 1.387691617012024, "step": 1127 }, { "epoch": 1.793322734499205, "grad_norm": 18.856555512029352, "learning_rate": 4.180388968182344e-06, "loss": 2.3428382873535156, "step": 1128 }, { "epoch": 1.7949125596184419, "grad_norm": 13.097645343955634, "learning_rate": 4.171264648368852e-06, "loss": 1.2207417488098145, "step": 1129 }, { "epoch": 1.7965023847376789, "grad_norm": 13.416910681636399, "learning_rate": 4.1621431654298024e-06, "loss": 1.4143396615982056, "step": 1130 }, { "epoch": 1.7980922098569159, "grad_norm": 15.210472614344281, "learning_rate": 4.153024550589281e-06, "loss": 1.1632781028747559, "step": 1131 }, { "epoch": 1.7996820349761526, "grad_norm": 13.475608442079784, "learning_rate": 4.143908835061551e-06, "loss": 1.1491469144821167, "step": 1132 }, { "epoch": 1.8012718600953894, "grad_norm": 13.319515226159563, "learning_rate": 4.134796050050953e-06, "loss": 1.3703821897506714, "step": 1133 }, { "epoch": 1.8028616852146264, "grad_norm": 14.676186602780401, "learning_rate": 4.125686226751797e-06, "loss": 1.8206853866577148, "step": 1134 }, { "epoch": 1.8044515103338634, "grad_norm": 13.940910741736662, "learning_rate": 4.116579396348253e-06, "loss": 1.140876293182373, "step": 1135 }, { "epoch": 1.8060413354531002, "grad_norm": 11.407075681936295, "learning_rate": 4.107475590014249e-06, "loss": 1.3825416564941406, "step": 1136 }, { "epoch": 1.807631160572337, "grad_norm": 19.395287518899767, "learning_rate": 4.098374838913357e-06, "loss": 1.551064133644104, "step": 1137 }, { "epoch": 1.809220985691574, "grad_norm": 13.327024051149664, "learning_rate": 4.089277174198694e-06, "loss": 1.6357027292251587, "step": 1138 }, { "epoch": 1.810810810810811, "grad_norm": 9.964726159477475, "learning_rate": 4.080182627012809e-06, "loss": 1.3184521198272705, "step": 1139 }, { "epoch": 1.8124006359300477, "grad_norm": 12.739326586882429, "learning_rate": 4.0710912284875825e-06, "loss": 1.131117343902588, "step": 1140 }, { "epoch": 1.8139904610492845, "grad_norm": 10.310528998181276, "learning_rate": 4.062003009744115e-06, "loss": 0.9722883105278015, "step": 1141 }, { "epoch": 1.8155802861685215, "grad_norm": 9.048014501981426, "learning_rate": 4.0529180018926204e-06, "loss": 1.4159162044525146, "step": 1142 }, { "epoch": 1.8171701112877583, "grad_norm": 11.393083494341436, "learning_rate": 4.0438362360323235e-06, "loss": 1.8594584465026855, "step": 1143 }, { "epoch": 1.818759936406995, "grad_norm": 9.77361123634561, "learning_rate": 4.0347577432513515e-06, "loss": 1.2384434938430786, "step": 1144 }, { "epoch": 1.820349761526232, "grad_norm": 9.69850917062634, "learning_rate": 4.025682554626627e-06, "loss": 1.332160234451294, "step": 1145 }, { "epoch": 1.821939586645469, "grad_norm": 8.92969480348373, "learning_rate": 4.016610701223761e-06, "loss": 1.4852681159973145, "step": 1146 }, { "epoch": 1.8235294117647058, "grad_norm": 9.888731821630902, "learning_rate": 4.007542214096947e-06, "loss": 0.9297858476638794, "step": 1147 }, { "epoch": 1.8251192368839426, "grad_norm": 12.811337259058794, "learning_rate": 3.99847712428886e-06, "loss": 1.359275221824646, "step": 1148 }, { "epoch": 1.8267090620031796, "grad_norm": 19.484812837344027, "learning_rate": 3.98941546283054e-06, "loss": 1.4129400253295898, "step": 1149 }, { "epoch": 1.8282988871224166, "grad_norm": 9.3270830388869, "learning_rate": 3.980357260741293e-06, "loss": 2.279143810272217, "step": 1150 }, { "epoch": 1.8298887122416534, "grad_norm": 16.43903195846325, "learning_rate": 3.971302549028584e-06, "loss": 1.1392877101898193, "step": 1151 }, { "epoch": 1.8314785373608902, "grad_norm": 14.606589733785624, "learning_rate": 3.96225135868793e-06, "loss": 1.7062054872512817, "step": 1152 }, { "epoch": 1.8330683624801272, "grad_norm": 14.665917808717666, "learning_rate": 3.953203720702793e-06, "loss": 1.5431506633758545, "step": 1153 }, { "epoch": 1.8346581875993642, "grad_norm": 13.356989223513242, "learning_rate": 3.944159666044475e-06, "loss": 1.9477022886276245, "step": 1154 }, { "epoch": 1.836248012718601, "grad_norm": 8.341193828787407, "learning_rate": 3.935119225672011e-06, "loss": 1.216181755065918, "step": 1155 }, { "epoch": 1.8378378378378377, "grad_norm": 10.457325709133471, "learning_rate": 3.926082430532067e-06, "loss": 1.0427238941192627, "step": 1156 }, { "epoch": 1.8394276629570747, "grad_norm": 16.0775639146379, "learning_rate": 3.917049311558826e-06, "loss": 2.7715682983398438, "step": 1157 }, { "epoch": 1.8410174880763117, "grad_norm": 21.198142682060794, "learning_rate": 3.908019899673893e-06, "loss": 1.289351463317871, "step": 1158 }, { "epoch": 1.8426073131955485, "grad_norm": 10.329857163101414, "learning_rate": 3.898994225786178e-06, "loss": 2.824537754058838, "step": 1159 }, { "epoch": 1.8441971383147853, "grad_norm": 8.45326317662246, "learning_rate": 3.889972320791794e-06, "loss": 1.3787089586257935, "step": 1160 }, { "epoch": 1.8457869634340223, "grad_norm": 9.351953802142202, "learning_rate": 3.880954215573959e-06, "loss": 1.7395728826522827, "step": 1161 }, { "epoch": 1.8473767885532593, "grad_norm": 11.805906719129295, "learning_rate": 3.87193994100288e-06, "loss": 1.3916877508163452, "step": 1162 }, { "epoch": 1.848966613672496, "grad_norm": 11.13344464632568, "learning_rate": 3.8629295279356495e-06, "loss": 1.7816511392593384, "step": 1163 }, { "epoch": 1.8505564387917328, "grad_norm": 13.802686240477355, "learning_rate": 3.853923007216148e-06, "loss": 1.5507429838180542, "step": 1164 }, { "epoch": 1.8521462639109698, "grad_norm": 9.43063113054961, "learning_rate": 3.8449204096749235e-06, "loss": 1.4492930173873901, "step": 1165 }, { "epoch": 1.8537360890302068, "grad_norm": 13.107457709159075, "learning_rate": 3.8359217661291e-06, "loss": 1.2537250518798828, "step": 1166 }, { "epoch": 1.8553259141494436, "grad_norm": 16.879618538572284, "learning_rate": 3.826927107382266e-06, "loss": 1.6310523748397827, "step": 1167 }, { "epoch": 1.8569157392686804, "grad_norm": 15.612894861680394, "learning_rate": 3.817936464224367e-06, "loss": 1.04306960105896, "step": 1168 }, { "epoch": 1.8585055643879174, "grad_norm": 11.568208703788091, "learning_rate": 3.8089498674316038e-06, "loss": 1.5552500486373901, "step": 1169 }, { "epoch": 1.8600953895071544, "grad_norm": 10.170721127512651, "learning_rate": 3.7999673477663275e-06, "loss": 1.0391424894332886, "step": 1170 }, { "epoch": 1.8616852146263911, "grad_norm": 13.356876657888899, "learning_rate": 3.79098893597693e-06, "loss": 1.7671284675598145, "step": 1171 }, { "epoch": 1.863275039745628, "grad_norm": 10.694613723154546, "learning_rate": 3.782014662797745e-06, "loss": 1.4554939270019531, "step": 1172 }, { "epoch": 1.864864864864865, "grad_norm": 11.259540296955532, "learning_rate": 3.773044558948934e-06, "loss": 1.2981938123703003, "step": 1173 }, { "epoch": 1.866454689984102, "grad_norm": 9.445366035807636, "learning_rate": 3.764078655136391e-06, "loss": 1.6467961072921753, "step": 1174 }, { "epoch": 1.8680445151033387, "grad_norm": 10.085891490611074, "learning_rate": 3.75511698205163e-06, "loss": 1.3536697626113892, "step": 1175 }, { "epoch": 1.8696343402225755, "grad_norm": 14.678813192106523, "learning_rate": 3.7461595703716847e-06, "loss": 1.0834566354751587, "step": 1176 }, { "epoch": 1.8712241653418125, "grad_norm": 10.338033788368241, "learning_rate": 3.737206450758999e-06, "loss": 1.3749675750732422, "step": 1177 }, { "epoch": 1.8728139904610492, "grad_norm": 12.932288807465243, "learning_rate": 3.7282576538613257e-06, "loss": 1.0677857398986816, "step": 1178 }, { "epoch": 1.874403815580286, "grad_norm": 14.637002385739367, "learning_rate": 3.7193132103116204e-06, "loss": 1.2733405828475952, "step": 1179 }, { "epoch": 1.875993640699523, "grad_norm": 9.054040962514696, "learning_rate": 3.7103731507279383e-06, "loss": 1.2556195259094238, "step": 1180 }, { "epoch": 1.87758346581876, "grad_norm": 10.435062065797702, "learning_rate": 3.7014375057133244e-06, "loss": 1.6212596893310547, "step": 1181 }, { "epoch": 1.8791732909379968, "grad_norm": 8.755640184498002, "learning_rate": 3.692506305855713e-06, "loss": 1.2575325965881348, "step": 1182 }, { "epoch": 1.8807631160572336, "grad_norm": 7.678575585216897, "learning_rate": 3.683579581727824e-06, "loss": 1.2312979698181152, "step": 1183 }, { "epoch": 1.8823529411764706, "grad_norm": 9.349154680394342, "learning_rate": 3.674657363887054e-06, "loss": 1.901777982711792, "step": 1184 }, { "epoch": 1.8839427662957076, "grad_norm": 9.810069442357346, "learning_rate": 3.6657396828753777e-06, "loss": 1.9274979829788208, "step": 1185 }, { "epoch": 1.8855325914149443, "grad_norm": 17.442528159931612, "learning_rate": 3.656826569219233e-06, "loss": 2.0956759452819824, "step": 1186 }, { "epoch": 1.8871224165341811, "grad_norm": 14.980699322166306, "learning_rate": 3.6479180534294266e-06, "loss": 1.862923502922058, "step": 1187 }, { "epoch": 1.8887122416534181, "grad_norm": 13.712955069068984, "learning_rate": 3.639014166001028e-06, "loss": 1.348436951637268, "step": 1188 }, { "epoch": 1.890302066772655, "grad_norm": 20.71826439241576, "learning_rate": 3.6301149374132615e-06, "loss": 2.031398296356201, "step": 1189 }, { "epoch": 1.8918918918918919, "grad_norm": 9.594973129413956, "learning_rate": 3.6212203981294036e-06, "loss": 1.497904658317566, "step": 1190 }, { "epoch": 1.8934817170111287, "grad_norm": 11.319616227089226, "learning_rate": 3.612330578596679e-06, "loss": 1.0648324489593506, "step": 1191 }, { "epoch": 1.8950715421303657, "grad_norm": 10.380379765203418, "learning_rate": 3.603445509246154e-06, "loss": 1.3865478038787842, "step": 1192 }, { "epoch": 1.8966613672496027, "grad_norm": 13.477553345089566, "learning_rate": 3.5945652204926372e-06, "loss": 1.2981629371643066, "step": 1193 }, { "epoch": 1.8982511923688394, "grad_norm": 12.006104786236229, "learning_rate": 3.585689742734572e-06, "loss": 1.333137035369873, "step": 1194 }, { "epoch": 1.8998410174880762, "grad_norm": 8.21852860282474, "learning_rate": 3.5768191063539326e-06, "loss": 1.3987650871276855, "step": 1195 }, { "epoch": 1.9014308426073132, "grad_norm": 18.415493771192033, "learning_rate": 3.567953341716119e-06, "loss": 1.9830102920532227, "step": 1196 }, { "epoch": 1.9030206677265502, "grad_norm": 9.680446315656202, "learning_rate": 3.5590924791698567e-06, "loss": 1.4442445039749146, "step": 1197 }, { "epoch": 1.904610492845787, "grad_norm": 11.630049989862073, "learning_rate": 3.55023654904709e-06, "loss": 1.189267873764038, "step": 1198 }, { "epoch": 1.9062003179650238, "grad_norm": 11.072476326883493, "learning_rate": 3.5413855816628793e-06, "loss": 1.0022368431091309, "step": 1199 }, { "epoch": 1.9077901430842608, "grad_norm": 10.767005058187586, "learning_rate": 3.5325396073152964e-06, "loss": 1.1614134311676025, "step": 1200 }, { "epoch": 1.9093799682034978, "grad_norm": 12.5607678458347, "learning_rate": 3.5236986562853193e-06, "loss": 1.1036311388015747, "step": 1201 }, { "epoch": 1.9109697933227345, "grad_norm": 12.982347384348031, "learning_rate": 3.5148627588367345e-06, "loss": 2.124173164367676, "step": 1202 }, { "epoch": 1.9125596184419713, "grad_norm": 14.021168603906222, "learning_rate": 3.506031945216028e-06, "loss": 1.0432281494140625, "step": 1203 }, { "epoch": 1.9141494435612083, "grad_norm": 10.671876312297726, "learning_rate": 3.49720624565228e-06, "loss": 1.9567753076553345, "step": 1204 }, { "epoch": 1.9157392686804453, "grad_norm": 18.378038994448797, "learning_rate": 3.488385690357068e-06, "loss": 1.7380425930023193, "step": 1205 }, { "epoch": 1.917329093799682, "grad_norm": 9.523531652407353, "learning_rate": 3.4795703095243594e-06, "loss": 1.3146196603775024, "step": 1206 }, { "epoch": 1.9189189189189189, "grad_norm": 8.311896562107178, "learning_rate": 3.4707601333304093e-06, "loss": 1.1068713665008545, "step": 1207 }, { "epoch": 1.9205087440381559, "grad_norm": 11.447076014813135, "learning_rate": 3.4619551919336538e-06, "loss": 2.32598876953125, "step": 1208 }, { "epoch": 1.9220985691573926, "grad_norm": 11.267848664247097, "learning_rate": 3.453155515474612e-06, "loss": 1.343285322189331, "step": 1209 }, { "epoch": 1.9236883942766294, "grad_norm": 18.00190098116035, "learning_rate": 3.44436113407578e-06, "loss": 1.4658093452453613, "step": 1210 }, { "epoch": 1.9252782193958664, "grad_norm": 12.268418304840932, "learning_rate": 3.435572077841528e-06, "loss": 1.5446200370788574, "step": 1211 }, { "epoch": 1.9268680445151034, "grad_norm": 10.940958777182, "learning_rate": 3.4267883768579996e-06, "loss": 1.5812410116195679, "step": 1212 }, { "epoch": 1.9284578696343402, "grad_norm": 10.559323390919907, "learning_rate": 3.4180100611930012e-06, "loss": 1.3475531339645386, "step": 1213 }, { "epoch": 1.930047694753577, "grad_norm": 11.972634265361458, "learning_rate": 3.4092371608959085e-06, "loss": 1.5513516664505005, "step": 1214 }, { "epoch": 1.931637519872814, "grad_norm": 12.588846619515197, "learning_rate": 3.4004697059975587e-06, "loss": 1.2521653175354004, "step": 1215 }, { "epoch": 1.933227344992051, "grad_norm": 13.983216807336552, "learning_rate": 3.3917077265101505e-06, "loss": 1.5460331439971924, "step": 1216 }, { "epoch": 1.9348171701112877, "grad_norm": 16.21049385463335, "learning_rate": 3.3829512524271378e-06, "loss": 1.8424224853515625, "step": 1217 }, { "epoch": 1.9364069952305245, "grad_norm": 8.623575480512274, "learning_rate": 3.3742003137231273e-06, "loss": 1.7361804246902466, "step": 1218 }, { "epoch": 1.9379968203497615, "grad_norm": 12.923991206768896, "learning_rate": 3.365454940353779e-06, "loss": 1.7546651363372803, "step": 1219 }, { "epoch": 1.9395866454689985, "grad_norm": 8.194594971743348, "learning_rate": 3.3567151622557033e-06, "loss": 1.4870991706848145, "step": 1220 }, { "epoch": 1.9411764705882353, "grad_norm": 16.249647945871697, "learning_rate": 3.3479810093463547e-06, "loss": 2.124351978302002, "step": 1221 }, { "epoch": 1.942766295707472, "grad_norm": 9.919591367300772, "learning_rate": 3.3392525115239353e-06, "loss": 1.6585767269134521, "step": 1222 }, { "epoch": 1.944356120826709, "grad_norm": 9.884097866620877, "learning_rate": 3.330529698667284e-06, "loss": 1.5039808750152588, "step": 1223 }, { "epoch": 1.945945945945946, "grad_norm": 13.899317503642905, "learning_rate": 3.321812600635783e-06, "loss": 1.514086365699768, "step": 1224 }, { "epoch": 1.9475357710651828, "grad_norm": 11.120223530554286, "learning_rate": 3.3131012472692515e-06, "loss": 2.1714556217193604, "step": 1225 }, { "epoch": 1.9491255961844196, "grad_norm": 10.914354080354897, "learning_rate": 3.3043956683878437e-06, "loss": 1.9673073291778564, "step": 1226 }, { "epoch": 1.9507154213036566, "grad_norm": 12.365178192411566, "learning_rate": 3.2956958937919448e-06, "loss": 1.3523991107940674, "step": 1227 }, { "epoch": 1.9523052464228936, "grad_norm": 8.521930902170034, "learning_rate": 3.2870019532620744e-06, "loss": 1.9819375276565552, "step": 1228 }, { "epoch": 1.9538950715421304, "grad_norm": 10.504799370654721, "learning_rate": 3.278313876558781e-06, "loss": 1.7047924995422363, "step": 1229 }, { "epoch": 1.9554848966613672, "grad_norm": 9.055204538526395, "learning_rate": 3.269631693422537e-06, "loss": 1.5361595153808594, "step": 1230 }, { "epoch": 1.9570747217806042, "grad_norm": 6.708125686672845, "learning_rate": 3.2609554335736435e-06, "loss": 1.481113314628601, "step": 1231 }, { "epoch": 1.9586645468998412, "grad_norm": 9.296827575354715, "learning_rate": 3.2522851267121245e-06, "loss": 1.2066642045974731, "step": 1232 }, { "epoch": 1.960254372019078, "grad_norm": 12.594687183204563, "learning_rate": 3.2436208025176265e-06, "loss": 1.624826431274414, "step": 1233 }, { "epoch": 1.9618441971383147, "grad_norm": 9.957789777175805, "learning_rate": 3.2349624906493164e-06, "loss": 1.5784817934036255, "step": 1234 }, { "epoch": 1.9634340222575517, "grad_norm": 9.4320758167637, "learning_rate": 3.2263102207457788e-06, "loss": 1.2005516290664673, "step": 1235 }, { "epoch": 1.9650238473767887, "grad_norm": 12.596584496831804, "learning_rate": 3.217664022424917e-06, "loss": 1.8746891021728516, "step": 1236 }, { "epoch": 1.9666136724960255, "grad_norm": 10.576192009066764, "learning_rate": 3.2090239252838496e-06, "loss": 2.1368587017059326, "step": 1237 }, { "epoch": 1.9682034976152623, "grad_norm": 19.169212542664745, "learning_rate": 3.2003899588988143e-06, "loss": 1.5662051439285278, "step": 1238 }, { "epoch": 1.9697933227344993, "grad_norm": 15.769420962817392, "learning_rate": 3.191762152825054e-06, "loss": 1.284698247909546, "step": 1239 }, { "epoch": 1.9713831478537363, "grad_norm": 9.28155247533557, "learning_rate": 3.1831405365967315e-06, "loss": 1.336848497390747, "step": 1240 }, { "epoch": 1.972972972972973, "grad_norm": 12.239541301618456, "learning_rate": 3.1745251397268175e-06, "loss": 1.6406328678131104, "step": 1241 }, { "epoch": 1.9745627980922098, "grad_norm": 17.29048355766668, "learning_rate": 3.1659159917069927e-06, "loss": 0.8950412273406982, "step": 1242 }, { "epoch": 1.9761526232114468, "grad_norm": 10.856600740023122, "learning_rate": 3.1573131220075494e-06, "loss": 1.6217354536056519, "step": 1243 }, { "epoch": 1.9777424483306836, "grad_norm": 8.637494876738593, "learning_rate": 3.1487165600772883e-06, "loss": 1.7622885704040527, "step": 1244 }, { "epoch": 1.9793322734499204, "grad_norm": 9.898715586809786, "learning_rate": 3.140126335343413e-06, "loss": 1.273987889289856, "step": 1245 }, { "epoch": 1.9809220985691574, "grad_norm": 21.598449261710204, "learning_rate": 3.1315424772114404e-06, "loss": 0.9739120006561279, "step": 1246 }, { "epoch": 1.9825119236883944, "grad_norm": 11.108789078925794, "learning_rate": 3.1229650150650905e-06, "loss": 1.1030012369155884, "step": 1247 }, { "epoch": 1.9841017488076311, "grad_norm": 11.772345198716755, "learning_rate": 3.1143939782661875e-06, "loss": 1.5073179006576538, "step": 1248 }, { "epoch": 1.985691573926868, "grad_norm": 10.00286915146364, "learning_rate": 3.1058293961545648e-06, "loss": 1.473888874053955, "step": 1249 }, { "epoch": 1.987281399046105, "grad_norm": 13.510717709557214, "learning_rate": 3.0972712980479567e-06, "loss": 1.3762142658233643, "step": 1250 }, { "epoch": 1.988871224165342, "grad_norm": 17.515999727799556, "learning_rate": 3.0887197132419033e-06, "loss": 1.0883512496948242, "step": 1251 }, { "epoch": 1.9904610492845787, "grad_norm": 11.385402537880124, "learning_rate": 3.0801746710096497e-06, "loss": 1.5562313795089722, "step": 1252 }, { "epoch": 1.9920508744038155, "grad_norm": 9.894651386230901, "learning_rate": 3.0716362006020443e-06, "loss": 1.555297613143921, "step": 1253 }, { "epoch": 1.9936406995230525, "grad_norm": 11.531440417844689, "learning_rate": 3.0631043312474375e-06, "loss": 1.4571053981781006, "step": 1254 }, { "epoch": 1.9952305246422894, "grad_norm": 11.438541092850024, "learning_rate": 3.054579092151586e-06, "loss": 1.3141937255859375, "step": 1255 }, { "epoch": 1.9968203497615262, "grad_norm": 10.832938440767606, "learning_rate": 3.0460605124975483e-06, "loss": 1.8371860980987549, "step": 1256 }, { "epoch": 1.998410174880763, "grad_norm": 10.707640697615673, "learning_rate": 3.0375486214455895e-06, "loss": 1.7980101108551025, "step": 1257 }, { "epoch": 2.0, "grad_norm": 9.479988371577287, "learning_rate": 3.0290434481330746e-06, "loss": 1.6025118827819824, "step": 1258 }, { "epoch": 2.001589825119237, "grad_norm": 15.618086616205854, "learning_rate": 3.0205450216743753e-06, "loss": 0.5739709734916687, "step": 1259 }, { "epoch": 2.0031796502384736, "grad_norm": 11.240797695090038, "learning_rate": 3.012053371160768e-06, "loss": 0.8699474334716797, "step": 1260 }, { "epoch": 2.0047694753577106, "grad_norm": 9.552987335691743, "learning_rate": 3.003568525660334e-06, "loss": 0.43918731808662415, "step": 1261 }, { "epoch": 2.0063593004769475, "grad_norm": 7.818036854277269, "learning_rate": 2.9950905142178594e-06, "loss": 0.39753860235214233, "step": 1262 }, { "epoch": 2.0079491255961845, "grad_norm": 9.416756298898695, "learning_rate": 2.9866193658547365e-06, "loss": 0.7467893362045288, "step": 1263 }, { "epoch": 2.009538950715421, "grad_norm": 9.986682901762805, "learning_rate": 2.978155109568864e-06, "loss": 0.5302591919898987, "step": 1264 }, { "epoch": 2.011128775834658, "grad_norm": 7.2866809350396275, "learning_rate": 2.9696977743345533e-06, "loss": 0.36087679862976074, "step": 1265 }, { "epoch": 2.012718600953895, "grad_norm": 7.8227632427897555, "learning_rate": 2.961247389102413e-06, "loss": 0.6572636365890503, "step": 1266 }, { "epoch": 2.014308426073132, "grad_norm": 9.835888531542967, "learning_rate": 2.952803982799271e-06, "loss": 0.7076925039291382, "step": 1267 }, { "epoch": 2.0158982511923687, "grad_norm": 14.608403115303773, "learning_rate": 2.94436758432806e-06, "loss": 0.4047316908836365, "step": 1268 }, { "epoch": 2.0174880763116056, "grad_norm": 7.860380258115871, "learning_rate": 2.935938222567727e-06, "loss": 0.6956659555435181, "step": 1269 }, { "epoch": 2.0190779014308426, "grad_norm": 15.00644807734612, "learning_rate": 2.927515926373129e-06, "loss": 0.774339497089386, "step": 1270 }, { "epoch": 2.0206677265500796, "grad_norm": 15.25860483287857, "learning_rate": 2.9191007245749404e-06, "loss": 0.3957682251930237, "step": 1271 }, { "epoch": 2.022257551669316, "grad_norm": 8.500581046667302, "learning_rate": 2.9106926459795426e-06, "loss": 0.5914150476455688, "step": 1272 }, { "epoch": 2.023847376788553, "grad_norm": 12.026226845794612, "learning_rate": 2.902291719368945e-06, "loss": 0.4321010708808899, "step": 1273 }, { "epoch": 2.02543720190779, "grad_norm": 11.930831367434969, "learning_rate": 2.8938979735006635e-06, "loss": 0.6191248893737793, "step": 1274 }, { "epoch": 2.027027027027027, "grad_norm": 9.521327776569159, "learning_rate": 2.885511437107638e-06, "loss": 0.41829949617385864, "step": 1275 }, { "epoch": 2.0286168521462637, "grad_norm": 21.361526672258712, "learning_rate": 2.8771321388981334e-06, "loss": 0.9986895322799683, "step": 1276 }, { "epoch": 2.0302066772655007, "grad_norm": 10.078827235638975, "learning_rate": 2.868760107555628e-06, "loss": 0.5669399499893188, "step": 1277 }, { "epoch": 2.0317965023847377, "grad_norm": 12.278853369947052, "learning_rate": 2.860395371738736e-06, "loss": 0.6854183673858643, "step": 1278 }, { "epoch": 2.0333863275039747, "grad_norm": 9.391439827398543, "learning_rate": 2.8520379600810886e-06, "loss": 0.276329904794693, "step": 1279 }, { "epoch": 2.0349761526232113, "grad_norm": 9.96245742568317, "learning_rate": 2.843687901191248e-06, "loss": 0.7409742474555969, "step": 1280 }, { "epoch": 2.0365659777424483, "grad_norm": 9.636782695588359, "learning_rate": 2.8353452236526097e-06, "loss": 0.4047026038169861, "step": 1281 }, { "epoch": 2.0381558028616853, "grad_norm": 16.916538036900032, "learning_rate": 2.8270099560232992e-06, "loss": 0.6543888449668884, "step": 1282 }, { "epoch": 2.0397456279809223, "grad_norm": 10.992728384261314, "learning_rate": 2.8186821268360757e-06, "loss": 0.40383610129356384, "step": 1283 }, { "epoch": 2.041335453100159, "grad_norm": 12.030605161431646, "learning_rate": 2.810361764598241e-06, "loss": 0.5539910197257996, "step": 1284 }, { "epoch": 2.042925278219396, "grad_norm": 9.36847445067224, "learning_rate": 2.802048897791529e-06, "loss": 0.4439224898815155, "step": 1285 }, { "epoch": 2.044515103338633, "grad_norm": 10.237830248539021, "learning_rate": 2.7937435548720232e-06, "loss": 0.32635053992271423, "step": 1286 }, { "epoch": 2.04610492845787, "grad_norm": 13.144565026480977, "learning_rate": 2.785445764270047e-06, "loss": 0.540446937084198, "step": 1287 }, { "epoch": 2.0476947535771064, "grad_norm": 9.94255306056615, "learning_rate": 2.77715555439007e-06, "loss": 0.749458909034729, "step": 1288 }, { "epoch": 2.0492845786963434, "grad_norm": 8.42062161680688, "learning_rate": 2.7688729536106175e-06, "loss": 0.42192327976226807, "step": 1289 }, { "epoch": 2.0508744038155804, "grad_norm": 9.790604103222373, "learning_rate": 2.7605979902841635e-06, "loss": 0.18696679174900055, "step": 1290 }, { "epoch": 2.0524642289348174, "grad_norm": 8.557591397101714, "learning_rate": 2.7523306927370375e-06, "loss": 0.3152458965778351, "step": 1291 }, { "epoch": 2.054054054054054, "grad_norm": 9.373458399738638, "learning_rate": 2.7440710892693346e-06, "loss": 0.4110928177833557, "step": 1292 }, { "epoch": 2.055643879173291, "grad_norm": 12.948354145830105, "learning_rate": 2.7358192081547994e-06, "loss": 0.49277588725090027, "step": 1293 }, { "epoch": 2.057233704292528, "grad_norm": 9.130956209811558, "learning_rate": 2.7275750776407568e-06, "loss": 1.085226058959961, "step": 1294 }, { "epoch": 2.0588235294117645, "grad_norm": 10.060570275418396, "learning_rate": 2.719338725947987e-06, "loss": 0.7210683822631836, "step": 1295 }, { "epoch": 2.0604133545310015, "grad_norm": 9.069136226968924, "learning_rate": 2.711110181270653e-06, "loss": 1.3576455116271973, "step": 1296 }, { "epoch": 2.0620031796502385, "grad_norm": 25.167615589113343, "learning_rate": 2.7028894717761867e-06, "loss": 2.676640748977661, "step": 1297 }, { "epoch": 2.0635930047694755, "grad_norm": 9.696060766286752, "learning_rate": 2.6946766256051983e-06, "loss": 0.30056455731391907, "step": 1298 }, { "epoch": 2.065182829888712, "grad_norm": 9.185968609559813, "learning_rate": 2.6864716708713885e-06, "loss": 0.6304574012756348, "step": 1299 }, { "epoch": 2.066772655007949, "grad_norm": 10.355978305326843, "learning_rate": 2.6782746356614364e-06, "loss": 0.45259398221969604, "step": 1300 }, { "epoch": 2.068362480127186, "grad_norm": 11.804791422786808, "learning_rate": 2.670085548034913e-06, "loss": 0.5118886232376099, "step": 1301 }, { "epoch": 2.069952305246423, "grad_norm": 7.416042020504371, "learning_rate": 2.6619044360241886e-06, "loss": 0.26387444138526917, "step": 1302 }, { "epoch": 2.0715421303656596, "grad_norm": 10.262238601993216, "learning_rate": 2.6537313276343255e-06, "loss": 0.5545535087585449, "step": 1303 }, { "epoch": 2.0731319554848966, "grad_norm": 7.876916704043814, "learning_rate": 2.6455662508429946e-06, "loss": 0.2981090545654297, "step": 1304 }, { "epoch": 2.0747217806041336, "grad_norm": 11.160084976263871, "learning_rate": 2.6374092336003684e-06, "loss": 0.29364457726478577, "step": 1305 }, { "epoch": 2.0763116057233706, "grad_norm": 11.303770977415407, "learning_rate": 2.6292603038290306e-06, "loss": 0.7849453687667847, "step": 1306 }, { "epoch": 2.077901430842607, "grad_norm": 19.75269483714375, "learning_rate": 2.6211194894238863e-06, "loss": 0.9576752781867981, "step": 1307 }, { "epoch": 2.079491255961844, "grad_norm": 8.001166271043143, "learning_rate": 2.6129868182520525e-06, "loss": 0.1691819727420807, "step": 1308 }, { "epoch": 2.081081081081081, "grad_norm": 9.026308181157667, "learning_rate": 2.604862318152778e-06, "loss": 0.22451704740524292, "step": 1309 }, { "epoch": 2.082670906200318, "grad_norm": 12.843536821449838, "learning_rate": 2.596746016937337e-06, "loss": 0.4650968313217163, "step": 1310 }, { "epoch": 2.0842607313195547, "grad_norm": 9.988774639170009, "learning_rate": 2.5886379423889362e-06, "loss": 0.38352689146995544, "step": 1311 }, { "epoch": 2.0858505564387917, "grad_norm": 17.79353421678456, "learning_rate": 2.580538122262627e-06, "loss": 0.6193521022796631, "step": 1312 }, { "epoch": 2.0874403815580287, "grad_norm": 7.326817064479934, "learning_rate": 2.5724465842852e-06, "loss": 0.2504882514476776, "step": 1313 }, { "epoch": 2.0890302066772657, "grad_norm": 18.334658146906335, "learning_rate": 2.564363356155094e-06, "loss": 0.9683111906051636, "step": 1314 }, { "epoch": 2.0906200317965022, "grad_norm": 8.054717169135303, "learning_rate": 2.556288465542308e-06, "loss": 0.297244668006897, "step": 1315 }, { "epoch": 2.0922098569157392, "grad_norm": 11.01684001675487, "learning_rate": 2.5482219400882934e-06, "loss": 0.4446839690208435, "step": 1316 }, { "epoch": 2.0937996820349762, "grad_norm": 9.838931398311988, "learning_rate": 2.540163807405873e-06, "loss": 0.8158186674118042, "step": 1317 }, { "epoch": 2.0953895071542132, "grad_norm": 7.416228081106363, "learning_rate": 2.532114095079137e-06, "loss": 0.31487804651260376, "step": 1318 }, { "epoch": 2.09697933227345, "grad_norm": 12.833134362946721, "learning_rate": 2.5240728306633492e-06, "loss": 0.7158694863319397, "step": 1319 }, { "epoch": 2.098569157392687, "grad_norm": 15.148450523189657, "learning_rate": 2.5160400416848583e-06, "loss": 0.5310110449790955, "step": 1320 }, { "epoch": 2.100158982511924, "grad_norm": 8.187789663576774, "learning_rate": 2.508015755640999e-06, "loss": 0.31758755445480347, "step": 1321 }, { "epoch": 2.101748807631161, "grad_norm": 12.040527490093737, "learning_rate": 2.5000000000000015e-06, "loss": 0.5420504212379456, "step": 1322 }, { "epoch": 2.1033386327503973, "grad_norm": 8.696591339854574, "learning_rate": 2.491992802200892e-06, "loss": 0.8775444030761719, "step": 1323 }, { "epoch": 2.1049284578696343, "grad_norm": 14.542466373749383, "learning_rate": 2.4839941896534027e-06, "loss": 0.34773313999176025, "step": 1324 }, { "epoch": 2.1065182829888713, "grad_norm": 7.5325204562743675, "learning_rate": 2.4760041897378813e-06, "loss": 0.3825177252292633, "step": 1325 }, { "epoch": 2.108108108108108, "grad_norm": 7.061725758761062, "learning_rate": 2.4680228298051866e-06, "loss": 0.16116374731063843, "step": 1326 }, { "epoch": 2.109697933227345, "grad_norm": 15.681078852075046, "learning_rate": 2.4600501371766087e-06, "loss": 0.7574387788772583, "step": 1327 }, { "epoch": 2.111287758346582, "grad_norm": 8.187924042692945, "learning_rate": 2.4520861391437635e-06, "loss": 0.19623573124408722, "step": 1328 }, { "epoch": 2.112877583465819, "grad_norm": 12.631065786042955, "learning_rate": 2.444130862968503e-06, "loss": 0.24437788128852844, "step": 1329 }, { "epoch": 2.1144674085850554, "grad_norm": 10.764006349436796, "learning_rate": 2.4361843358828287e-06, "loss": 0.35523247718811035, "step": 1330 }, { "epoch": 2.1160572337042924, "grad_norm": 15.904616252885177, "learning_rate": 2.4282465850887887e-06, "loss": 0.754123866558075, "step": 1331 }, { "epoch": 2.1176470588235294, "grad_norm": 9.693189940959178, "learning_rate": 2.420317637758387e-06, "loss": 0.2743876576423645, "step": 1332 }, { "epoch": 2.1192368839427664, "grad_norm": 14.041254548938046, "learning_rate": 2.4123975210334987e-06, "loss": 0.6556911468505859, "step": 1333 }, { "epoch": 2.120826709062003, "grad_norm": 10.60443572924436, "learning_rate": 2.404486262025763e-06, "loss": 0.7615698575973511, "step": 1334 }, { "epoch": 2.12241653418124, "grad_norm": 9.536018360894495, "learning_rate": 2.3965838878165043e-06, "loss": 0.7127845883369446, "step": 1335 }, { "epoch": 2.124006359300477, "grad_norm": 8.952164367508987, "learning_rate": 2.388690425456629e-06, "loss": 0.5734552145004272, "step": 1336 }, { "epoch": 2.125596184419714, "grad_norm": 7.609720701343017, "learning_rate": 2.380805901966536e-06, "loss": 0.8266602158546448, "step": 1337 }, { "epoch": 2.1271860095389505, "grad_norm": 11.743301492870309, "learning_rate": 2.3729303443360312e-06, "loss": 0.5090128779411316, "step": 1338 }, { "epoch": 2.1287758346581875, "grad_norm": 10.592030711418884, "learning_rate": 2.365063779524222e-06, "loss": 0.20535558462142944, "step": 1339 }, { "epoch": 2.1303656597774245, "grad_norm": 11.864484547591088, "learning_rate": 2.3572062344594387e-06, "loss": 0.40618768334388733, "step": 1340 }, { "epoch": 2.1319554848966615, "grad_norm": 8.684602509877466, "learning_rate": 2.3493577360391316e-06, "loss": 0.386374831199646, "step": 1341 }, { "epoch": 2.133545310015898, "grad_norm": 57.672457327244935, "learning_rate": 2.341518311129781e-06, "loss": 1.3086575269699097, "step": 1342 }, { "epoch": 2.135135135135135, "grad_norm": 10.240140748912529, "learning_rate": 2.333687986566816e-06, "loss": 0.6220468282699585, "step": 1343 }, { "epoch": 2.136724960254372, "grad_norm": 12.813613405444732, "learning_rate": 2.325866789154505e-06, "loss": 1.0330783128738403, "step": 1344 }, { "epoch": 2.138314785373609, "grad_norm": 11.520654618841975, "learning_rate": 2.318054745665877e-06, "loss": 1.003685474395752, "step": 1345 }, { "epoch": 2.1399046104928456, "grad_norm": 10.468340083489583, "learning_rate": 2.3102518828426253e-06, "loss": 0.48100218176841736, "step": 1346 }, { "epoch": 2.1414944356120826, "grad_norm": 10.79612151629994, "learning_rate": 2.3024582273950136e-06, "loss": 0.2844333350658417, "step": 1347 }, { "epoch": 2.1430842607313196, "grad_norm": 16.202827122761803, "learning_rate": 2.2946738060017947e-06, "loss": 0.9998087882995605, "step": 1348 }, { "epoch": 2.1446740858505566, "grad_norm": 12.545135440285307, "learning_rate": 2.2868986453101044e-06, "loss": 0.3493340313434601, "step": 1349 }, { "epoch": 2.146263910969793, "grad_norm": 15.89328036419762, "learning_rate": 2.2791327719353847e-06, "loss": 0.2903032898902893, "step": 1350 }, { "epoch": 2.14785373608903, "grad_norm": 12.85129240340186, "learning_rate": 2.2713762124612794e-06, "loss": 0.2625690996646881, "step": 1351 }, { "epoch": 2.149443561208267, "grad_norm": 14.737479053475628, "learning_rate": 2.2636289934395506e-06, "loss": 0.4280650317668915, "step": 1352 }, { "epoch": 2.151033386327504, "grad_norm": 11.465910225292893, "learning_rate": 2.2558911413899933e-06, "loss": 0.4616699516773224, "step": 1353 }, { "epoch": 2.1526232114467407, "grad_norm": 12.150448305231292, "learning_rate": 2.24816268280033e-06, "loss": 0.5582720041275024, "step": 1354 }, { "epoch": 2.1542130365659777, "grad_norm": 9.110587400603661, "learning_rate": 2.2404436441261305e-06, "loss": 0.2962486445903778, "step": 1355 }, { "epoch": 2.1558028616852147, "grad_norm": 6.569549294651593, "learning_rate": 2.2327340517907232e-06, "loss": 0.28374403715133667, "step": 1356 }, { "epoch": 2.1573926868044513, "grad_norm": 13.75762553276656, "learning_rate": 2.2250339321850934e-06, "loss": 0.6057413816452026, "step": 1357 }, { "epoch": 2.1589825119236883, "grad_norm": 8.669490763404966, "learning_rate": 2.217343311667807e-06, "loss": 0.3976594805717468, "step": 1358 }, { "epoch": 2.1605723370429253, "grad_norm": 9.243989451796354, "learning_rate": 2.2096622165649082e-06, "loss": 0.36878764629364014, "step": 1359 }, { "epoch": 2.1621621621621623, "grad_norm": 9.941680984264261, "learning_rate": 2.2019906731698337e-06, "loss": 0.3705075979232788, "step": 1360 }, { "epoch": 2.1637519872813993, "grad_norm": 16.291839171810043, "learning_rate": 2.1943287077433302e-06, "loss": 1.8146584033966064, "step": 1361 }, { "epoch": 2.165341812400636, "grad_norm": 11.59444692669252, "learning_rate": 2.1866763465133483e-06, "loss": 0.9066391587257385, "step": 1362 }, { "epoch": 2.166931637519873, "grad_norm": 14.500132701105679, "learning_rate": 2.179033615674971e-06, "loss": 0.5588962435722351, "step": 1363 }, { "epoch": 2.16852146263911, "grad_norm": 19.71889006674096, "learning_rate": 2.1714005413903105e-06, "loss": 0.4099670350551605, "step": 1364 }, { "epoch": 2.1701112877583464, "grad_norm": 9.778223957653488, "learning_rate": 2.1637771497884208e-06, "loss": 0.5692353248596191, "step": 1365 }, { "epoch": 2.1717011128775834, "grad_norm": 26.06102052980053, "learning_rate": 2.156163466965218e-06, "loss": 0.5873644948005676, "step": 1366 }, { "epoch": 2.1732909379968204, "grad_norm": 10.595142110604016, "learning_rate": 2.1485595189833773e-06, "loss": 0.29316580295562744, "step": 1367 }, { "epoch": 2.1748807631160574, "grad_norm": 19.21378248414993, "learning_rate": 2.1409653318722517e-06, "loss": 1.0865153074264526, "step": 1368 }, { "epoch": 2.176470588235294, "grad_norm": 8.650427105241409, "learning_rate": 2.1333809316277854e-06, "loss": 0.4446878135204315, "step": 1369 }, { "epoch": 2.178060413354531, "grad_norm": 14.234623763340775, "learning_rate": 2.125806344212413e-06, "loss": 0.17620611190795898, "step": 1370 }, { "epoch": 2.179650238473768, "grad_norm": 7.848600881166288, "learning_rate": 2.1182415955549905e-06, "loss": 0.29350030422210693, "step": 1371 }, { "epoch": 2.181240063593005, "grad_norm": 11.44030757418845, "learning_rate": 2.110686711550678e-06, "loss": 0.3198983669281006, "step": 1372 }, { "epoch": 2.1828298887122415, "grad_norm": 7.244211102436453, "learning_rate": 2.103141718060883e-06, "loss": 0.31241801381111145, "step": 1373 }, { "epoch": 2.1844197138314785, "grad_norm": 10.576054299489337, "learning_rate": 2.095606640913149e-06, "loss": 0.32452335953712463, "step": 1374 }, { "epoch": 2.1860095389507155, "grad_norm": 8.527885992710734, "learning_rate": 2.0880815059010716e-06, "loss": 0.28504425287246704, "step": 1375 }, { "epoch": 2.1875993640699525, "grad_norm": 10.577578790165875, "learning_rate": 2.080566338784222e-06, "loss": 0.38088685274124146, "step": 1376 }, { "epoch": 2.189189189189189, "grad_norm": 10.395418619328238, "learning_rate": 2.0730611652880435e-06, "loss": 0.44801580905914307, "step": 1377 }, { "epoch": 2.190779014308426, "grad_norm": 8.837899961147453, "learning_rate": 2.0655660111037685e-06, "loss": 0.33956456184387207, "step": 1378 }, { "epoch": 2.192368839427663, "grad_norm": 11.45866723702121, "learning_rate": 2.0580809018883397e-06, "loss": 0.5810889601707458, "step": 1379 }, { "epoch": 2.1939586645469, "grad_norm": 9.413304931538189, "learning_rate": 2.0506058632643044e-06, "loss": 0.4389991760253906, "step": 1380 }, { "epoch": 2.1955484896661366, "grad_norm": 12.110966125967227, "learning_rate": 2.043140920819747e-06, "loss": 1.0444782972335815, "step": 1381 }, { "epoch": 2.1971383147853736, "grad_norm": 10.789879175191414, "learning_rate": 2.0356861001081833e-06, "loss": 0.4147152006626129, "step": 1382 }, { "epoch": 2.1987281399046106, "grad_norm": 14.597872553807216, "learning_rate": 2.028241426648484e-06, "loss": 1.171076774597168, "step": 1383 }, { "epoch": 2.2003179650238476, "grad_norm": 10.365723003698111, "learning_rate": 2.0208069259247866e-06, "loss": 0.30065760016441345, "step": 1384 }, { "epoch": 2.201907790143084, "grad_norm": 10.161072256605, "learning_rate": 2.0133826233864023e-06, "loss": 0.7872642278671265, "step": 1385 }, { "epoch": 2.203497615262321, "grad_norm": 10.02599250498573, "learning_rate": 2.005968544447733e-06, "loss": 0.31960901618003845, "step": 1386 }, { "epoch": 2.205087440381558, "grad_norm": 7.6759113967009736, "learning_rate": 1.998564714488187e-06, "loss": 0.5354412794113159, "step": 1387 }, { "epoch": 2.2066772655007947, "grad_norm": 38.3513743093145, "learning_rate": 1.9911711588520845e-06, "loss": 1.2136881351470947, "step": 1388 }, { "epoch": 2.2082670906200317, "grad_norm": 9.27571774690589, "learning_rate": 1.98378790284858e-06, "loss": 0.4841063916683197, "step": 1389 }, { "epoch": 2.2098569157392687, "grad_norm": 15.933026654093874, "learning_rate": 1.976414971751568e-06, "loss": 0.5539760589599609, "step": 1390 }, { "epoch": 2.2114467408585057, "grad_norm": 15.932623846230143, "learning_rate": 1.9690523907995968e-06, "loss": 0.5171653032302856, "step": 1391 }, { "epoch": 2.2130365659777427, "grad_norm": 7.63792357629499, "learning_rate": 1.9617001851957924e-06, "loss": 0.3998708724975586, "step": 1392 }, { "epoch": 2.2146263910969792, "grad_norm": 8.332002906686242, "learning_rate": 1.9543583801077567e-06, "loss": 0.16828802227973938, "step": 1393 }, { "epoch": 2.2162162162162162, "grad_norm": 10.513665677086328, "learning_rate": 1.9470270006674944e-06, "loss": 0.5081832408905029, "step": 1394 }, { "epoch": 2.2178060413354532, "grad_norm": 8.948435977246879, "learning_rate": 1.93970607197132e-06, "loss": 0.4326249361038208, "step": 1395 }, { "epoch": 2.21939586645469, "grad_norm": 18.420956573075454, "learning_rate": 1.932395619079771e-06, "loss": 0.5133779644966125, "step": 1396 }, { "epoch": 2.220985691573927, "grad_norm": 13.78197766711825, "learning_rate": 1.9250956670175315e-06, "loss": 0.8035323619842529, "step": 1397 }, { "epoch": 2.2225755166931638, "grad_norm": 14.628351212094666, "learning_rate": 1.917806240773333e-06, "loss": 0.25781551003456116, "step": 1398 }, { "epoch": 2.2241653418124008, "grad_norm": 8.528547429899023, "learning_rate": 1.910527365299879e-06, "loss": 0.5672615766525269, "step": 1399 }, { "epoch": 2.2257551669316373, "grad_norm": 7.602237876830262, "learning_rate": 1.9032590655137557e-06, "loss": 0.24282558262348175, "step": 1400 }, { "epoch": 2.2273449920508743, "grad_norm": 13.220120887526408, "learning_rate": 1.8960013662953452e-06, "loss": 0.3136386573314667, "step": 1401 }, { "epoch": 2.2289348171701113, "grad_norm": 7.446562133244804, "learning_rate": 1.8887542924887486e-06, "loss": 0.4707590937614441, "step": 1402 }, { "epoch": 2.2305246422893483, "grad_norm": 11.925665302137256, "learning_rate": 1.8815178689016862e-06, "loss": 0.598182201385498, "step": 1403 }, { "epoch": 2.232114467408585, "grad_norm": 10.82355623400178, "learning_rate": 1.87429212030543e-06, "loss": 0.17749015986919403, "step": 1404 }, { "epoch": 2.233704292527822, "grad_norm": 14.204338243223313, "learning_rate": 1.8670770714347024e-06, "loss": 0.2691603899002075, "step": 1405 }, { "epoch": 2.235294117647059, "grad_norm": 13.602729724029773, "learning_rate": 1.8598727469876027e-06, "loss": 0.4229642152786255, "step": 1406 }, { "epoch": 2.236883942766296, "grad_norm": 10.317753597321978, "learning_rate": 1.8526791716255205e-06, "loss": 0.3552161157131195, "step": 1407 }, { "epoch": 2.2384737678855324, "grad_norm": 8.135565138075629, "learning_rate": 1.8454963699730471e-06, "loss": 0.275473952293396, "step": 1408 }, { "epoch": 2.2400635930047694, "grad_norm": 9.137160576002834, "learning_rate": 1.8383243666178929e-06, "loss": 0.2102886140346527, "step": 1409 }, { "epoch": 2.2416534181240064, "grad_norm": 9.360877896344201, "learning_rate": 1.8311631861108097e-06, "loss": 0.1918153464794159, "step": 1410 }, { "epoch": 2.2432432432432434, "grad_norm": 8.962443180010503, "learning_rate": 1.8240128529654944e-06, "loss": 0.5940899848937988, "step": 1411 }, { "epoch": 2.24483306836248, "grad_norm": 11.917226870320402, "learning_rate": 1.816873391658518e-06, "loss": 0.3903201222419739, "step": 1412 }, { "epoch": 2.246422893481717, "grad_norm": 9.826294489562173, "learning_rate": 1.8097448266292322e-06, "loss": 0.6102331876754761, "step": 1413 }, { "epoch": 2.248012718600954, "grad_norm": 13.517844391934938, "learning_rate": 1.802627182279687e-06, "loss": 0.4329093098640442, "step": 1414 }, { "epoch": 2.249602543720191, "grad_norm": 13.547536022058566, "learning_rate": 1.7955204829745571e-06, "loss": 0.33794981241226196, "step": 1415 }, { "epoch": 2.2511923688394275, "grad_norm": 13.285342374121825, "learning_rate": 1.7884247530410436e-06, "loss": 0.31554755568504333, "step": 1416 }, { "epoch": 2.2527821939586645, "grad_norm": 12.434017910171066, "learning_rate": 1.781340016768799e-06, "loss": 0.21422359347343445, "step": 1417 }, { "epoch": 2.2543720190779015, "grad_norm": 12.28525539103986, "learning_rate": 1.774266298409848e-06, "loss": 0.9881556034088135, "step": 1418 }, { "epoch": 2.255961844197138, "grad_norm": 8.457333426414545, "learning_rate": 1.7672036221784917e-06, "loss": 0.4250524044036865, "step": 1419 }, { "epoch": 2.257551669316375, "grad_norm": 13.958386580318106, "learning_rate": 1.760152012251241e-06, "loss": 1.0648713111877441, "step": 1420 }, { "epoch": 2.259141494435612, "grad_norm": 5.811308085366414, "learning_rate": 1.7531114927667192e-06, "loss": 0.2532946765422821, "step": 1421 }, { "epoch": 2.260731319554849, "grad_norm": 10.684705977173687, "learning_rate": 1.7460820878255853e-06, "loss": 0.5165972113609314, "step": 1422 }, { "epoch": 2.262321144674086, "grad_norm": 9.810147715383405, "learning_rate": 1.7390638214904576e-06, "loss": 0.3521971106529236, "step": 1423 }, { "epoch": 2.2639109697933226, "grad_norm": 12.393073778447988, "learning_rate": 1.7320567177858188e-06, "loss": 0.6745153665542603, "step": 1424 }, { "epoch": 2.2655007949125596, "grad_norm": 15.83584524219963, "learning_rate": 1.7250608006979447e-06, "loss": 1.0417171716690063, "step": 1425 }, { "epoch": 2.2670906200317966, "grad_norm": 10.451819766363082, "learning_rate": 1.7180760941748132e-06, "loss": 0.6115795373916626, "step": 1426 }, { "epoch": 2.268680445151033, "grad_norm": 11.76193784891942, "learning_rate": 1.7111026221260334e-06, "loss": 0.2907196879386902, "step": 1427 }, { "epoch": 2.27027027027027, "grad_norm": 7.9808693082080655, "learning_rate": 1.704140408422753e-06, "loss": 0.5010504126548767, "step": 1428 }, { "epoch": 2.271860095389507, "grad_norm": 9.392613329966396, "learning_rate": 1.6971894768975794e-06, "loss": 0.4334721565246582, "step": 1429 }, { "epoch": 2.273449920508744, "grad_norm": 10.483383181145562, "learning_rate": 1.6902498513445053e-06, "loss": 0.8298370838165283, "step": 1430 }, { "epoch": 2.275039745627981, "grad_norm": 8.48946421286137, "learning_rate": 1.683321555518816e-06, "loss": 0.309246689081192, "step": 1431 }, { "epoch": 2.2766295707472177, "grad_norm": 12.704343511214526, "learning_rate": 1.6764046131370142e-06, "loss": 0.3676954507827759, "step": 1432 }, { "epoch": 2.2782193958664547, "grad_norm": 14.891085610228341, "learning_rate": 1.6694990478767432e-06, "loss": 1.0234798192977905, "step": 1433 }, { "epoch": 2.2798092209856917, "grad_norm": 13.465531205203249, "learning_rate": 1.6626048833766927e-06, "loss": 0.8669548630714417, "step": 1434 }, { "epoch": 2.2813990461049283, "grad_norm": 9.123390257093554, "learning_rate": 1.6557221432365355e-06, "loss": 0.3418199419975281, "step": 1435 }, { "epoch": 2.2829888712241653, "grad_norm": 7.501081284249146, "learning_rate": 1.64885085101683e-06, "loss": 0.19252462685108185, "step": 1436 }, { "epoch": 2.2845786963434023, "grad_norm": 13.366478333865068, "learning_rate": 1.6419910302389475e-06, "loss": 0.3347507119178772, "step": 1437 }, { "epoch": 2.2861685214626393, "grad_norm": 8.47034032498675, "learning_rate": 1.6351427043849955e-06, "loss": 0.3430837094783783, "step": 1438 }, { "epoch": 2.287758346581876, "grad_norm": 11.75645210643237, "learning_rate": 1.6283058968977289e-06, "loss": 0.27441170811653137, "step": 1439 }, { "epoch": 2.289348171701113, "grad_norm": 11.279080865104184, "learning_rate": 1.621480631180473e-06, "loss": 0.4956094026565552, "step": 1440 }, { "epoch": 2.29093799682035, "grad_norm": 15.448500087736196, "learning_rate": 1.6146669305970493e-06, "loss": 0.7079007625579834, "step": 1441 }, { "epoch": 2.292527821939587, "grad_norm": 9.231845523966356, "learning_rate": 1.6078648184716827e-06, "loss": 0.4254942536354065, "step": 1442 }, { "epoch": 2.2941176470588234, "grad_norm": 11.905805737388867, "learning_rate": 1.601074318088937e-06, "loss": 0.7360373735427856, "step": 1443 }, { "epoch": 2.2957074721780604, "grad_norm": 12.941732719146424, "learning_rate": 1.5942954526936217e-06, "loss": 0.6561013460159302, "step": 1444 }, { "epoch": 2.2972972972972974, "grad_norm": 7.9487159972862536, "learning_rate": 1.5875282454907187e-06, "loss": 0.08330412209033966, "step": 1445 }, { "epoch": 2.2988871224165344, "grad_norm": 10.834323705822364, "learning_rate": 1.5807727196453065e-06, "loss": 0.6605761647224426, "step": 1446 }, { "epoch": 2.300476947535771, "grad_norm": 9.209055284124979, "learning_rate": 1.574028898282472e-06, "loss": 0.37634578347206116, "step": 1447 }, { "epoch": 2.302066772655008, "grad_norm": 8.042351795789497, "learning_rate": 1.5672968044872395e-06, "loss": 0.4199534058570862, "step": 1448 }, { "epoch": 2.303656597774245, "grad_norm": 14.25177973250915, "learning_rate": 1.560576461304486e-06, "loss": 0.736668586730957, "step": 1449 }, { "epoch": 2.3052464228934815, "grad_norm": 13.585164988982509, "learning_rate": 1.5538678917388638e-06, "loss": 0.9653232097625732, "step": 1450 }, { "epoch": 2.3068362480127185, "grad_norm": 8.887027967619444, "learning_rate": 1.5471711187547284e-06, "loss": 0.38832274079322815, "step": 1451 }, { "epoch": 2.3084260731319555, "grad_norm": 6.267315249131483, "learning_rate": 1.5404861652760434e-06, "loss": 0.31964796781539917, "step": 1452 }, { "epoch": 2.3100158982511925, "grad_norm": 11.376526955253818, "learning_rate": 1.5338130541863233e-06, "loss": 0.44085028767585754, "step": 1453 }, { "epoch": 2.3116057233704295, "grad_norm": 10.160907274712994, "learning_rate": 1.527151808328538e-06, "loss": 0.670817494392395, "step": 1454 }, { "epoch": 2.313195548489666, "grad_norm": 12.580031788905817, "learning_rate": 1.5205024505050424e-06, "loss": 0.3954434394836426, "step": 1455 }, { "epoch": 2.314785373608903, "grad_norm": 15.317878122772438, "learning_rate": 1.5138650034775004e-06, "loss": 0.2827695310115814, "step": 1456 }, { "epoch": 2.31637519872814, "grad_norm": 7.489429551522345, "learning_rate": 1.5072394899667974e-06, "loss": 0.41150280833244324, "step": 1457 }, { "epoch": 2.3179650238473766, "grad_norm": 10.601003962682444, "learning_rate": 1.5006259326529755e-06, "loss": 0.36833304166793823, "step": 1458 }, { "epoch": 2.3195548489666136, "grad_norm": 7.868890901677638, "learning_rate": 1.4940243541751449e-06, "loss": 0.2758025825023651, "step": 1459 }, { "epoch": 2.3211446740858506, "grad_norm": 7.750782960909832, "learning_rate": 1.487434777131409e-06, "loss": 0.32291388511657715, "step": 1460 }, { "epoch": 2.3227344992050876, "grad_norm": 8.392809402568808, "learning_rate": 1.4808572240787943e-06, "loss": 0.3069787621498108, "step": 1461 }, { "epoch": 2.3243243243243246, "grad_norm": 7.163401464386679, "learning_rate": 1.4742917175331644e-06, "loss": 0.16901874542236328, "step": 1462 }, { "epoch": 2.325914149443561, "grad_norm": 14.390629374585183, "learning_rate": 1.4677382799691425e-06, "loss": 1.3525761365890503, "step": 1463 }, { "epoch": 2.327503974562798, "grad_norm": 6.678330116875288, "learning_rate": 1.461196933820046e-06, "loss": 0.20596320927143097, "step": 1464 }, { "epoch": 2.329093799682035, "grad_norm": 12.532799405880928, "learning_rate": 1.4546677014777938e-06, "loss": 0.322355717420578, "step": 1465 }, { "epoch": 2.3306836248012717, "grad_norm": 16.793179759046723, "learning_rate": 1.4481506052928445e-06, "loss": 0.3826938271522522, "step": 1466 }, { "epoch": 2.3322734499205087, "grad_norm": 9.905044680880017, "learning_rate": 1.4416456675741076e-06, "loss": 0.3694925904273987, "step": 1467 }, { "epoch": 2.3338632750397457, "grad_norm": 10.461931343295252, "learning_rate": 1.4351529105888735e-06, "loss": 0.29014065861701965, "step": 1468 }, { "epoch": 2.3354531001589827, "grad_norm": 14.000724794554966, "learning_rate": 1.4286723565627397e-06, "loss": 0.3127569556236267, "step": 1469 }, { "epoch": 2.337042925278219, "grad_norm": 7.689620479500538, "learning_rate": 1.4222040276795273e-06, "loss": 0.2523467540740967, "step": 1470 }, { "epoch": 2.338632750397456, "grad_norm": 14.373753716425853, "learning_rate": 1.41574794608121e-06, "loss": 0.3797416090965271, "step": 1471 }, { "epoch": 2.340222575516693, "grad_norm": 9.054123209399933, "learning_rate": 1.4093041338678404e-06, "loss": 0.4659533202648163, "step": 1472 }, { "epoch": 2.34181240063593, "grad_norm": 15.438748343325372, "learning_rate": 1.4028726130974662e-06, "loss": 0.3603324890136719, "step": 1473 }, { "epoch": 2.3434022257551668, "grad_norm": 10.560118294734133, "learning_rate": 1.3964534057860652e-06, "loss": 0.6682232618331909, "step": 1474 }, { "epoch": 2.3449920508744038, "grad_norm": 11.401281185340983, "learning_rate": 1.3900465339074609e-06, "loss": 0.6063719391822815, "step": 1475 }, { "epoch": 2.3465818759936408, "grad_norm": 11.52440897543909, "learning_rate": 1.3836520193932495e-06, "loss": 0.44089335203170776, "step": 1476 }, { "epoch": 2.3481717011128778, "grad_norm": 13.657311760357237, "learning_rate": 1.3772698841327347e-06, "loss": 0.6193767189979553, "step": 1477 }, { "epoch": 2.3497615262321143, "grad_norm": 9.295719556907873, "learning_rate": 1.3709001499728308e-06, "loss": 0.3555140495300293, "step": 1478 }, { "epoch": 2.3513513513513513, "grad_norm": 11.645080860666992, "learning_rate": 1.3645428387180137e-06, "loss": 0.6090583801269531, "step": 1479 }, { "epoch": 2.3529411764705883, "grad_norm": 8.470948193400224, "learning_rate": 1.3581979721302286e-06, "loss": 0.34452319145202637, "step": 1480 }, { "epoch": 2.3545310015898253, "grad_norm": 9.909508425078533, "learning_rate": 1.3518655719288193e-06, "loss": 0.39460867643356323, "step": 1481 }, { "epoch": 2.356120826709062, "grad_norm": 13.853217505747493, "learning_rate": 1.3455456597904605e-06, "loss": 0.43775254487991333, "step": 1482 }, { "epoch": 2.357710651828299, "grad_norm": 10.876942487445342, "learning_rate": 1.339238257349073e-06, "loss": 0.2791391611099243, "step": 1483 }, { "epoch": 2.359300476947536, "grad_norm": 12.82786673007337, "learning_rate": 1.3329433861957614e-06, "loss": 0.6097040772438049, "step": 1484 }, { "epoch": 2.360890302066773, "grad_norm": 8.760167908169596, "learning_rate": 1.3266610678787283e-06, "loss": 0.2755807936191559, "step": 1485 }, { "epoch": 2.3624801271860094, "grad_norm": 20.110558363159395, "learning_rate": 1.3203913239032074e-06, "loss": 0.73570317029953, "step": 1486 }, { "epoch": 2.3640699523052464, "grad_norm": 11.972504031113099, "learning_rate": 1.3141341757313924e-06, "loss": 0.6548980474472046, "step": 1487 }, { "epoch": 2.3656597774244834, "grad_norm": 13.51699058640222, "learning_rate": 1.3078896447823547e-06, "loss": 0.3493387699127197, "step": 1488 }, { "epoch": 2.36724960254372, "grad_norm": 10.566178335617067, "learning_rate": 1.30165775243198e-06, "loss": 0.33287400007247925, "step": 1489 }, { "epoch": 2.368839427662957, "grad_norm": 12.768459562661848, "learning_rate": 1.295438520012887e-06, "loss": 0.40229332447052, "step": 1490 }, { "epoch": 2.370429252782194, "grad_norm": 10.88296099547651, "learning_rate": 1.2892319688143578e-06, "loss": 0.4362897276878357, "step": 1491 }, { "epoch": 2.372019077901431, "grad_norm": 17.82269470385356, "learning_rate": 1.283038120082268e-06, "loss": 0.684404194355011, "step": 1492 }, { "epoch": 2.373608903020668, "grad_norm": 8.305480002810821, "learning_rate": 1.2768569950190074e-06, "loss": 0.4447444677352905, "step": 1493 }, { "epoch": 2.3751987281399045, "grad_norm": 8.946310245620616, "learning_rate": 1.2706886147834114e-06, "loss": 0.2831430435180664, "step": 1494 }, { "epoch": 2.3767885532591415, "grad_norm": 12.95406255499207, "learning_rate": 1.2645330004906919e-06, "loss": 0.750991702079773, "step": 1495 }, { "epoch": 2.3783783783783785, "grad_norm": 9.138565382188945, "learning_rate": 1.2583901732123555e-06, "loss": 0.3836651146411896, "step": 1496 }, { "epoch": 2.379968203497615, "grad_norm": 10.766809743823636, "learning_rate": 1.252260153976143e-06, "loss": 0.2510315775871277, "step": 1497 }, { "epoch": 2.381558028616852, "grad_norm": 17.47014235175777, "learning_rate": 1.2461429637659466e-06, "loss": 0.7679001688957214, "step": 1498 }, { "epoch": 2.383147853736089, "grad_norm": 13.745920912677008, "learning_rate": 1.2400386235217444e-06, "loss": 0.3160330653190613, "step": 1499 }, { "epoch": 2.384737678855326, "grad_norm": 8.265195502867067, "learning_rate": 1.2339471541395304e-06, "loss": 0.6791805624961853, "step": 1500 }, { "epoch": 2.3863275039745626, "grad_norm": 8.707414478394726, "learning_rate": 1.2278685764712356e-06, "loss": 0.4915718138217926, "step": 1501 }, { "epoch": 2.3879173290937996, "grad_norm": 14.070274594206333, "learning_rate": 1.2218029113246616e-06, "loss": 0.37402093410491943, "step": 1502 }, { "epoch": 2.3895071542130366, "grad_norm": 7.969660705498184, "learning_rate": 1.2157501794634118e-06, "loss": 0.5574676990509033, "step": 1503 }, { "epoch": 2.3910969793322736, "grad_norm": 13.836136257943703, "learning_rate": 1.2097104016068146e-06, "loss": 1.1037708520889282, "step": 1504 }, { "epoch": 2.39268680445151, "grad_norm": 13.651653278356111, "learning_rate": 1.203683598429855e-06, "loss": 0.5910956859588623, "step": 1505 }, { "epoch": 2.394276629570747, "grad_norm": 9.494496018410414, "learning_rate": 1.1976697905631036e-06, "loss": 0.6444679498672485, "step": 1506 }, { "epoch": 2.395866454689984, "grad_norm": 9.573835932767102, "learning_rate": 1.1916689985926494e-06, "loss": 0.41417208313941956, "step": 1507 }, { "epoch": 2.397456279809221, "grad_norm": 10.069807567033948, "learning_rate": 1.1856812430600228e-06, "loss": 0.7423563599586487, "step": 1508 }, { "epoch": 2.3990461049284577, "grad_norm": 7.835882290899644, "learning_rate": 1.1797065444621286e-06, "loss": 0.44855624437332153, "step": 1509 }, { "epoch": 2.4006359300476947, "grad_norm": 12.999112533935575, "learning_rate": 1.1737449232511799e-06, "loss": 0.5191413760185242, "step": 1510 }, { "epoch": 2.4022257551669317, "grad_norm": 15.62081982034232, "learning_rate": 1.1677963998346182e-06, "loss": 0.3923201560974121, "step": 1511 }, { "epoch": 2.4038155802861687, "grad_norm": 11.802569751706425, "learning_rate": 1.1618609945750558e-06, "loss": 0.6530142426490784, "step": 1512 }, { "epoch": 2.4054054054054053, "grad_norm": 7.39912848585775, "learning_rate": 1.1559387277901958e-06, "loss": 0.18781472742557526, "step": 1513 }, { "epoch": 2.4069952305246423, "grad_norm": 12.320828388785662, "learning_rate": 1.1500296197527643e-06, "loss": 0.739341676235199, "step": 1514 }, { "epoch": 2.4085850556438793, "grad_norm": 10.052444335743562, "learning_rate": 1.1441336906904504e-06, "loss": 0.3746979534626007, "step": 1515 }, { "epoch": 2.4101748807631163, "grad_norm": 10.274927395942361, "learning_rate": 1.1382509607858233e-06, "loss": 0.5018946528434753, "step": 1516 }, { "epoch": 2.411764705882353, "grad_norm": 12.618193233095981, "learning_rate": 1.1323814501762714e-06, "loss": 0.4841238856315613, "step": 1517 }, { "epoch": 2.41335453100159, "grad_norm": 9.154264171272432, "learning_rate": 1.126525178953935e-06, "loss": 0.6009554862976074, "step": 1518 }, { "epoch": 2.414944356120827, "grad_norm": 13.673528451375187, "learning_rate": 1.1206821671656277e-06, "loss": 0.4096434712409973, "step": 1519 }, { "epoch": 2.4165341812400634, "grad_norm": 10.476072583039118, "learning_rate": 1.114852434812781e-06, "loss": 0.3984518051147461, "step": 1520 }, { "epoch": 2.4181240063593004, "grad_norm": 7.612865941416167, "learning_rate": 1.1090360018513652e-06, "loss": 0.3841229975223541, "step": 1521 }, { "epoch": 2.4197138314785374, "grad_norm": 10.054845231558906, "learning_rate": 1.1032328881918237e-06, "loss": 0.35508057475090027, "step": 1522 }, { "epoch": 2.4213036565977744, "grad_norm": 10.924634607628944, "learning_rate": 1.0974431136990115e-06, "loss": 0.538759708404541, "step": 1523 }, { "epoch": 2.4228934817170114, "grad_norm": 9.906978216623733, "learning_rate": 1.0916666981921164e-06, "loss": 0.267231285572052, "step": 1524 }, { "epoch": 2.424483306836248, "grad_norm": 8.161633923040958, "learning_rate": 1.0859036614445977e-06, "loss": 0.20279455184936523, "step": 1525 }, { "epoch": 2.426073131955485, "grad_norm": 9.59102246457474, "learning_rate": 1.0801540231841213e-06, "loss": 0.2875203490257263, "step": 1526 }, { "epoch": 2.427662957074722, "grad_norm": 8.82762418705778, "learning_rate": 1.0744178030924817e-06, "loss": 0.4241867661476135, "step": 1527 }, { "epoch": 2.4292527821939585, "grad_norm": 12.305547733445321, "learning_rate": 1.0686950208055486e-06, "loss": 0.5881316065788269, "step": 1528 }, { "epoch": 2.4308426073131955, "grad_norm": 10.00443798673381, "learning_rate": 1.0629856959131861e-06, "loss": 0.26692256331443787, "step": 1529 }, { "epoch": 2.4324324324324325, "grad_norm": 11.916760785837651, "learning_rate": 1.0572898479591942e-06, "loss": 0.390109121799469, "step": 1530 }, { "epoch": 2.4340222575516695, "grad_norm": 12.113929514101105, "learning_rate": 1.05160749644124e-06, "loss": 0.39742356538772583, "step": 1531 }, { "epoch": 2.435612082670906, "grad_norm": 9.975231061986387, "learning_rate": 1.045938660810788e-06, "loss": 0.18685436248779297, "step": 1532 }, { "epoch": 2.437201907790143, "grad_norm": 14.781171323125493, "learning_rate": 1.04028336047304e-06, "loss": 1.9972474575042725, "step": 1533 }, { "epoch": 2.43879173290938, "grad_norm": 12.799015715830842, "learning_rate": 1.034641614786862e-06, "loss": 0.896147608757019, "step": 1534 }, { "epoch": 2.440381558028617, "grad_norm": 14.960349944848524, "learning_rate": 1.0290134430647196e-06, "loss": 0.6048048734664917, "step": 1535 }, { "epoch": 2.4419713831478536, "grad_norm": 9.68263048334465, "learning_rate": 1.0233988645726166e-06, "loss": 0.3319162130355835, "step": 1536 }, { "epoch": 2.4435612082670906, "grad_norm": 11.347543341617659, "learning_rate": 1.0177978985300203e-06, "loss": 1.3338065147399902, "step": 1537 }, { "epoch": 2.4451510333863276, "grad_norm": 13.339900500358667, "learning_rate": 1.0122105641098062e-06, "loss": 0.36019963026046753, "step": 1538 }, { "epoch": 2.4467408585055646, "grad_norm": 9.811098173161305, "learning_rate": 1.0066368804381833e-06, "loss": 0.46575456857681274, "step": 1539 }, { "epoch": 2.448330683624801, "grad_norm": 11.560409893961497, "learning_rate": 1.0010768665946309e-06, "loss": 0.29183441400527954, "step": 1540 }, { "epoch": 2.449920508744038, "grad_norm": 14.482821614446557, "learning_rate": 9.9553054161184e-07, "loss": 0.3994084298610687, "step": 1541 }, { "epoch": 2.451510333863275, "grad_norm": 10.71186356653993, "learning_rate": 9.899979244756358e-07, "loss": 0.6856818795204163, "step": 1542 }, { "epoch": 2.453100158982512, "grad_norm": 15.849209979348569, "learning_rate": 9.844790341249276e-07, "loss": 0.29254722595214844, "step": 1543 }, { "epoch": 2.4546899841017487, "grad_norm": 13.001389500403421, "learning_rate": 9.789738894516294e-07, "loss": 0.5249795913696289, "step": 1544 }, { "epoch": 2.4562798092209857, "grad_norm": 11.939268619149114, "learning_rate": 9.734825093006034e-07, "loss": 0.5683445930480957, "step": 1545 }, { "epoch": 2.4578696343402227, "grad_norm": 15.97735060116779, "learning_rate": 9.680049124695973e-07, "loss": 1.1561030149459839, "step": 1546 }, { "epoch": 2.4594594594594597, "grad_norm": 12.890313458125426, "learning_rate": 9.625411177091731e-07, "loss": 0.5048433542251587, "step": 1547 }, { "epoch": 2.461049284578696, "grad_norm": 13.662968435753552, "learning_rate": 9.570911437226454e-07, "loss": 0.416229248046875, "step": 1548 }, { "epoch": 2.462639109697933, "grad_norm": 13.285516516957886, "learning_rate": 9.516550091660237e-07, "loss": 0.4497603178024292, "step": 1549 }, { "epoch": 2.46422893481717, "grad_norm": 6.541102361228737, "learning_rate": 9.462327326479376e-07, "loss": 0.38231202960014343, "step": 1550 }, { "epoch": 2.4658187599364068, "grad_norm": 10.61910465149233, "learning_rate": 9.408243327295835e-07, "loss": 0.34195244312286377, "step": 1551 }, { "epoch": 2.4674085850556438, "grad_norm": 8.454641509882954, "learning_rate": 9.35429827924652e-07, "loss": 0.5992942452430725, "step": 1552 }, { "epoch": 2.4689984101748808, "grad_norm": 10.462490419970619, "learning_rate": 9.300492366992708e-07, "loss": 0.49838757514953613, "step": 1553 }, { "epoch": 2.4705882352941178, "grad_norm": 11.436890037466688, "learning_rate": 9.246825774719409e-07, "loss": 0.42259490489959717, "step": 1554 }, { "epoch": 2.4721780604133547, "grad_norm": 14.950258496228066, "learning_rate": 9.193298686134699e-07, "loss": 0.6221784949302673, "step": 1555 }, { "epoch": 2.4737678855325913, "grad_norm": 8.371376759124747, "learning_rate": 9.139911284469111e-07, "loss": 0.2900388836860657, "step": 1556 }, { "epoch": 2.4753577106518283, "grad_norm": 7.643866077422786, "learning_rate": 9.086663752475061e-07, "loss": 0.2714345455169678, "step": 1557 }, { "epoch": 2.4769475357710653, "grad_norm": 10.447803262264925, "learning_rate": 9.033556272426075e-07, "loss": 0.24355517327785492, "step": 1558 }, { "epoch": 2.478537360890302, "grad_norm": 23.242538512807673, "learning_rate": 8.980589026116365e-07, "loss": 0.6441739797592163, "step": 1559 }, { "epoch": 2.480127186009539, "grad_norm": 6.995497707685557, "learning_rate": 8.927762194860034e-07, "loss": 0.38172465562820435, "step": 1560 }, { "epoch": 2.481717011128776, "grad_norm": 10.39319097779616, "learning_rate": 8.87507595949057e-07, "loss": 0.42091259360313416, "step": 1561 }, { "epoch": 2.483306836248013, "grad_norm": 9.574348703016542, "learning_rate": 8.822530500360149e-07, "loss": 0.463512659072876, "step": 1562 }, { "epoch": 2.48489666136725, "grad_norm": 13.936979413116735, "learning_rate": 8.770125997339058e-07, "loss": 0.414907842874527, "step": 1563 }, { "epoch": 2.4864864864864864, "grad_norm": 7.396202045732436, "learning_rate": 8.717862629815099e-07, "loss": 0.27465397119522095, "step": 1564 }, { "epoch": 2.4880763116057234, "grad_norm": 10.039782203585236, "learning_rate": 8.665740576692905e-07, "loss": 0.3823194205760956, "step": 1565 }, { "epoch": 2.4896661367249604, "grad_norm": 14.621887501884695, "learning_rate": 8.613760016393396e-07, "loss": 0.4154517948627472, "step": 1566 }, { "epoch": 2.491255961844197, "grad_norm": 10.717313100279354, "learning_rate": 8.561921126853151e-07, "loss": 0.6768748760223389, "step": 1567 }, { "epoch": 2.492845786963434, "grad_norm": 15.849704032961357, "learning_rate": 8.510224085523755e-07, "loss": 0.8631222248077393, "step": 1568 }, { "epoch": 2.494435612082671, "grad_norm": 9.36745553642082, "learning_rate": 8.458669069371278e-07, "loss": 0.6986095905303955, "step": 1569 }, { "epoch": 2.496025437201908, "grad_norm": 9.584416315406488, "learning_rate": 8.407256254875573e-07, "loss": 0.48314929008483887, "step": 1570 }, { "epoch": 2.4976152623211445, "grad_norm": 17.063669106186904, "learning_rate": 8.355985818029733e-07, "loss": 0.3524690866470337, "step": 1571 }, { "epoch": 2.4992050874403815, "grad_norm": 10.396412903566285, "learning_rate": 8.304857934339494e-07, "loss": 0.5200834274291992, "step": 1572 }, { "epoch": 2.5007949125596185, "grad_norm": 7.711979616193333, "learning_rate": 8.253872778822564e-07, "loss": 0.2625265419483185, "step": 1573 }, { "epoch": 2.502384737678855, "grad_norm": 9.955593458884781, "learning_rate": 8.203030526008132e-07, "loss": 0.3811442255973816, "step": 1574 }, { "epoch": 2.503974562798092, "grad_norm": 18.040384809390915, "learning_rate": 8.152331349936177e-07, "loss": 0.3784019351005554, "step": 1575 }, { "epoch": 2.505564387917329, "grad_norm": 9.853858936601155, "learning_rate": 8.101775424156888e-07, "loss": 0.526119589805603, "step": 1576 }, { "epoch": 2.507154213036566, "grad_norm": 14.200070785171242, "learning_rate": 8.051362921730139e-07, "loss": 0.3691103458404541, "step": 1577 }, { "epoch": 2.508744038155803, "grad_norm": 10.184463467888294, "learning_rate": 8.001094015224813e-07, "loss": 0.9008461236953735, "step": 1578 }, { "epoch": 2.5103338632750396, "grad_norm": 10.603969424774617, "learning_rate": 7.95096887671824e-07, "loss": 0.4990682005882263, "step": 1579 }, { "epoch": 2.5119236883942766, "grad_norm": 10.785510067750703, "learning_rate": 7.900987677795646e-07, "loss": 0.48411014676094055, "step": 1580 }, { "epoch": 2.5135135135135136, "grad_norm": 14.022141952676426, "learning_rate": 7.851150589549483e-07, "loss": 0.32116425037384033, "step": 1581 }, { "epoch": 2.51510333863275, "grad_norm": 8.850164925121264, "learning_rate": 7.801457782578947e-07, "loss": 0.2642374336719513, "step": 1582 }, { "epoch": 2.516693163751987, "grad_norm": 8.03220177579705, "learning_rate": 7.751909426989296e-07, "loss": 0.3452329933643341, "step": 1583 }, { "epoch": 2.518282988871224, "grad_norm": 9.267574245471948, "learning_rate": 7.702505692391332e-07, "loss": 0.21324273943901062, "step": 1584 }, { "epoch": 2.519872813990461, "grad_norm": 15.753336355835474, "learning_rate": 7.653246747900794e-07, "loss": 0.45646774768829346, "step": 1585 }, { "epoch": 2.521462639109698, "grad_norm": 17.20248048464396, "learning_rate": 7.604132762137773e-07, "loss": 0.7054089307785034, "step": 1586 }, { "epoch": 2.5230524642289347, "grad_norm": 16.001424238132984, "learning_rate": 7.555163903226182e-07, "loss": 0.22410085797309875, "step": 1587 }, { "epoch": 2.5246422893481717, "grad_norm": 37.693778212601856, "learning_rate": 7.506340338793111e-07, "loss": 0.45611119270324707, "step": 1588 }, { "epoch": 2.5262321144674087, "grad_norm": 11.700490279132813, "learning_rate": 7.457662235968283e-07, "loss": 0.4790339171886444, "step": 1589 }, { "epoch": 2.5278219395866453, "grad_norm": 11.098838776133144, "learning_rate": 7.409129761383527e-07, "loss": 0.3785508871078491, "step": 1590 }, { "epoch": 2.5294117647058822, "grad_norm": 16.927801069701665, "learning_rate": 7.360743081172122e-07, "loss": 0.4303959608078003, "step": 1591 }, { "epoch": 2.5310015898251192, "grad_norm": 10.982760647223845, "learning_rate": 7.312502360968305e-07, "loss": 0.7339632511138916, "step": 1592 }, { "epoch": 2.5325914149443562, "grad_norm": 10.853451470652951, "learning_rate": 7.26440776590665e-07, "loss": 0.23932099342346191, "step": 1593 }, { "epoch": 2.5341812400635932, "grad_norm": 12.964493817987583, "learning_rate": 7.216459460621528e-07, "loss": 0.6250436305999756, "step": 1594 }, { "epoch": 2.53577106518283, "grad_norm": 7.910019100161251, "learning_rate": 7.16865760924656e-07, "loss": 0.1341008096933365, "step": 1595 }, { "epoch": 2.537360890302067, "grad_norm": 13.263447526640324, "learning_rate": 7.121002375413999e-07, "loss": 0.36424165964126587, "step": 1596 }, { "epoch": 2.538950715421304, "grad_norm": 14.3513570530778, "learning_rate": 7.073493922254254e-07, "loss": 0.2999057471752167, "step": 1597 }, { "epoch": 2.5405405405405403, "grad_norm": 10.559755824923737, "learning_rate": 7.026132412395247e-07, "loss": 0.7477350234985352, "step": 1598 }, { "epoch": 2.5421303656597773, "grad_norm": 8.702177987866476, "learning_rate": 6.978918007961888e-07, "loss": 0.20698505640029907, "step": 1599 }, { "epoch": 2.5437201907790143, "grad_norm": 7.471293346977418, "learning_rate": 6.931850870575563e-07, "loss": 0.2608323097229004, "step": 1600 }, { "epoch": 2.5453100158982513, "grad_norm": 15.229612340693974, "learning_rate": 6.884931161353509e-07, "loss": 0.4971431493759155, "step": 1601 }, { "epoch": 2.5468998410174883, "grad_norm": 9.507301452331756, "learning_rate": 6.838159040908294e-07, "loss": 0.37410780787467957, "step": 1602 }, { "epoch": 2.548489666136725, "grad_norm": 13.095317314498544, "learning_rate": 6.791534669347311e-07, "loss": 0.807184100151062, "step": 1603 }, { "epoch": 2.550079491255962, "grad_norm": 6.730399733139424, "learning_rate": 6.745058206272132e-07, "loss": 0.38807761669158936, "step": 1604 }, { "epoch": 2.551669316375199, "grad_norm": 8.515536935569736, "learning_rate": 6.698729810778065e-07, "loss": 0.5509893894195557, "step": 1605 }, { "epoch": 2.5532591414944354, "grad_norm": 12.284475731688362, "learning_rate": 6.652549641453543e-07, "loss": 0.20167602598667145, "step": 1606 }, { "epoch": 2.5548489666136724, "grad_norm": 12.986517686509385, "learning_rate": 6.606517856379585e-07, "loss": 0.695963978767395, "step": 1607 }, { "epoch": 2.5564387917329094, "grad_norm": 10.273584333782026, "learning_rate": 6.560634613129308e-07, "loss": 0.6035486459732056, "step": 1608 }, { "epoch": 2.5580286168521464, "grad_norm": 17.874198920320232, "learning_rate": 6.514900068767316e-07, "loss": 0.7390936613082886, "step": 1609 }, { "epoch": 2.559618441971383, "grad_norm": 16.7843093457424, "learning_rate": 6.469314379849212e-07, "loss": 0.9999498128890991, "step": 1610 }, { "epoch": 2.56120826709062, "grad_norm": 8.422352180286689, "learning_rate": 6.423877702421038e-07, "loss": 0.21220804750919342, "step": 1611 }, { "epoch": 2.562798092209857, "grad_norm": 8.76100880894034, "learning_rate": 6.378590192018752e-07, "loss": 0.30193108320236206, "step": 1612 }, { "epoch": 2.5643879173290935, "grad_norm": 11.065829417180284, "learning_rate": 6.333452003667712e-07, "loss": 0.4609653353691101, "step": 1613 }, { "epoch": 2.5659777424483305, "grad_norm": 7.808157083980439, "learning_rate": 6.288463291882085e-07, "loss": 0.1653745174407959, "step": 1614 }, { "epoch": 2.5675675675675675, "grad_norm": 11.188389838014922, "learning_rate": 6.243624210664406e-07, "loss": 0.4522320032119751, "step": 1615 }, { "epoch": 2.5691573926868045, "grad_norm": 11.317333524643795, "learning_rate": 6.198934913504978e-07, "loss": 0.2544410228729248, "step": 1616 }, { "epoch": 2.5707472178060415, "grad_norm": 12.812176029042877, "learning_rate": 6.15439555338136e-07, "loss": 0.6666281223297119, "step": 1617 }, { "epoch": 2.572337042925278, "grad_norm": 16.036796436324735, "learning_rate": 6.110006282757897e-07, "loss": 1.2459876537322998, "step": 1618 }, { "epoch": 2.573926868044515, "grad_norm": 10.515581594009006, "learning_rate": 6.065767253585125e-07, "loss": 0.3247292637825012, "step": 1619 }, { "epoch": 2.575516693163752, "grad_norm": 12.829504232267913, "learning_rate": 6.021678617299271e-07, "loss": 0.709840714931488, "step": 1620 }, { "epoch": 2.5771065182829886, "grad_norm": 13.365765765443042, "learning_rate": 5.977740524821796e-07, "loss": 0.3702651858329773, "step": 1621 }, { "epoch": 2.5786963434022256, "grad_norm": 12.440548025842865, "learning_rate": 5.933953126558772e-07, "loss": 0.7516118288040161, "step": 1622 }, { "epoch": 2.5802861685214626, "grad_norm": 10.061767061306744, "learning_rate": 5.890316572400478e-07, "loss": 0.2932838797569275, "step": 1623 }, { "epoch": 2.5818759936406996, "grad_norm": 10.82001039911602, "learning_rate": 5.846831011720789e-07, "loss": 0.45325106382369995, "step": 1624 }, { "epoch": 2.5834658187599366, "grad_norm": 11.537942990883321, "learning_rate": 5.803496593376722e-07, "loss": 0.38455528020858765, "step": 1625 }, { "epoch": 2.585055643879173, "grad_norm": 14.921829719693056, "learning_rate": 5.76031346570794e-07, "loss": 0.1773259937763214, "step": 1626 }, { "epoch": 2.58664546899841, "grad_norm": 16.04680262394032, "learning_rate": 5.717281776536166e-07, "loss": 0.83031165599823, "step": 1627 }, { "epoch": 2.588235294117647, "grad_norm": 15.320530679465007, "learning_rate": 5.674401673164781e-07, "loss": 0.39672306180000305, "step": 1628 }, { "epoch": 2.5898251192368837, "grad_norm": 11.056819549654698, "learning_rate": 5.631673302378238e-07, "loss": 0.6218395829200745, "step": 1629 }, { "epoch": 2.5914149443561207, "grad_norm": 7.723791379520601, "learning_rate": 5.589096810441574e-07, "loss": 0.29606300592422485, "step": 1630 }, { "epoch": 2.5930047694753577, "grad_norm": 18.886174979076156, "learning_rate": 5.546672343099968e-07, "loss": 0.5936672687530518, "step": 1631 }, { "epoch": 2.5945945945945947, "grad_norm": 10.563788848015669, "learning_rate": 5.504400045578167e-07, "loss": 0.4816802144050598, "step": 1632 }, { "epoch": 2.5961844197138317, "grad_norm": 14.40919010146339, "learning_rate": 5.462280062580011e-07, "loss": 0.7849152684211731, "step": 1633 }, { "epoch": 2.5977742448330683, "grad_norm": 12.083284436625968, "learning_rate": 5.420312538287981e-07, "loss": 0.23630741238594055, "step": 1634 }, { "epoch": 2.5993640699523053, "grad_norm": 17.052932534514387, "learning_rate": 5.378497616362638e-07, "loss": 0.5553884506225586, "step": 1635 }, { "epoch": 2.6009538950715423, "grad_norm": 13.1190492109006, "learning_rate": 5.3368354399422e-07, "loss": 0.2689938545227051, "step": 1636 }, { "epoch": 2.602543720190779, "grad_norm": 9.06212736745851, "learning_rate": 5.295326151641966e-07, "loss": 0.4750140309333801, "step": 1637 }, { "epoch": 2.604133545310016, "grad_norm": 11.792696999459864, "learning_rate": 5.253969893553929e-07, "loss": 0.2415129542350769, "step": 1638 }, { "epoch": 2.605723370429253, "grad_norm": 7.8365374598623765, "learning_rate": 5.212766807246206e-07, "loss": 0.20322957634925842, "step": 1639 }, { "epoch": 2.60731319554849, "grad_norm": 12.072105526436122, "learning_rate": 5.171717033762585e-07, "loss": 0.7623979449272156, "step": 1640 }, { "epoch": 2.6089030206677264, "grad_norm": 12.667251885854688, "learning_rate": 5.130820713622076e-07, "loss": 0.5517823696136475, "step": 1641 }, { "epoch": 2.6104928457869634, "grad_norm": 12.32673641615129, "learning_rate": 5.090077986818365e-07, "loss": 0.6303294897079468, "step": 1642 }, { "epoch": 2.6120826709062004, "grad_norm": 6.0750732760489905, "learning_rate": 5.049488992819373e-07, "loss": 0.14904797077178955, "step": 1643 }, { "epoch": 2.613672496025437, "grad_norm": 12.847502830626928, "learning_rate": 5.009053870566793e-07, "loss": 0.9003888964653015, "step": 1644 }, { "epoch": 2.615262321144674, "grad_norm": 12.970180432873189, "learning_rate": 4.968772758475554e-07, "loss": 0.7371312379837036, "step": 1645 }, { "epoch": 2.616852146263911, "grad_norm": 8.484598724820248, "learning_rate": 4.92864579443344e-07, "loss": 0.2992517948150635, "step": 1646 }, { "epoch": 2.618441971383148, "grad_norm": 10.894264933359624, "learning_rate": 4.888673115800519e-07, "loss": 0.39728879928588867, "step": 1647 }, { "epoch": 2.620031796502385, "grad_norm": 9.56178561855161, "learning_rate": 4.848854859408731e-07, "loss": 0.7288751602172852, "step": 1648 }, { "epoch": 2.6216216216216215, "grad_norm": 9.516919345378364, "learning_rate": 4.809191161561432e-07, "loss": 0.36588314175605774, "step": 1649 }, { "epoch": 2.6232114467408585, "grad_norm": 13.863795640760882, "learning_rate": 4.769682158032873e-07, "loss": 0.3402399718761444, "step": 1650 }, { "epoch": 2.6248012718600955, "grad_norm": 15.993452666820417, "learning_rate": 4.7303279840677675e-07, "loss": 0.5849899649620056, "step": 1651 }, { "epoch": 2.626391096979332, "grad_norm": 7.7860113567275056, "learning_rate": 4.6911287743808486e-07, "loss": 0.15354162454605103, "step": 1652 }, { "epoch": 2.627980922098569, "grad_norm": 7.792147534654751, "learning_rate": 4.652084663156364e-07, "loss": 0.10557040572166443, "step": 1653 }, { "epoch": 2.629570747217806, "grad_norm": 12.969276276510007, "learning_rate": 4.613195784047653e-07, "loss": 0.7443052530288696, "step": 1654 }, { "epoch": 2.631160572337043, "grad_norm": 23.001421263013384, "learning_rate": 4.574462270176666e-07, "loss": 0.4804832935333252, "step": 1655 }, { "epoch": 2.63275039745628, "grad_norm": 8.452243256757463, "learning_rate": 4.5358842541335047e-07, "loss": 0.4740391969680786, "step": 1656 }, { "epoch": 2.6343402225755166, "grad_norm": 9.240565385002999, "learning_rate": 4.4974618679760164e-07, "loss": 0.4672290086746216, "step": 1657 }, { "epoch": 2.6359300476947536, "grad_norm": 11.37108360105448, "learning_rate": 4.4591952432292584e-07, "loss": 0.9921671152114868, "step": 1658 }, { "epoch": 2.6375198728139906, "grad_norm": 9.649192907468974, "learning_rate": 4.421084510885143e-07, "loss": 0.7385834455490112, "step": 1659 }, { "epoch": 2.639109697933227, "grad_norm": 19.59520026392758, "learning_rate": 4.3831298014019144e-07, "loss": 0.8268567323684692, "step": 1660 }, { "epoch": 2.640699523052464, "grad_norm": 16.78123363872058, "learning_rate": 4.34533124470371e-07, "loss": 0.3472693860530853, "step": 1661 }, { "epoch": 2.642289348171701, "grad_norm": 11.874912683513834, "learning_rate": 4.3076889701801905e-07, "loss": 0.2339482307434082, "step": 1662 }, { "epoch": 2.643879173290938, "grad_norm": 8.252398533852787, "learning_rate": 4.2702031066859993e-07, "loss": 0.351040780544281, "step": 1663 }, { "epoch": 2.645468998410175, "grad_norm": 10.127133252654218, "learning_rate": 4.2328737825403645e-07, "loss": 0.7667810916900635, "step": 1664 }, { "epoch": 2.6470588235294117, "grad_norm": 9.643925839418634, "learning_rate": 4.195701125526674e-07, "loss": 0.4055326282978058, "step": 1665 }, { "epoch": 2.6486486486486487, "grad_norm": 14.210118121798061, "learning_rate": 4.1586852628920095e-07, "loss": 0.9189319610595703, "step": 1666 }, { "epoch": 2.6502384737678857, "grad_norm": 8.495696836045818, "learning_rate": 4.121826321346739e-07, "loss": 0.49973970651626587, "step": 1667 }, { "epoch": 2.6518282988871222, "grad_norm": 10.94379871167478, "learning_rate": 4.085124427064052e-07, "loss": 0.5071969628334045, "step": 1668 }, { "epoch": 2.6534181240063592, "grad_norm": 11.065890666647261, "learning_rate": 4.0485797056795675e-07, "loss": 0.5154078006744385, "step": 1669 }, { "epoch": 2.6550079491255962, "grad_norm": 10.462810011955655, "learning_rate": 4.0121922822908556e-07, "loss": 0.6714562177658081, "step": 1670 }, { "epoch": 2.6565977742448332, "grad_norm": 8.423522122403073, "learning_rate": 3.975962281457035e-07, "loss": 0.32664385437965393, "step": 1671 }, { "epoch": 2.65818759936407, "grad_norm": 11.128517439642401, "learning_rate": 3.939889827198362e-07, "loss": 0.4714875817298889, "step": 1672 }, { "epoch": 2.659777424483307, "grad_norm": 13.923378222882656, "learning_rate": 3.9039750429957835e-07, "loss": 0.33897116780281067, "step": 1673 }, { "epoch": 2.661367249602544, "grad_norm": 20.032379341723246, "learning_rate": 3.868218051790501e-07, "loss": 2.706486940383911, "step": 1674 }, { "epoch": 2.6629570747217803, "grad_norm": 10.415209305400058, "learning_rate": 3.8326189759836097e-07, "loss": 0.6098494529724121, "step": 1675 }, { "epoch": 2.6645468998410173, "grad_norm": 10.828557752450559, "learning_rate": 3.7971779374355866e-07, "loss": 0.3058406412601471, "step": 1676 }, { "epoch": 2.6661367249602543, "grad_norm": 11.462076872010293, "learning_rate": 3.7618950574659807e-07, "loss": 0.3489352762699127, "step": 1677 }, { "epoch": 2.6677265500794913, "grad_norm": 6.422738759532046, "learning_rate": 3.7267704568529015e-07, "loss": 0.23481842875480652, "step": 1678 }, { "epoch": 2.6693163751987283, "grad_norm": 17.138628565054685, "learning_rate": 3.6918042558326597e-07, "loss": 0.5964177846908569, "step": 1679 }, { "epoch": 2.670906200317965, "grad_norm": 15.512416207448021, "learning_rate": 3.6569965740993475e-07, "loss": 0.4641593098640442, "step": 1680 }, { "epoch": 2.672496025437202, "grad_norm": 17.229910284953995, "learning_rate": 3.622347530804415e-07, "loss": 0.603171706199646, "step": 1681 }, { "epoch": 2.674085850556439, "grad_norm": 15.562808091273189, "learning_rate": 3.5878572445562754e-07, "loss": 0.6970657110214233, "step": 1682 }, { "epoch": 2.6756756756756754, "grad_norm": 12.024300997243945, "learning_rate": 3.553525833419902e-07, "loss": 0.4082297384738922, "step": 1683 }, { "epoch": 2.6772655007949124, "grad_norm": 10.420633401843734, "learning_rate": 3.519353414916404e-07, "loss": 0.2764531970024109, "step": 1684 }, { "epoch": 2.6788553259141494, "grad_norm": 15.681718598155335, "learning_rate": 3.48534010602265e-07, "loss": 0.8584216237068176, "step": 1685 }, { "epoch": 2.6804451510333864, "grad_norm": 9.83380597552202, "learning_rate": 3.4514860231708414e-07, "loss": 0.2912900149822235, "step": 1686 }, { "epoch": 2.6820349761526234, "grad_norm": 24.192951811930875, "learning_rate": 3.4177912822481286e-07, "loss": 0.5030669569969177, "step": 1687 }, { "epoch": 2.68362480127186, "grad_norm": 11.88358896966254, "learning_rate": 3.3842559985962363e-07, "loss": 1.0259262323379517, "step": 1688 }, { "epoch": 2.685214626391097, "grad_norm": 11.567529976995225, "learning_rate": 3.3508802870109993e-07, "loss": 0.3652951717376709, "step": 1689 }, { "epoch": 2.686804451510334, "grad_norm": 12.496576402718473, "learning_rate": 3.3176642617420817e-07, "loss": 0.4120873212814331, "step": 1690 }, { "epoch": 2.6883942766295705, "grad_norm": 7.095600452380263, "learning_rate": 3.2846080364924373e-07, "loss": 0.1631387323141098, "step": 1691 }, { "epoch": 2.6899841017488075, "grad_norm": 9.065489804730754, "learning_rate": 3.251711724418072e-07, "loss": 1.10606050491333, "step": 1692 }, { "epoch": 2.6915739268680445, "grad_norm": 13.879363856578601, "learning_rate": 3.218975438127558e-07, "loss": 0.4376085698604584, "step": 1693 }, { "epoch": 2.6931637519872815, "grad_norm": 10.554489461431915, "learning_rate": 3.1863992896816634e-07, "loss": 0.3944145441055298, "step": 1694 }, { "epoch": 2.6947535771065185, "grad_norm": 9.418832517818354, "learning_rate": 3.153983390593024e-07, "loss": 0.32152724266052246, "step": 1695 }, { "epoch": 2.696343402225755, "grad_norm": 8.092387137721639, "learning_rate": 3.1217278518256844e-07, "loss": 0.28785014152526855, "step": 1696 }, { "epoch": 2.697933227344992, "grad_norm": 10.686618269640556, "learning_rate": 3.089632783794755e-07, "loss": 0.3542977273464203, "step": 1697 }, { "epoch": 2.699523052464229, "grad_norm": 10.530056142263104, "learning_rate": 3.0576982963660575e-07, "loss": 0.45689189434051514, "step": 1698 }, { "epoch": 2.7011128775834656, "grad_norm": 11.937948434636159, "learning_rate": 3.0259244988556977e-07, "loss": 0.657897412776947, "step": 1699 }, { "epoch": 2.7027027027027026, "grad_norm": 7.739921962689363, "learning_rate": 2.9943115000297453e-07, "loss": 0.17261117696762085, "step": 1700 }, { "epoch": 2.7042925278219396, "grad_norm": 11.964371899552889, "learning_rate": 2.962859408103808e-07, "loss": 0.5867359638214111, "step": 1701 }, { "epoch": 2.7058823529411766, "grad_norm": 22.050755638219414, "learning_rate": 2.93156833074269e-07, "loss": 0.38635432720184326, "step": 1702 }, { "epoch": 2.7074721780604136, "grad_norm": 12.547226895622131, "learning_rate": 2.9004383750600495e-07, "loss": 0.679887056350708, "step": 1703 }, { "epoch": 2.70906200317965, "grad_norm": 14.929427715187519, "learning_rate": 2.869469647617967e-07, "loss": 0.9920786619186401, "step": 1704 }, { "epoch": 2.710651828298887, "grad_norm": 11.228509815017409, "learning_rate": 2.8386622544266273e-07, "loss": 0.34728148579597473, "step": 1705 }, { "epoch": 2.7122416534181237, "grad_norm": 15.131913430676436, "learning_rate": 2.808016300943961e-07, "loss": 0.5798835158348083, "step": 1706 }, { "epoch": 2.7138314785373607, "grad_norm": 11.26085110975895, "learning_rate": 2.777531892075253e-07, "loss": 0.4404895603656769, "step": 1707 }, { "epoch": 2.7154213036565977, "grad_norm": 12.7970246377021, "learning_rate": 2.7472091321728067e-07, "loss": 0.2727869749069214, "step": 1708 }, { "epoch": 2.7170111287758347, "grad_norm": 6.175092667265632, "learning_rate": 2.717048125035582e-07, "loss": 0.4119480550289154, "step": 1709 }, { "epoch": 2.7186009538950717, "grad_norm": 9.03462824372083, "learning_rate": 2.6870489739088124e-07, "loss": 0.3013087511062622, "step": 1710 }, { "epoch": 2.7201907790143083, "grad_norm": 11.724798586268738, "learning_rate": 2.6572117814837096e-07, "loss": 1.1680785417556763, "step": 1711 }, { "epoch": 2.7217806041335453, "grad_norm": 14.084016441362202, "learning_rate": 2.6275366498970553e-07, "loss": 0.44381779432296753, "step": 1712 }, { "epoch": 2.7233704292527823, "grad_norm": 7.543543684045383, "learning_rate": 2.598023680730899e-07, "loss": 0.39759576320648193, "step": 1713 }, { "epoch": 2.724960254372019, "grad_norm": 12.813558667772158, "learning_rate": 2.568672975012154e-07, "loss": 0.29264506697654724, "step": 1714 }, { "epoch": 2.726550079491256, "grad_norm": 15.940170547428268, "learning_rate": 2.5394846332123026e-07, "loss": 0.38988327980041504, "step": 1715 }, { "epoch": 2.728139904610493, "grad_norm": 9.447351866023746, "learning_rate": 2.510458755247042e-07, "loss": 0.3465936779975891, "step": 1716 }, { "epoch": 2.72972972972973, "grad_norm": 8.66900468551076, "learning_rate": 2.4815954404759034e-07, "loss": 0.32057106494903564, "step": 1717 }, { "epoch": 2.731319554848967, "grad_norm": 11.317074550394812, "learning_rate": 2.4528947877019706e-07, "loss": 0.4713735282421112, "step": 1718 }, { "epoch": 2.7329093799682034, "grad_norm": 11.300950450913268, "learning_rate": 2.424356895171509e-07, "loss": 0.8660344481468201, "step": 1719 }, { "epoch": 2.7344992050874404, "grad_norm": 12.591955390910721, "learning_rate": 2.3959818605736095e-07, "loss": 0.5647845268249512, "step": 1720 }, { "epoch": 2.7360890302066774, "grad_norm": 10.689880018027425, "learning_rate": 2.3677697810399135e-07, "loss": 0.7428934574127197, "step": 1721 }, { "epoch": 2.737678855325914, "grad_norm": 14.450292631625917, "learning_rate": 2.3397207531442144e-07, "loss": 0.6741989850997925, "step": 1722 }, { "epoch": 2.739268680445151, "grad_norm": 10.934277929099444, "learning_rate": 2.3118348729021856e-07, "loss": 0.3719398081302643, "step": 1723 }, { "epoch": 2.740858505564388, "grad_norm": 8.582688526207399, "learning_rate": 2.284112235771002e-07, "loss": 0.35930246114730835, "step": 1724 }, { "epoch": 2.742448330683625, "grad_norm": 11.584902631146877, "learning_rate": 2.2565529366490312e-07, "loss": 0.35735633969306946, "step": 1725 }, { "epoch": 2.744038155802862, "grad_norm": 8.992750559088263, "learning_rate": 2.229157069875537e-07, "loss": 0.32046449184417725, "step": 1726 }, { "epoch": 2.7456279809220985, "grad_norm": 11.202624312115873, "learning_rate": 2.2019247292303148e-07, "loss": 0.33610597252845764, "step": 1727 }, { "epoch": 2.7472178060413355, "grad_norm": 11.550655643274695, "learning_rate": 2.174856007933379e-07, "loss": 0.3931558132171631, "step": 1728 }, { "epoch": 2.7488076311605725, "grad_norm": 14.989140380847523, "learning_rate": 2.1479509986446822e-07, "loss": 0.32899802923202515, "step": 1729 }, { "epoch": 2.750397456279809, "grad_norm": 9.482109135796849, "learning_rate": 2.1212097934637356e-07, "loss": 0.7810277938842773, "step": 1730 }, { "epoch": 2.751987281399046, "grad_norm": 15.35055441486757, "learning_rate": 2.094632483929354e-07, "loss": 0.5776809453964233, "step": 1731 }, { "epoch": 2.753577106518283, "grad_norm": 8.57731695828125, "learning_rate": 2.068219161019297e-07, "loss": 0.6691749095916748, "step": 1732 }, { "epoch": 2.75516693163752, "grad_norm": 7.381290455070177, "learning_rate": 2.0419699151499773e-07, "loss": 0.27193355560302734, "step": 1733 }, { "epoch": 2.756756756756757, "grad_norm": 5.699874460976283, "learning_rate": 2.015884836176163e-07, "loss": 0.1572606861591339, "step": 1734 }, { "epoch": 2.7583465818759936, "grad_norm": 10.571373513647107, "learning_rate": 1.9899640133906384e-07, "loss": 0.436165452003479, "step": 1735 }, { "epoch": 2.7599364069952306, "grad_norm": 12.62306507671066, "learning_rate": 1.964207535523921e-07, "loss": 1.0329084396362305, "step": 1736 }, { "epoch": 2.7615262321144676, "grad_norm": 20.404350098474076, "learning_rate": 1.938615490743967e-07, "loss": 0.9515388011932373, "step": 1737 }, { "epoch": 2.763116057233704, "grad_norm": 16.516993328724855, "learning_rate": 1.9131879666558385e-07, "loss": 0.5335630774497986, "step": 1738 }, { "epoch": 2.764705882352941, "grad_norm": 9.50707389592946, "learning_rate": 1.8879250503014367e-07, "loss": 0.23655389249324799, "step": 1739 }, { "epoch": 2.766295707472178, "grad_norm": 8.11653483870117, "learning_rate": 1.86282682815917e-07, "loss": 0.24722740054130554, "step": 1740 }, { "epoch": 2.767885532591415, "grad_norm": 9.555643771432486, "learning_rate": 1.8378933861436855e-07, "loss": 0.4017741084098816, "step": 1741 }, { "epoch": 2.7694753577106517, "grad_norm": 11.245709313022, "learning_rate": 1.813124809605571e-07, "loss": 0.8513485193252563, "step": 1742 }, { "epoch": 2.7710651828298887, "grad_norm": 10.289858176218264, "learning_rate": 1.788521183331049e-07, "loss": 0.4348328709602356, "step": 1743 }, { "epoch": 2.7726550079491257, "grad_norm": 13.033295111219543, "learning_rate": 1.7640825915416994e-07, "loss": 0.34685057401657104, "step": 1744 }, { "epoch": 2.7742448330683622, "grad_norm": 14.12925895553136, "learning_rate": 1.739809117894148e-07, "loss": 1.3768744468688965, "step": 1745 }, { "epoch": 2.7758346581875992, "grad_norm": 10.75066455008062, "learning_rate": 1.7157008454798395e-07, "loss": 0.5484975576400757, "step": 1746 }, { "epoch": 2.7774244833068362, "grad_norm": 12.245859424903898, "learning_rate": 1.6917578568246717e-07, "loss": 0.6153717041015625, "step": 1747 }, { "epoch": 2.779014308426073, "grad_norm": 8.749615472468598, "learning_rate": 1.6679802338887662e-07, "loss": 0.39723843336105347, "step": 1748 }, { "epoch": 2.78060413354531, "grad_norm": 10.912593855202864, "learning_rate": 1.644368058066187e-07, "loss": 0.5568326711654663, "step": 1749 }, { "epoch": 2.7821939586645468, "grad_norm": 6.709663791277373, "learning_rate": 1.6209214101846394e-07, "loss": 0.1708815097808838, "step": 1750 }, { "epoch": 2.7837837837837838, "grad_norm": 16.67528928478053, "learning_rate": 1.597640370505199e-07, "loss": 0.4418516755104065, "step": 1751 }, { "epoch": 2.7853736089030208, "grad_norm": 14.998369237081626, "learning_rate": 1.5745250187220617e-07, "loss": 1.134207844734192, "step": 1752 }, { "epoch": 2.7869634340222573, "grad_norm": 11.137562040895848, "learning_rate": 1.5515754339622214e-07, "loss": 0.35479578375816345, "step": 1753 }, { "epoch": 2.7885532591414943, "grad_norm": 11.636705668013217, "learning_rate": 1.5287916947852643e-07, "loss": 0.46284008026123047, "step": 1754 }, { "epoch": 2.7901430842607313, "grad_norm": 16.63965921358452, "learning_rate": 1.506173879183026e-07, "loss": 1.0378285646438599, "step": 1755 }, { "epoch": 2.7917329093799683, "grad_norm": 8.155129987603422, "learning_rate": 1.4837220645793905e-07, "loss": 0.39164969325065613, "step": 1756 }, { "epoch": 2.7933227344992053, "grad_norm": 10.211018022040458, "learning_rate": 1.461436327829996e-07, "loss": 0.48248136043548584, "step": 1757 }, { "epoch": 2.794912559618442, "grad_norm": 15.348307269375146, "learning_rate": 1.4393167452219637e-07, "loss": 0.2933463156223297, "step": 1758 }, { "epoch": 2.796502384737679, "grad_norm": 9.875084012501972, "learning_rate": 1.4173633924736364e-07, "loss": 0.20340853929519653, "step": 1759 }, { "epoch": 2.798092209856916, "grad_norm": 9.633227056455548, "learning_rate": 1.3955763447343618e-07, "loss": 0.5475227236747742, "step": 1760 }, { "epoch": 2.7996820349761524, "grad_norm": 12.958123384803194, "learning_rate": 1.3739556765841712e-07, "loss": 0.4999805688858032, "step": 1761 }, { "epoch": 2.8012718600953894, "grad_norm": 8.208968385354538, "learning_rate": 1.3525014620335786e-07, "loss": 0.23228999972343445, "step": 1762 }, { "epoch": 2.8028616852146264, "grad_norm": 10.342733229057272, "learning_rate": 1.3312137745232878e-07, "loss": 0.34087157249450684, "step": 1763 }, { "epoch": 2.8044515103338634, "grad_norm": 7.714862229275131, "learning_rate": 1.3100926869239583e-07, "loss": 0.2724880576133728, "step": 1764 }, { "epoch": 2.8060413354531004, "grad_norm": 9.850744407589888, "learning_rate": 1.289138271535978e-07, "loss": 0.3237707316875458, "step": 1765 }, { "epoch": 2.807631160572337, "grad_norm": 12.26752422629431, "learning_rate": 1.2683506000891634e-07, "loss": 0.3386530876159668, "step": 1766 }, { "epoch": 2.809220985691574, "grad_norm": 9.679795722605048, "learning_rate": 1.2477297437425596e-07, "loss": 0.26200276613235474, "step": 1767 }, { "epoch": 2.810810810810811, "grad_norm": 8.943234925828914, "learning_rate": 1.2272757730841744e-07, "loss": 0.5326122045516968, "step": 1768 }, { "epoch": 2.8124006359300475, "grad_norm": 13.672979508806772, "learning_rate": 1.2069887581307615e-07, "loss": 0.4719829857349396, "step": 1769 }, { "epoch": 2.8139904610492845, "grad_norm": 14.36269478305396, "learning_rate": 1.1868687683275259e-07, "loss": 0.3456736207008362, "step": 1770 }, { "epoch": 2.8155802861685215, "grad_norm": 9.423080558364749, "learning_rate": 1.1669158725479579e-07, "loss": 0.4019232988357544, "step": 1771 }, { "epoch": 2.8171701112877585, "grad_norm": 9.173722366323783, "learning_rate": 1.1471301390935497e-07, "loss": 0.6657088398933411, "step": 1772 }, { "epoch": 2.818759936406995, "grad_norm": 9.737309689447205, "learning_rate": 1.1275116356935622e-07, "loss": 1.166163444519043, "step": 1773 }, { "epoch": 2.820349761526232, "grad_norm": 9.741914720900354, "learning_rate": 1.1080604295048203e-07, "loss": 0.5329183340072632, "step": 1774 }, { "epoch": 2.821939586645469, "grad_norm": 12.579467080243882, "learning_rate": 1.0887765871114731e-07, "loss": 0.3091738522052765, "step": 1775 }, { "epoch": 2.8235294117647056, "grad_norm": 6.7573935666940015, "learning_rate": 1.0696601745247337e-07, "loss": 0.20654040575027466, "step": 1776 }, { "epoch": 2.8251192368839426, "grad_norm": 11.409061390186178, "learning_rate": 1.0507112571827072e-07, "loss": 0.5325148701667786, "step": 1777 }, { "epoch": 2.8267090620031796, "grad_norm": 13.405759164053116, "learning_rate": 1.0319298999501293e-07, "loss": 0.9440799355506897, "step": 1778 }, { "epoch": 2.8282988871224166, "grad_norm": 6.761965750103733, "learning_rate": 1.0133161671181447e-07, "loss": 0.17241652309894562, "step": 1779 }, { "epoch": 2.8298887122416536, "grad_norm": 9.47481619139041, "learning_rate": 9.948701224041124e-08, "loss": 0.28443360328674316, "step": 1780 }, { "epoch": 2.83147853736089, "grad_norm": 7.723901421020683, "learning_rate": 9.765918289513731e-08, "loss": 0.31056225299835205, "step": 1781 }, { "epoch": 2.833068362480127, "grad_norm": 11.520132965767612, "learning_rate": 9.584813493290157e-08, "loss": 0.3421097993850708, "step": 1782 }, { "epoch": 2.834658187599364, "grad_norm": 12.072702849486587, "learning_rate": 9.405387455316884e-08, "loss": 0.3810597062110901, "step": 1783 }, { "epoch": 2.8362480127186007, "grad_norm": 12.027592705127029, "learning_rate": 9.227640789793823e-08, "loss": 0.3737994432449341, "step": 1784 }, { "epoch": 2.8378378378378377, "grad_norm": 7.216954866434148, "learning_rate": 9.051574105172101e-08, "loss": 0.6682506799697876, "step": 1785 }, { "epoch": 2.8394276629570747, "grad_norm": 12.646397663815016, "learning_rate": 8.877188004152104e-08, "loss": 0.28744667768478394, "step": 1786 }, { "epoch": 2.8410174880763117, "grad_norm": 10.695651972504482, "learning_rate": 8.704483083681159e-08, "loss": 0.25048574805259705, "step": 1787 }, { "epoch": 2.8426073131955487, "grad_norm": 16.61769931593027, "learning_rate": 8.533459934952026e-08, "loss": 0.5408765077590942, "step": 1788 }, { "epoch": 2.8441971383147853, "grad_norm": 9.70944893214215, "learning_rate": 8.364119143400185e-08, "loss": 0.9409646987915039, "step": 1789 }, { "epoch": 2.8457869634340223, "grad_norm": 9.090433208717643, "learning_rate": 8.196461288702384e-08, "loss": 0.8656669855117798, "step": 1790 }, { "epoch": 2.8473767885532593, "grad_norm": 7.850531662470157, "learning_rate": 8.030486944774374e-08, "loss": 0.2649431824684143, "step": 1791 }, { "epoch": 2.848966613672496, "grad_norm": 12.160314277952102, "learning_rate": 7.866196679768956e-08, "loss": 0.3169953525066376, "step": 1792 }, { "epoch": 2.850556438791733, "grad_norm": 12.203789685459478, "learning_rate": 7.703591056074377e-08, "loss": 0.3425353765487671, "step": 1793 }, { "epoch": 2.85214626391097, "grad_norm": 9.92531773421057, "learning_rate": 7.542670630311721e-08, "loss": 0.22243787348270416, "step": 1794 }, { "epoch": 2.853736089030207, "grad_norm": 10.55218952704597, "learning_rate": 7.383435953333684e-08, "loss": 0.42897993326187134, "step": 1795 }, { "epoch": 2.855325914149444, "grad_norm": 8.544479560565039, "learning_rate": 7.225887570222412e-08, "loss": 0.2135796844959259, "step": 1796 }, { "epoch": 2.8569157392686804, "grad_norm": 8.879226711644062, "learning_rate": 7.070026020287446e-08, "loss": 0.3496057391166687, "step": 1797 }, { "epoch": 2.8585055643879174, "grad_norm": 12.957349298601518, "learning_rate": 6.91585183706428e-08, "loss": 0.8193036317825317, "step": 1798 }, { "epoch": 2.8600953895071544, "grad_norm": 10.197298296785444, "learning_rate": 6.76336554831214e-08, "loss": 0.3923993706703186, "step": 1799 }, { "epoch": 2.861685214626391, "grad_norm": 16.559874726296197, "learning_rate": 6.612567676012538e-08, "loss": 1.1482793092727661, "step": 1800 }, { "epoch": 2.863275039745628, "grad_norm": 7.444020657790237, "learning_rate": 6.463458736367111e-08, "loss": 0.4067334234714508, "step": 1801 }, { "epoch": 2.864864864864865, "grad_norm": 9.179057916272, "learning_rate": 6.316039239796235e-08, "loss": 0.4661983549594879, "step": 1802 }, { "epoch": 2.866454689984102, "grad_norm": 15.313397406588937, "learning_rate": 6.170309690937015e-08, "loss": 1.1066529750823975, "step": 1803 }, { "epoch": 2.868044515103339, "grad_norm": 10.898770814177196, "learning_rate": 6.02627058864158e-08, "loss": 0.5644552707672119, "step": 1804 }, { "epoch": 2.8696343402225755, "grad_norm": 13.306162346511908, "learning_rate": 5.883922425975464e-08, "loss": 0.8384277820587158, "step": 1805 }, { "epoch": 2.8712241653418125, "grad_norm": 9.004595737987405, "learning_rate": 5.743265690215938e-08, "loss": 0.41449958086013794, "step": 1806 }, { "epoch": 2.872813990461049, "grad_norm": 8.57015304482371, "learning_rate": 5.604300862850187e-08, "loss": 0.6183022856712341, "step": 1807 }, { "epoch": 2.874403815580286, "grad_norm": 8.604669091621398, "learning_rate": 5.467028419573861e-08, "loss": 0.4073048532009125, "step": 1808 }, { "epoch": 2.875993640699523, "grad_norm": 10.173799114716752, "learning_rate": 5.331448830289354e-08, "loss": 0.4159020185470581, "step": 1809 }, { "epoch": 2.87758346581876, "grad_norm": 11.635023497299457, "learning_rate": 5.19756255910403e-08, "loss": 0.5088472962379456, "step": 1810 }, { "epoch": 2.879173290937997, "grad_norm": 15.532805142667417, "learning_rate": 5.0653700643290006e-08, "loss": 1.051582932472229, "step": 1811 }, { "epoch": 2.8807631160572336, "grad_norm": 12.281460786775908, "learning_rate": 4.934871798477236e-08, "loss": 0.305294394493103, "step": 1812 }, { "epoch": 2.8823529411764706, "grad_norm": 13.511513272154906, "learning_rate": 4.806068208262071e-08, "loss": 0.270668625831604, "step": 1813 }, { "epoch": 2.8839427662957076, "grad_norm": 12.19454070516872, "learning_rate": 4.6789597345959223e-08, "loss": 0.38206809759140015, "step": 1814 }, { "epoch": 2.885532591414944, "grad_norm": 10.512867995816194, "learning_rate": 4.5535468125883496e-08, "loss": 0.4620250165462494, "step": 1815 }, { "epoch": 2.887122416534181, "grad_norm": 10.831279386109593, "learning_rate": 4.429829871545055e-08, "loss": 0.5227418541908264, "step": 1816 }, { "epoch": 2.888712241653418, "grad_norm": 6.535067821791534, "learning_rate": 4.3078093349659955e-08, "loss": 0.08256521075963974, "step": 1817 }, { "epoch": 2.890302066772655, "grad_norm": 8.583564015284736, "learning_rate": 4.187485620544163e-08, "loss": 0.3790075182914734, "step": 1818 }, { "epoch": 2.891891891891892, "grad_norm": 7.114382474719892, "learning_rate": 4.068859140164083e-08, "loss": 0.44887927174568176, "step": 1819 }, { "epoch": 2.8934817170111287, "grad_norm": 9.563998007491127, "learning_rate": 3.9519302999004305e-08, "loss": 0.2510707378387451, "step": 1820 }, { "epoch": 2.8950715421303657, "grad_norm": 9.712756564236205, "learning_rate": 3.836699500016583e-08, "loss": 0.43258237838745117, "step": 1821 }, { "epoch": 2.8966613672496027, "grad_norm": 12.81052648821815, "learning_rate": 3.7231671349634015e-08, "loss": 0.2630723714828491, "step": 1822 }, { "epoch": 2.898251192368839, "grad_norm": 14.894067536846054, "learning_rate": 3.611333593377564e-08, "loss": 0.2762864828109741, "step": 1823 }, { "epoch": 2.899841017488076, "grad_norm": 11.847490898397657, "learning_rate": 3.501199258080734e-08, "loss": 0.4702332019805908, "step": 1824 }, { "epoch": 2.901430842607313, "grad_norm": 9.937636788408996, "learning_rate": 3.3927645060776725e-08, "loss": 0.25099214911460876, "step": 1825 }, { "epoch": 2.90302066772655, "grad_norm": 10.55993956444248, "learning_rate": 3.286029708555405e-08, "loss": 0.4693886339664459, "step": 1826 }, { "epoch": 2.904610492845787, "grad_norm": 12.608332995664167, "learning_rate": 3.1809952308818336e-08, "loss": 0.2815450429916382, "step": 1827 }, { "epoch": 2.9062003179650238, "grad_norm": 8.572207822982811, "learning_rate": 3.077661432604184e-08, "loss": 0.2545127272605896, "step": 1828 }, { "epoch": 2.9077901430842608, "grad_norm": 15.921191531162503, "learning_rate": 2.976028667448283e-08, "loss": 1.7518559694290161, "step": 1829 }, { "epoch": 2.9093799682034978, "grad_norm": 9.007500414306167, "learning_rate": 2.8760972833170032e-08, "loss": 0.34792059659957886, "step": 1830 }, { "epoch": 2.9109697933227343, "grad_norm": 13.566823435631076, "learning_rate": 2.7778676222890433e-08, "loss": 0.5375604629516602, "step": 1831 }, { "epoch": 2.9125596184419713, "grad_norm": 9.262644800722068, "learning_rate": 2.6813400206180394e-08, "loss": 0.6013987064361572, "step": 1832 }, { "epoch": 2.9141494435612083, "grad_norm": 11.66773133548321, "learning_rate": 2.586514808731122e-08, "loss": 0.207585871219635, "step": 1833 }, { "epoch": 2.9157392686804453, "grad_norm": 8.883715589798319, "learning_rate": 2.4933923112279712e-08, "loss": 0.22254782915115356, "step": 1834 }, { "epoch": 2.9173290937996823, "grad_norm": 13.622198817526261, "learning_rate": 2.4019728468797077e-08, "loss": 0.6427359580993652, "step": 1835 }, { "epoch": 2.918918918918919, "grad_norm": 8.570336486844115, "learning_rate": 2.31225672862756e-08, "loss": 0.3003300726413727, "step": 1836 }, { "epoch": 2.920508744038156, "grad_norm": 12.493611999424541, "learning_rate": 2.224244263582087e-08, "loss": 0.39228111505508423, "step": 1837 }, { "epoch": 2.9220985691573924, "grad_norm": 23.237304392000453, "learning_rate": 2.137935753022069e-08, "loss": 1.1374372243881226, "step": 1838 }, { "epoch": 2.9236883942766294, "grad_norm": 9.929956522519397, "learning_rate": 2.053331492393229e-08, "loss": 0.24724097549915314, "step": 1839 }, { "epoch": 2.9252782193958664, "grad_norm": 13.859185232682037, "learning_rate": 1.9704317713076236e-08, "loss": 0.5617753863334656, "step": 1840 }, { "epoch": 2.9268680445151034, "grad_norm": 8.689194933842717, "learning_rate": 1.8892368735422552e-08, "loss": 0.1591765284538269, "step": 1841 }, { "epoch": 2.9284578696343404, "grad_norm": 9.592978204928395, "learning_rate": 1.8097470770384596e-08, "loss": 0.685766339302063, "step": 1842 }, { "epoch": 2.930047694753577, "grad_norm": 8.85441695249632, "learning_rate": 1.7319626539005762e-08, "loss": 0.30965808033943176, "step": 1843 }, { "epoch": 2.931637519872814, "grad_norm": 9.140691655581454, "learning_rate": 1.655883870395336e-08, "loss": 0.25469398498535156, "step": 1844 }, { "epoch": 2.933227344992051, "grad_norm": 14.984105659136183, "learning_rate": 1.5815109869509183e-08, "loss": 0.31988632678985596, "step": 1845 }, { "epoch": 2.9348171701112875, "grad_norm": 8.04716745056648, "learning_rate": 1.508844258155728e-08, "loss": 0.5178359150886536, "step": 1846 }, { "epoch": 2.9364069952305245, "grad_norm": 11.080451900012784, "learning_rate": 1.4378839327580663e-08, "loss": 0.31709960103034973, "step": 1847 }, { "epoch": 2.9379968203497615, "grad_norm": 34.195811485082544, "learning_rate": 1.3686302536647378e-08, "loss": 0.7739929556846619, "step": 1848 }, { "epoch": 2.9395866454689985, "grad_norm": 10.74556954521327, "learning_rate": 1.3010834579405552e-08, "loss": 0.2143116295337677, "step": 1849 }, { "epoch": 2.9411764705882355, "grad_norm": 11.30842772051544, "learning_rate": 1.2352437768074487e-08, "loss": 0.3580181896686554, "step": 1850 }, { "epoch": 2.942766295707472, "grad_norm": 9.620337397959851, "learning_rate": 1.1711114356436903e-08, "loss": 0.5074045062065125, "step": 1851 }, { "epoch": 2.944356120826709, "grad_norm": 9.053749272015484, "learning_rate": 1.1086866539830044e-08, "loss": 0.2650204002857208, "step": 1852 }, { "epoch": 2.945945945945946, "grad_norm": 9.516284200897292, "learning_rate": 1.0479696455139576e-08, "loss": 0.340106725692749, "step": 1853 }, { "epoch": 2.9475357710651826, "grad_norm": 12.839207556626045, "learning_rate": 9.889606180792378e-09, "loss": 1.0296393632888794, "step": 1854 }, { "epoch": 2.9491255961844196, "grad_norm": 13.736410843682803, "learning_rate": 9.316597736747091e-09, "loss": 0.8265128135681152, "step": 1855 }, { "epoch": 2.9507154213036566, "grad_norm": 8.982349734294708, "learning_rate": 8.7606730844908e-09, "loss": 0.2861822247505188, "step": 1856 }, { "epoch": 2.9523052464228936, "grad_norm": 10.159889957225653, "learning_rate": 8.221834127029593e-09, "loss": 0.670875072479248, "step": 1857 }, { "epoch": 2.9538950715421306, "grad_norm": 13.273432006247354, "learning_rate": 7.700082708883006e-09, "loss": 0.9394416213035583, "step": 1858 }, { "epoch": 2.955484896661367, "grad_norm": 9.790433583461628, "learning_rate": 7.1954206160768096e-09, "loss": 0.9223511219024658, "step": 1859 }, { "epoch": 2.957074721780604, "grad_norm": 23.37243487658773, "learning_rate": 6.7078495761385695e-09, "loss": 0.3571210205554962, "step": 1860 }, { "epoch": 2.958664546899841, "grad_norm": 11.351052610839561, "learning_rate": 6.237371258090985e-09, "loss": 0.2777283191680908, "step": 1861 }, { "epoch": 2.9602543720190777, "grad_norm": 8.885306462727502, "learning_rate": 5.783987272445779e-09, "loss": 0.3929477334022522, "step": 1862 }, { "epoch": 2.9618441971383147, "grad_norm": 16.28630558472394, "learning_rate": 5.347699171197595e-09, "loss": 1.0299521684646606, "step": 1863 }, { "epoch": 2.9634340222575517, "grad_norm": 13.806574966471507, "learning_rate": 4.928508447821223e-09, "loss": 0.6577022075653076, "step": 1864 }, { "epoch": 2.9650238473767887, "grad_norm": 12.093973318345537, "learning_rate": 4.526416537263267e-09, "loss": 0.6602462530136108, "step": 1865 }, { "epoch": 2.9666136724960257, "grad_norm": 9.484393327040737, "learning_rate": 4.141424815938822e-09, "loss": 0.42496952414512634, "step": 1866 }, { "epoch": 2.9682034976152623, "grad_norm": 7.945852000270968, "learning_rate": 3.77353460172869e-09, "loss": 0.20469367504119873, "step": 1867 }, { "epoch": 2.9697933227344993, "grad_norm": 10.084966174225068, "learning_rate": 3.422747153969952e-09, "loss": 0.26603108644485474, "step": 1868 }, { "epoch": 2.9713831478537363, "grad_norm": 9.533596522923068, "learning_rate": 3.089063673456516e-09, "loss": 0.5473682284355164, "step": 1869 }, { "epoch": 2.972972972972973, "grad_norm": 11.424298125013499, "learning_rate": 2.7724853024324594e-09, "loss": 0.22095058858394623, "step": 1870 }, { "epoch": 2.97456279809221, "grad_norm": 9.266786716981644, "learning_rate": 2.473013124589252e-09, "loss": 0.31715139746665955, "step": 1871 }, { "epoch": 2.976152623211447, "grad_norm": 9.974606963747325, "learning_rate": 2.1906481650613153e-09, "loss": 0.32596707344055176, "step": 1872 }, { "epoch": 2.977742448330684, "grad_norm": 10.174795901337596, "learning_rate": 1.925391390421583e-09, "loss": 0.841127872467041, "step": 1873 }, { "epoch": 2.9793322734499204, "grad_norm": 22.599584004927102, "learning_rate": 1.6772437086803873e-09, "loss": 0.6419406533241272, "step": 1874 }, { "epoch": 2.9809220985691574, "grad_norm": 11.655290018579056, "learning_rate": 1.446205969282133e-09, "loss": 0.2810055613517761, "step": 1875 }, { "epoch": 2.9825119236883944, "grad_norm": 7.105036483547834, "learning_rate": 1.2322789630997422e-09, "loss": 0.23816505074501038, "step": 1876 }, { "epoch": 2.984101748807631, "grad_norm": 11.368820848007578, "learning_rate": 1.0354634224346572e-09, "loss": 0.30685269832611084, "step": 1877 }, { "epoch": 2.985691573926868, "grad_norm": 13.391698269787614, "learning_rate": 8.557600210140627e-10, "loss": 1.1483169794082642, "step": 1878 }, { "epoch": 2.987281399046105, "grad_norm": 14.95082540693622, "learning_rate": 6.931693739864465e-10, "loss": 0.5511203408241272, "step": 1879 }, { "epoch": 2.988871224165342, "grad_norm": 12.50188330470179, "learning_rate": 5.476920379221539e-10, "loss": 0.4261815547943115, "step": 1880 }, { "epoch": 2.990461049284579, "grad_norm": 16.76303275226908, "learning_rate": 4.1932851081005753e-10, "loss": 0.5755501389503479, "step": 1881 }, { "epoch": 2.9920508744038155, "grad_norm": 16.077528372751395, "learning_rate": 3.080792320564463e-10, "loss": 0.3609001636505127, "step": 1882 }, { "epoch": 2.9936406995230525, "grad_norm": 13.199387427384234, "learning_rate": 2.1394458248169548e-10, "loss": 0.5746971368789673, "step": 1883 }, { "epoch": 2.9952305246422894, "grad_norm": 11.976309923228456, "learning_rate": 1.3692488432304195e-10, "loss": 0.8742420673370361, "step": 1884 }, { "epoch": 2.996820349761526, "grad_norm": 12.499556410677085, "learning_rate": 7.702040122847809e-11, "loss": 0.3924184739589691, "step": 1885 }, { "epoch": 2.998410174880763, "grad_norm": 11.370980136761439, "learning_rate": 3.423133825897207e-11, "loss": 0.27480587363243103, "step": 1886 }, { "epoch": 3.0, "grad_norm": 12.874401912588322, "learning_rate": 8.557841888467977e-12, "loss": 0.2574765980243683, "step": 1887 }, { "epoch": 3.0, "step": 1887, "total_flos": 5124506050560.0, "train_loss": 1.6660761659738874, "train_runtime": 2303.3437, "train_samples_per_second": 3.274, "train_steps_per_second": 0.819 } ], "logging_steps": 1, "max_steps": 1887, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5124506050560.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }