{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2907, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010319917440660474, "grad_norm": 38.264573129438034, "learning_rate": 0.0, "loss": 4.170958995819092, "step": 1 }, { "epoch": 0.0020639834881320948, "grad_norm": 32.07645559117946, "learning_rate": 3.436426116838488e-08, "loss": 4.053407669067383, "step": 2 }, { "epoch": 0.0030959752321981426, "grad_norm": 33.360400309719275, "learning_rate": 6.872852233676976e-08, "loss": 4.419272422790527, "step": 3 }, { "epoch": 0.0041279669762641896, "grad_norm": 38.690889015820794, "learning_rate": 1.0309278350515465e-07, "loss": 4.089800834655762, "step": 4 }, { "epoch": 0.005159958720330237, "grad_norm": 40.53568672789824, "learning_rate": 1.3745704467353952e-07, "loss": 4.314361095428467, "step": 5 }, { "epoch": 0.006191950464396285, "grad_norm": 52.73132919762138, "learning_rate": 1.7182130584192442e-07, "loss": 4.534241676330566, "step": 6 }, { "epoch": 0.007223942208462332, "grad_norm": 37.467034340256674, "learning_rate": 2.061855670103093e-07, "loss": 4.245081424713135, "step": 7 }, { "epoch": 0.008255933952528379, "grad_norm": 36.54412560626581, "learning_rate": 2.405498281786942e-07, "loss": 3.9605345726013184, "step": 8 }, { "epoch": 0.009287925696594427, "grad_norm": 32.09650929914277, "learning_rate": 2.7491408934707903e-07, "loss": 3.553675889968872, "step": 9 }, { "epoch": 0.010319917440660475, "grad_norm": 39.99737645231538, "learning_rate": 3.0927835051546394e-07, "loss": 3.8991663455963135, "step": 10 }, { "epoch": 0.011351909184726523, "grad_norm": 26.94996491123709, "learning_rate": 3.4364261168384884e-07, "loss": 4.447433948516846, "step": 11 }, { "epoch": 0.01238390092879257, "grad_norm": 38.29183176227633, "learning_rate": 3.780068728522337e-07, "loss": 3.8410043716430664, "step": 12 }, { "epoch": 0.013415892672858616, "grad_norm": 39.44434132664455, "learning_rate": 4.123711340206186e-07, "loss": 4.28723669052124, "step": 13 }, { "epoch": 0.014447884416924664, "grad_norm": 44.60959999422548, "learning_rate": 4.467353951890035e-07, "loss": 4.403202056884766, "step": 14 }, { "epoch": 0.015479876160990712, "grad_norm": 33.86401023900952, "learning_rate": 4.810996563573884e-07, "loss": 4.3327813148498535, "step": 15 }, { "epoch": 0.016511867905056758, "grad_norm": 36.37314241341025, "learning_rate": 5.154639175257732e-07, "loss": 4.586869716644287, "step": 16 }, { "epoch": 0.017543859649122806, "grad_norm": 40.95405299759724, "learning_rate": 5.498281786941581e-07, "loss": 4.356800079345703, "step": 17 }, { "epoch": 0.018575851393188854, "grad_norm": 44.68055417441504, "learning_rate": 5.84192439862543e-07, "loss": 3.605020523071289, "step": 18 }, { "epoch": 0.0196078431372549, "grad_norm": 34.30658098751594, "learning_rate": 6.185567010309279e-07, "loss": 3.9082632064819336, "step": 19 }, { "epoch": 0.02063983488132095, "grad_norm": 39.53723456862595, "learning_rate": 6.529209621993128e-07, "loss": 4.336679935455322, "step": 20 }, { "epoch": 0.021671826625386997, "grad_norm": 44.1317837523831, "learning_rate": 6.872852233676977e-07, "loss": 4.653647422790527, "step": 21 }, { "epoch": 0.022703818369453045, "grad_norm": 39.93848572620918, "learning_rate": 7.216494845360824e-07, "loss": 4.44484806060791, "step": 22 }, { "epoch": 0.023735810113519093, "grad_norm": 37.7809949162029, "learning_rate": 7.560137457044674e-07, "loss": 4.18214225769043, "step": 23 }, { "epoch": 0.02476780185758514, "grad_norm": 25.863583611599758, "learning_rate": 7.903780068728522e-07, "loss": 3.864325761795044, "step": 24 }, { "epoch": 0.025799793601651185, "grad_norm": 37.58953378307994, "learning_rate": 8.247422680412372e-07, "loss": 4.450252056121826, "step": 25 }, { "epoch": 0.026831785345717233, "grad_norm": 19.483525440807544, "learning_rate": 8.59106529209622e-07, "loss": 3.714585304260254, "step": 26 }, { "epoch": 0.02786377708978328, "grad_norm": 16.580078896381973, "learning_rate": 8.93470790378007e-07, "loss": 2.6292967796325684, "step": 27 }, { "epoch": 0.02889576883384933, "grad_norm": 23.906848784358015, "learning_rate": 9.278350515463919e-07, "loss": 3.2508959770202637, "step": 28 }, { "epoch": 0.029927760577915376, "grad_norm": 23.113354867848006, "learning_rate": 9.621993127147767e-07, "loss": 4.082338333129883, "step": 29 }, { "epoch": 0.030959752321981424, "grad_norm": 15.20360049168446, "learning_rate": 9.965635738831617e-07, "loss": 3.178603410720825, "step": 30 }, { "epoch": 0.03199174406604747, "grad_norm": 27.255926573914664, "learning_rate": 1.0309278350515464e-06, "loss": 3.615831136703491, "step": 31 }, { "epoch": 0.033023735810113516, "grad_norm": 18.358659884678197, "learning_rate": 1.0652920962199314e-06, "loss": 3.769883871078491, "step": 32 }, { "epoch": 0.034055727554179564, "grad_norm": 20.18024340362498, "learning_rate": 1.0996563573883161e-06, "loss": 3.5580708980560303, "step": 33 }, { "epoch": 0.03508771929824561, "grad_norm": 12.713459341070733, "learning_rate": 1.134020618556701e-06, "loss": 3.3640944957733154, "step": 34 }, { "epoch": 0.03611971104231166, "grad_norm": 15.175494098184558, "learning_rate": 1.168384879725086e-06, "loss": 3.2460312843322754, "step": 35 }, { "epoch": 0.03715170278637771, "grad_norm": 28.349265846819563, "learning_rate": 1.202749140893471e-06, "loss": 4.279034614562988, "step": 36 }, { "epoch": 0.038183694530443756, "grad_norm": 14.84497695062972, "learning_rate": 1.2371134020618557e-06, "loss": 3.5686206817626953, "step": 37 }, { "epoch": 0.0392156862745098, "grad_norm": 12.18324253859612, "learning_rate": 1.2714776632302405e-06, "loss": 3.2601733207702637, "step": 38 }, { "epoch": 0.04024767801857585, "grad_norm": 18.125128941879503, "learning_rate": 1.3058419243986257e-06, "loss": 3.5317866802215576, "step": 39 }, { "epoch": 0.0412796697626419, "grad_norm": 15.44482786670929, "learning_rate": 1.3402061855670104e-06, "loss": 3.243781089782715, "step": 40 }, { "epoch": 0.04231166150670795, "grad_norm": 13.768437323999384, "learning_rate": 1.3745704467353954e-06, "loss": 2.8587775230407715, "step": 41 }, { "epoch": 0.043343653250773995, "grad_norm": 16.008351007245647, "learning_rate": 1.4089347079037801e-06, "loss": 3.319521903991699, "step": 42 }, { "epoch": 0.04437564499484004, "grad_norm": 28.007268290570234, "learning_rate": 1.4432989690721649e-06, "loss": 3.675022602081299, "step": 43 }, { "epoch": 0.04540763673890609, "grad_norm": 17.116348976722346, "learning_rate": 1.47766323024055e-06, "loss": 3.6678833961486816, "step": 44 }, { "epoch": 0.04643962848297214, "grad_norm": 8.269409663120342, "learning_rate": 1.5120274914089348e-06, "loss": 3.1754953861236572, "step": 45 }, { "epoch": 0.047471620227038186, "grad_norm": 11.838220487063731, "learning_rate": 1.5463917525773197e-06, "loss": 4.087137699127197, "step": 46 }, { "epoch": 0.048503611971104234, "grad_norm": 13.156253776640677, "learning_rate": 1.5807560137457045e-06, "loss": 3.4292521476745605, "step": 47 }, { "epoch": 0.04953560371517028, "grad_norm": 12.019537855742236, "learning_rate": 1.6151202749140896e-06, "loss": 3.0743494033813477, "step": 48 }, { "epoch": 0.05056759545923633, "grad_norm": 22.60220162648615, "learning_rate": 1.6494845360824744e-06, "loss": 3.8045639991760254, "step": 49 }, { "epoch": 0.05159958720330237, "grad_norm": 17.72197752886121, "learning_rate": 1.6838487972508594e-06, "loss": 3.7670891284942627, "step": 50 }, { "epoch": 0.05263157894736842, "grad_norm": 12.61715753067164, "learning_rate": 1.718213058419244e-06, "loss": 2.6255364418029785, "step": 51 }, { "epoch": 0.053663570691434466, "grad_norm": 14.76027652711265, "learning_rate": 1.7525773195876288e-06, "loss": 3.2327818870544434, "step": 52 }, { "epoch": 0.054695562435500514, "grad_norm": 12.127995456182449, "learning_rate": 1.786941580756014e-06, "loss": 3.5888636112213135, "step": 53 }, { "epoch": 0.05572755417956656, "grad_norm": 12.814387754432278, "learning_rate": 1.8213058419243988e-06, "loss": 3.601221799850464, "step": 54 }, { "epoch": 0.05675954592363261, "grad_norm": 13.054485091375586, "learning_rate": 1.8556701030927837e-06, "loss": 2.9378113746643066, "step": 55 }, { "epoch": 0.05779153766769866, "grad_norm": 15.675205835371122, "learning_rate": 1.8900343642611685e-06, "loss": 3.0619659423828125, "step": 56 }, { "epoch": 0.058823529411764705, "grad_norm": 18.601477307180467, "learning_rate": 1.9243986254295534e-06, "loss": 3.913945436477661, "step": 57 }, { "epoch": 0.05985552115583075, "grad_norm": 14.684932137524926, "learning_rate": 1.9587628865979384e-06, "loss": 3.16741943359375, "step": 58 }, { "epoch": 0.0608875128998968, "grad_norm": 10.584505171302856, "learning_rate": 1.9931271477663233e-06, "loss": 2.752635955810547, "step": 59 }, { "epoch": 0.06191950464396285, "grad_norm": 14.294337494468676, "learning_rate": 2.027491408934708e-06, "loss": 3.683850049972534, "step": 60 }, { "epoch": 0.0629514963880289, "grad_norm": 14.548967939892366, "learning_rate": 2.061855670103093e-06, "loss": 3.897719383239746, "step": 61 }, { "epoch": 0.06398348813209494, "grad_norm": 10.784203027141196, "learning_rate": 2.096219931271478e-06, "loss": 3.5739078521728516, "step": 62 }, { "epoch": 0.06501547987616099, "grad_norm": 9.005770358258374, "learning_rate": 2.1305841924398628e-06, "loss": 3.4974448680877686, "step": 63 }, { "epoch": 0.06604747162022703, "grad_norm": 13.801645228196314, "learning_rate": 2.1649484536082477e-06, "loss": 3.6671719551086426, "step": 64 }, { "epoch": 0.06707946336429309, "grad_norm": 18.864311571048795, "learning_rate": 2.1993127147766322e-06, "loss": 3.5606706142425537, "step": 65 }, { "epoch": 0.06811145510835913, "grad_norm": 36.121188402287004, "learning_rate": 2.2336769759450176e-06, "loss": 3.54666805267334, "step": 66 }, { "epoch": 0.06914344685242518, "grad_norm": 18.418938373726004, "learning_rate": 2.268041237113402e-06, "loss": 3.6477246284484863, "step": 67 }, { "epoch": 0.07017543859649122, "grad_norm": 10.342314919135225, "learning_rate": 2.302405498281787e-06, "loss": 3.859053611755371, "step": 68 }, { "epoch": 0.07120743034055728, "grad_norm": 10.856757226736953, "learning_rate": 2.336769759450172e-06, "loss": 3.555589199066162, "step": 69 }, { "epoch": 0.07223942208462332, "grad_norm": 11.203267403062268, "learning_rate": 2.3711340206185566e-06, "loss": 2.384549140930176, "step": 70 }, { "epoch": 0.07327141382868937, "grad_norm": 9.751556559109623, "learning_rate": 2.405498281786942e-06, "loss": 3.5311636924743652, "step": 71 }, { "epoch": 0.07430340557275542, "grad_norm": 24.74278527119019, "learning_rate": 2.4398625429553265e-06, "loss": 3.828981399536133, "step": 72 }, { "epoch": 0.07533539731682147, "grad_norm": 20.20134665078964, "learning_rate": 2.4742268041237115e-06, "loss": 3.5808427333831787, "step": 73 }, { "epoch": 0.07636738906088751, "grad_norm": 23.478145098032897, "learning_rate": 2.5085910652920964e-06, "loss": 3.169694423675537, "step": 74 }, { "epoch": 0.07739938080495357, "grad_norm": 8.074437086627722, "learning_rate": 2.542955326460481e-06, "loss": 3.299074172973633, "step": 75 }, { "epoch": 0.0784313725490196, "grad_norm": 12.350822645184348, "learning_rate": 2.577319587628866e-06, "loss": 3.151115655899048, "step": 76 }, { "epoch": 0.07946336429308566, "grad_norm": 11.854286237324086, "learning_rate": 2.6116838487972513e-06, "loss": 3.488835096359253, "step": 77 }, { "epoch": 0.0804953560371517, "grad_norm": 14.751144818698055, "learning_rate": 2.646048109965636e-06, "loss": 3.2102808952331543, "step": 78 }, { "epoch": 0.08152734778121776, "grad_norm": 12.450756671913153, "learning_rate": 2.680412371134021e-06, "loss": 3.0494484901428223, "step": 79 }, { "epoch": 0.0825593395252838, "grad_norm": 10.909290889970654, "learning_rate": 2.7147766323024053e-06, "loss": 3.182155132293701, "step": 80 }, { "epoch": 0.08359133126934984, "grad_norm": 17.007046121547553, "learning_rate": 2.7491408934707907e-06, "loss": 3.132920026779175, "step": 81 }, { "epoch": 0.0846233230134159, "grad_norm": 8.156744910941768, "learning_rate": 2.7835051546391757e-06, "loss": 2.959625244140625, "step": 82 }, { "epoch": 0.08565531475748193, "grad_norm": 11.237150545090403, "learning_rate": 2.8178694158075602e-06, "loss": 2.8158531188964844, "step": 83 }, { "epoch": 0.08668730650154799, "grad_norm": 8.124664173201067, "learning_rate": 2.852233676975945e-06, "loss": 2.988208532333374, "step": 84 }, { "epoch": 0.08771929824561403, "grad_norm": 12.562368133864503, "learning_rate": 2.8865979381443297e-06, "loss": 2.8602449893951416, "step": 85 }, { "epoch": 0.08875128998968008, "grad_norm": 10.04284020697332, "learning_rate": 2.920962199312715e-06, "loss": 3.2334365844726562, "step": 86 }, { "epoch": 0.08978328173374613, "grad_norm": 7.610534456472524, "learning_rate": 2.9553264604811e-06, "loss": 2.1243410110473633, "step": 87 }, { "epoch": 0.09081527347781218, "grad_norm": 19.188341920491524, "learning_rate": 2.9896907216494846e-06, "loss": 3.877272605895996, "step": 88 }, { "epoch": 0.09184726522187822, "grad_norm": 10.75878999285903, "learning_rate": 3.0240549828178695e-06, "loss": 2.8275046348571777, "step": 89 }, { "epoch": 0.09287925696594428, "grad_norm": 17.36699124390256, "learning_rate": 3.058419243986255e-06, "loss": 3.3012256622314453, "step": 90 }, { "epoch": 0.09391124871001032, "grad_norm": 7.627420340539254, "learning_rate": 3.0927835051546395e-06, "loss": 2.723191976547241, "step": 91 }, { "epoch": 0.09494324045407637, "grad_norm": 16.901290623449444, "learning_rate": 3.1271477663230244e-06, "loss": 3.4844679832458496, "step": 92 }, { "epoch": 0.09597523219814241, "grad_norm": 18.36579239064143, "learning_rate": 3.161512027491409e-06, "loss": 3.245941638946533, "step": 93 }, { "epoch": 0.09700722394220847, "grad_norm": 6.816226068152801, "learning_rate": 3.195876288659794e-06, "loss": 1.0889668464660645, "step": 94 }, { "epoch": 0.09803921568627451, "grad_norm": 11.855599758847085, "learning_rate": 3.2302405498281793e-06, "loss": 2.9265670776367188, "step": 95 }, { "epoch": 0.09907120743034056, "grad_norm": 11.85375182382243, "learning_rate": 3.264604810996564e-06, "loss": 2.8669631481170654, "step": 96 }, { "epoch": 0.1001031991744066, "grad_norm": 12.46248341052778, "learning_rate": 3.298969072164949e-06, "loss": 3.630988121032715, "step": 97 }, { "epoch": 0.10113519091847266, "grad_norm": 14.491951832964455, "learning_rate": 3.3333333333333333e-06, "loss": 3.4562764167785645, "step": 98 }, { "epoch": 0.1021671826625387, "grad_norm": 9.862773480939577, "learning_rate": 3.3676975945017187e-06, "loss": 3.196944236755371, "step": 99 }, { "epoch": 0.10319917440660474, "grad_norm": 15.116742234147907, "learning_rate": 3.4020618556701037e-06, "loss": 3.2567732334136963, "step": 100 }, { "epoch": 0.1042311661506708, "grad_norm": 13.64659923131581, "learning_rate": 3.436426116838488e-06, "loss": 2.071723461151123, "step": 101 }, { "epoch": 0.10526315789473684, "grad_norm": 9.92773943086049, "learning_rate": 3.470790378006873e-06, "loss": 2.6918675899505615, "step": 102 }, { "epoch": 0.10629514963880289, "grad_norm": 13.873607215983421, "learning_rate": 3.5051546391752577e-06, "loss": 2.8453264236450195, "step": 103 }, { "epoch": 0.10732714138286893, "grad_norm": 7.974277010169032, "learning_rate": 3.539518900343643e-06, "loss": 2.940626382827759, "step": 104 }, { "epoch": 0.10835913312693499, "grad_norm": 10.445936177248788, "learning_rate": 3.573883161512028e-06, "loss": 3.002101421356201, "step": 105 }, { "epoch": 0.10939112487100103, "grad_norm": 16.347438947139267, "learning_rate": 3.6082474226804126e-06, "loss": 3.2126479148864746, "step": 106 }, { "epoch": 0.11042311661506708, "grad_norm": 10.598312696913382, "learning_rate": 3.6426116838487975e-06, "loss": 2.8869528770446777, "step": 107 }, { "epoch": 0.11145510835913312, "grad_norm": 9.446228078701083, "learning_rate": 3.6769759450171825e-06, "loss": 2.824401378631592, "step": 108 }, { "epoch": 0.11248710010319918, "grad_norm": 9.051408990447767, "learning_rate": 3.7113402061855674e-06, "loss": 2.923281192779541, "step": 109 }, { "epoch": 0.11351909184726522, "grad_norm": 11.532599229799272, "learning_rate": 3.7457044673539524e-06, "loss": 3.263378381729126, "step": 110 }, { "epoch": 0.11455108359133127, "grad_norm": 8.612569290133255, "learning_rate": 3.780068728522337e-06, "loss": 2.3272271156311035, "step": 111 }, { "epoch": 0.11558307533539731, "grad_norm": 10.217127793164705, "learning_rate": 3.814432989690722e-06, "loss": 2.9798202514648438, "step": 112 }, { "epoch": 0.11661506707946337, "grad_norm": 7.251982120801302, "learning_rate": 3.848797250859107e-06, "loss": 3.069607973098755, "step": 113 }, { "epoch": 0.11764705882352941, "grad_norm": 8.606629694041775, "learning_rate": 3.883161512027492e-06, "loss": 2.5642192363739014, "step": 114 }, { "epoch": 0.11867905056759546, "grad_norm": 17.037513857081912, "learning_rate": 3.917525773195877e-06, "loss": 3.3925039768218994, "step": 115 }, { "epoch": 0.1197110423116615, "grad_norm": 36.93516693280702, "learning_rate": 3.951890034364262e-06, "loss": 3.3626632690429688, "step": 116 }, { "epoch": 0.12074303405572756, "grad_norm": 18.3571141791971, "learning_rate": 3.986254295532647e-06, "loss": 3.111511707305908, "step": 117 }, { "epoch": 0.1217750257997936, "grad_norm": 16.195077738348424, "learning_rate": 4.020618556701032e-06, "loss": 2.9979894161224365, "step": 118 }, { "epoch": 0.12280701754385964, "grad_norm": 11.22682971528628, "learning_rate": 4.054982817869416e-06, "loss": 2.9094398021698, "step": 119 }, { "epoch": 0.1238390092879257, "grad_norm": 9.818020752484045, "learning_rate": 4.089347079037801e-06, "loss": 2.995288610458374, "step": 120 }, { "epoch": 0.12487100103199174, "grad_norm": 15.677881529415794, "learning_rate": 4.123711340206186e-06, "loss": 3.0458688735961914, "step": 121 }, { "epoch": 0.1259029927760578, "grad_norm": 10.561459834532538, "learning_rate": 4.158075601374571e-06, "loss": 3.482619047164917, "step": 122 }, { "epoch": 0.12693498452012383, "grad_norm": 12.542195355260022, "learning_rate": 4.192439862542956e-06, "loss": 3.0959300994873047, "step": 123 }, { "epoch": 0.12796697626418987, "grad_norm": 10.857060562341111, "learning_rate": 4.2268041237113405e-06, "loss": 3.2271957397460938, "step": 124 }, { "epoch": 0.12899896800825594, "grad_norm": 18.219131423659785, "learning_rate": 4.2611683848797255e-06, "loss": 2.5085105895996094, "step": 125 }, { "epoch": 0.13003095975232198, "grad_norm": 9.401192182845163, "learning_rate": 4.2955326460481105e-06, "loss": 2.629058837890625, "step": 126 }, { "epoch": 0.13106295149638802, "grad_norm": 11.257897352313293, "learning_rate": 4.329896907216495e-06, "loss": 3.031485080718994, "step": 127 }, { "epoch": 0.13209494324045407, "grad_norm": 14.99015554328189, "learning_rate": 4.36426116838488e-06, "loss": 3.0450351238250732, "step": 128 }, { "epoch": 0.13312693498452013, "grad_norm": 8.43577882737364, "learning_rate": 4.3986254295532645e-06, "loss": 2.7244393825531006, "step": 129 }, { "epoch": 0.13415892672858618, "grad_norm": 20.916171658532548, "learning_rate": 4.4329896907216494e-06, "loss": 2.0315990447998047, "step": 130 }, { "epoch": 0.13519091847265222, "grad_norm": 7.513190417589719, "learning_rate": 4.467353951890035e-06, "loss": 3.0035698413848877, "step": 131 }, { "epoch": 0.13622291021671826, "grad_norm": 26.148958761673452, "learning_rate": 4.501718213058419e-06, "loss": 3.425873279571533, "step": 132 }, { "epoch": 0.13725490196078433, "grad_norm": 18.60377751563881, "learning_rate": 4.536082474226804e-06, "loss": 2.837137222290039, "step": 133 }, { "epoch": 0.13828689370485037, "grad_norm": 9.231386213638103, "learning_rate": 4.570446735395189e-06, "loss": 2.713682174682617, "step": 134 }, { "epoch": 0.1393188854489164, "grad_norm": 8.302335013209317, "learning_rate": 4.604810996563574e-06, "loss": 2.447545289993286, "step": 135 }, { "epoch": 0.14035087719298245, "grad_norm": 8.539319304391766, "learning_rate": 4.639175257731959e-06, "loss": 2.9703640937805176, "step": 136 }, { "epoch": 0.14138286893704852, "grad_norm": 10.406180966902255, "learning_rate": 4.673539518900344e-06, "loss": 2.8641350269317627, "step": 137 }, { "epoch": 0.14241486068111456, "grad_norm": 9.85807004865754, "learning_rate": 4.707903780068729e-06, "loss": 2.8812713623046875, "step": 138 }, { "epoch": 0.1434468524251806, "grad_norm": 12.483093886606463, "learning_rate": 4.742268041237113e-06, "loss": 2.8616321086883545, "step": 139 }, { "epoch": 0.14447884416924664, "grad_norm": 14.725269738219037, "learning_rate": 4.776632302405499e-06, "loss": 2.9235153198242188, "step": 140 }, { "epoch": 0.14551083591331268, "grad_norm": 14.22587761070801, "learning_rate": 4.810996563573884e-06, "loss": 2.8137569427490234, "step": 141 }, { "epoch": 0.14654282765737875, "grad_norm": 12.328982503358834, "learning_rate": 4.845360824742268e-06, "loss": 2.68989896774292, "step": 142 }, { "epoch": 0.1475748194014448, "grad_norm": 9.893270455608677, "learning_rate": 4.879725085910653e-06, "loss": 3.09940505027771, "step": 143 }, { "epoch": 0.14860681114551083, "grad_norm": 14.968488810940084, "learning_rate": 4.914089347079038e-06, "loss": 3.154467821121216, "step": 144 }, { "epoch": 0.14963880288957687, "grad_norm": 8.77535414976699, "learning_rate": 4.948453608247423e-06, "loss": 2.3697733879089355, "step": 145 }, { "epoch": 0.15067079463364294, "grad_norm": 12.60090393189173, "learning_rate": 4.982817869415808e-06, "loss": 2.832059383392334, "step": 146 }, { "epoch": 0.15170278637770898, "grad_norm": 8.133074225973754, "learning_rate": 5.017182130584193e-06, "loss": 3.008730888366699, "step": 147 }, { "epoch": 0.15273477812177502, "grad_norm": 11.669903098407282, "learning_rate": 5.051546391752578e-06, "loss": 3.4180641174316406, "step": 148 }, { "epoch": 0.15376676986584106, "grad_norm": 11.657155258917168, "learning_rate": 5.085910652920962e-06, "loss": 3.033390522003174, "step": 149 }, { "epoch": 0.15479876160990713, "grad_norm": 8.538656667770233, "learning_rate": 5.120274914089347e-06, "loss": 2.817418098449707, "step": 150 }, { "epoch": 0.15583075335397317, "grad_norm": 14.328660767942383, "learning_rate": 5.154639175257732e-06, "loss": 3.162787914276123, "step": 151 }, { "epoch": 0.1568627450980392, "grad_norm": 11.115423828752997, "learning_rate": 5.189003436426118e-06, "loss": 2.862689256668091, "step": 152 }, { "epoch": 0.15789473684210525, "grad_norm": 8.211023882248943, "learning_rate": 5.223367697594503e-06, "loss": 3.511646032333374, "step": 153 }, { "epoch": 0.15892672858617132, "grad_norm": 10.547321886955585, "learning_rate": 5.257731958762888e-06, "loss": 2.5060176849365234, "step": 154 }, { "epoch": 0.15995872033023736, "grad_norm": 9.183030025162694, "learning_rate": 5.292096219931272e-06, "loss": 3.171947717666626, "step": 155 }, { "epoch": 0.1609907120743034, "grad_norm": 9.116532341417292, "learning_rate": 5.326460481099657e-06, "loss": 2.5070362091064453, "step": 156 }, { "epoch": 0.16202270381836945, "grad_norm": 11.011061651573083, "learning_rate": 5.360824742268042e-06, "loss": 2.4879579544067383, "step": 157 }, { "epoch": 0.16305469556243551, "grad_norm": 12.155789588921165, "learning_rate": 5.395189003436427e-06, "loss": 2.769174098968506, "step": 158 }, { "epoch": 0.16408668730650156, "grad_norm": 8.52073235657292, "learning_rate": 5.429553264604811e-06, "loss": 2.3643741607666016, "step": 159 }, { "epoch": 0.1651186790505676, "grad_norm": 13.23465003546493, "learning_rate": 5.463917525773196e-06, "loss": 2.593092918395996, "step": 160 }, { "epoch": 0.16615067079463364, "grad_norm": 12.101671965226927, "learning_rate": 5.4982817869415815e-06, "loss": 2.8370893001556396, "step": 161 }, { "epoch": 0.16718266253869968, "grad_norm": 11.888079691465743, "learning_rate": 5.532646048109966e-06, "loss": 2.9755825996398926, "step": 162 }, { "epoch": 0.16821465428276575, "grad_norm": 19.214763509261946, "learning_rate": 5.567010309278351e-06, "loss": 3.475999355316162, "step": 163 }, { "epoch": 0.1692466460268318, "grad_norm": 7.9945005629139425, "learning_rate": 5.601374570446736e-06, "loss": 2.942070484161377, "step": 164 }, { "epoch": 0.17027863777089783, "grad_norm": 8.303932327257701, "learning_rate": 5.6357388316151204e-06, "loss": 2.83166241645813, "step": 165 }, { "epoch": 0.17131062951496387, "grad_norm": 8.905346416558189, "learning_rate": 5.670103092783505e-06, "loss": 2.9068546295166016, "step": 166 }, { "epoch": 0.17234262125902994, "grad_norm": 9.579295533050125, "learning_rate": 5.70446735395189e-06, "loss": 3.112548351287842, "step": 167 }, { "epoch": 0.17337461300309598, "grad_norm": 10.536784663679345, "learning_rate": 5.738831615120275e-06, "loss": 3.296271800994873, "step": 168 }, { "epoch": 0.17440660474716202, "grad_norm": 24.842411581016172, "learning_rate": 5.7731958762886594e-06, "loss": 3.670775890350342, "step": 169 }, { "epoch": 0.17543859649122806, "grad_norm": 22.019557529324477, "learning_rate": 5.807560137457045e-06, "loss": 3.7383432388305664, "step": 170 }, { "epoch": 0.17647058823529413, "grad_norm": 13.704368388684442, "learning_rate": 5.84192439862543e-06, "loss": 2.4311046600341797, "step": 171 }, { "epoch": 0.17750257997936017, "grad_norm": 8.387017897178954, "learning_rate": 5.876288659793815e-06, "loss": 2.6996443271636963, "step": 172 }, { "epoch": 0.1785345717234262, "grad_norm": 15.693639993341684, "learning_rate": 5.9106529209622e-06, "loss": 2.783320426940918, "step": 173 }, { "epoch": 0.17956656346749225, "grad_norm": 20.3581897243643, "learning_rate": 5.945017182130585e-06, "loss": 2.5705931186676025, "step": 174 }, { "epoch": 0.18059855521155832, "grad_norm": 10.17140010492615, "learning_rate": 5.979381443298969e-06, "loss": 2.9759726524353027, "step": 175 }, { "epoch": 0.18163054695562436, "grad_norm": 29.834325540874072, "learning_rate": 6.013745704467354e-06, "loss": 2.3043899536132812, "step": 176 }, { "epoch": 0.1826625386996904, "grad_norm": 12.9811040950101, "learning_rate": 6.048109965635739e-06, "loss": 2.7947998046875, "step": 177 }, { "epoch": 0.18369453044375644, "grad_norm": 13.007887184214686, "learning_rate": 6.082474226804124e-06, "loss": 2.867368698120117, "step": 178 }, { "epoch": 0.18472652218782248, "grad_norm": 10.97250328468093, "learning_rate": 6.11683848797251e-06, "loss": 2.964649200439453, "step": 179 }, { "epoch": 0.18575851393188855, "grad_norm": 13.663161398038788, "learning_rate": 6.151202749140894e-06, "loss": 2.844449043273926, "step": 180 }, { "epoch": 0.1867905056759546, "grad_norm": 16.7880436257611, "learning_rate": 6.185567010309279e-06, "loss": 2.888267993927002, "step": 181 }, { "epoch": 0.18782249742002063, "grad_norm": 13.60494749666835, "learning_rate": 6.219931271477664e-06, "loss": 2.7521345615386963, "step": 182 }, { "epoch": 0.18885448916408668, "grad_norm": 30.82620894708794, "learning_rate": 6.254295532646049e-06, "loss": 3.2509548664093018, "step": 183 }, { "epoch": 0.18988648090815274, "grad_norm": 13.528624823741277, "learning_rate": 6.288659793814433e-06, "loss": 2.8344573974609375, "step": 184 }, { "epoch": 0.19091847265221878, "grad_norm": 18.761538401728718, "learning_rate": 6.323024054982818e-06, "loss": 2.790778398513794, "step": 185 }, { "epoch": 0.19195046439628483, "grad_norm": 10.532250495313523, "learning_rate": 6.357388316151203e-06, "loss": 2.844142198562622, "step": 186 }, { "epoch": 0.19298245614035087, "grad_norm": 10.737588464332136, "learning_rate": 6.391752577319588e-06, "loss": 2.4085793495178223, "step": 187 }, { "epoch": 0.19401444788441694, "grad_norm": 12.716202236391176, "learning_rate": 6.426116838487974e-06, "loss": 3.11259126663208, "step": 188 }, { "epoch": 0.19504643962848298, "grad_norm": 18.863162432919182, "learning_rate": 6.460481099656359e-06, "loss": 2.130476713180542, "step": 189 }, { "epoch": 0.19607843137254902, "grad_norm": 16.592077675123832, "learning_rate": 6.494845360824743e-06, "loss": 3.2449631690979004, "step": 190 }, { "epoch": 0.19711042311661506, "grad_norm": 8.856414937670612, "learning_rate": 6.529209621993128e-06, "loss": 3.328016996383667, "step": 191 }, { "epoch": 0.19814241486068113, "grad_norm": 44.59005446367263, "learning_rate": 6.563573883161513e-06, "loss": 2.5433740615844727, "step": 192 }, { "epoch": 0.19917440660474717, "grad_norm": 42.20840631794685, "learning_rate": 6.597938144329898e-06, "loss": 3.115938186645508, "step": 193 }, { "epoch": 0.2002063983488132, "grad_norm": 21.49432776270929, "learning_rate": 6.632302405498282e-06, "loss": 3.705045223236084, "step": 194 }, { "epoch": 0.20123839009287925, "grad_norm": 13.153220389904382, "learning_rate": 6.666666666666667e-06, "loss": 3.270660877227783, "step": 195 }, { "epoch": 0.20227038183694532, "grad_norm": 10.291045681689145, "learning_rate": 6.701030927835052e-06, "loss": 2.783212900161743, "step": 196 }, { "epoch": 0.20330237358101136, "grad_norm": 9.438804617099628, "learning_rate": 6.735395189003437e-06, "loss": 2.624166488647461, "step": 197 }, { "epoch": 0.2043343653250774, "grad_norm": 9.565752702177734, "learning_rate": 6.769759450171822e-06, "loss": 2.792402505874634, "step": 198 }, { "epoch": 0.20536635706914344, "grad_norm": 8.203774518764854, "learning_rate": 6.804123711340207e-06, "loss": 2.9766368865966797, "step": 199 }, { "epoch": 0.20639834881320948, "grad_norm": 16.64174398612072, "learning_rate": 6.8384879725085914e-06, "loss": 2.8217639923095703, "step": 200 }, { "epoch": 0.20743034055727555, "grad_norm": 15.499296910290152, "learning_rate": 6.872852233676976e-06, "loss": 3.113293170928955, "step": 201 }, { "epoch": 0.2084623323013416, "grad_norm": 12.533221265320801, "learning_rate": 6.907216494845361e-06, "loss": 2.725703001022339, "step": 202 }, { "epoch": 0.20949432404540763, "grad_norm": 10.18270298827117, "learning_rate": 6.941580756013746e-06, "loss": 3.741483449935913, "step": 203 }, { "epoch": 0.21052631578947367, "grad_norm": 16.4388133098385, "learning_rate": 6.9759450171821304e-06, "loss": 2.6681666374206543, "step": 204 }, { "epoch": 0.21155830753353974, "grad_norm": 7.800822880325412, "learning_rate": 7.010309278350515e-06, "loss": 2.4429688453674316, "step": 205 }, { "epoch": 0.21259029927760578, "grad_norm": 11.2671154522289, "learning_rate": 7.044673539518901e-06, "loss": 3.0641002655029297, "step": 206 }, { "epoch": 0.21362229102167182, "grad_norm": 17.704159664048625, "learning_rate": 7.079037800687286e-06, "loss": 3.075429677963257, "step": 207 }, { "epoch": 0.21465428276573786, "grad_norm": 10.682158078938821, "learning_rate": 7.113402061855671e-06, "loss": 2.7346138954162598, "step": 208 }, { "epoch": 0.21568627450980393, "grad_norm": 9.431220771114651, "learning_rate": 7.147766323024056e-06, "loss": 2.5908002853393555, "step": 209 }, { "epoch": 0.21671826625386997, "grad_norm": 17.15737144502723, "learning_rate": 7.18213058419244e-06, "loss": 2.7248172760009766, "step": 210 }, { "epoch": 0.21775025799793601, "grad_norm": 18.55275798328875, "learning_rate": 7.216494845360825e-06, "loss": 3.1422934532165527, "step": 211 }, { "epoch": 0.21878224974200206, "grad_norm": 8.674665435079042, "learning_rate": 7.25085910652921e-06, "loss": 2.5640807151794434, "step": 212 }, { "epoch": 0.21981424148606812, "grad_norm": 9.27329731014561, "learning_rate": 7.285223367697595e-06, "loss": 2.792649984359741, "step": 213 }, { "epoch": 0.22084623323013416, "grad_norm": 13.285088154216584, "learning_rate": 7.319587628865979e-06, "loss": 3.1613028049468994, "step": 214 }, { "epoch": 0.2218782249742002, "grad_norm": 21.421706016244368, "learning_rate": 7.353951890034365e-06, "loss": 3.27880597114563, "step": 215 }, { "epoch": 0.22291021671826625, "grad_norm": 9.498086512903509, "learning_rate": 7.38831615120275e-06, "loss": 2.89503812789917, "step": 216 }, { "epoch": 0.22394220846233232, "grad_norm": 12.444719476190334, "learning_rate": 7.422680412371135e-06, "loss": 2.5208213329315186, "step": 217 }, { "epoch": 0.22497420020639836, "grad_norm": 8.326855341727157, "learning_rate": 7.45704467353952e-06, "loss": 2.224851131439209, "step": 218 }, { "epoch": 0.2260061919504644, "grad_norm": 10.38571627684951, "learning_rate": 7.491408934707905e-06, "loss": 2.8946280479431152, "step": 219 }, { "epoch": 0.22703818369453044, "grad_norm": 7.740368769601456, "learning_rate": 7.525773195876289e-06, "loss": 3.0110769271850586, "step": 220 }, { "epoch": 0.22807017543859648, "grad_norm": 8.267476570780092, "learning_rate": 7.560137457044674e-06, "loss": 2.925208568572998, "step": 221 }, { "epoch": 0.22910216718266255, "grad_norm": 13.692029885184212, "learning_rate": 7.594501718213059e-06, "loss": 3.7805609703063965, "step": 222 }, { "epoch": 0.2301341589267286, "grad_norm": 7.932163154931762, "learning_rate": 7.628865979381444e-06, "loss": 2.7688980102539062, "step": 223 }, { "epoch": 0.23116615067079463, "grad_norm": 11.007263664299453, "learning_rate": 7.663230240549829e-06, "loss": 2.930605173110962, "step": 224 }, { "epoch": 0.23219814241486067, "grad_norm": 10.212261427573413, "learning_rate": 7.697594501718214e-06, "loss": 3.478551149368286, "step": 225 }, { "epoch": 0.23323013415892674, "grad_norm": 7.683193085540734, "learning_rate": 7.731958762886599e-06, "loss": 3.014065742492676, "step": 226 }, { "epoch": 0.23426212590299278, "grad_norm": 15.024778804729713, "learning_rate": 7.766323024054984e-06, "loss": 2.694744110107422, "step": 227 }, { "epoch": 0.23529411764705882, "grad_norm": 9.927780165927903, "learning_rate": 7.800687285223369e-06, "loss": 2.865319013595581, "step": 228 }, { "epoch": 0.23632610939112486, "grad_norm": 22.148139267313475, "learning_rate": 7.835051546391754e-06, "loss": 2.258620262145996, "step": 229 }, { "epoch": 0.23735810113519093, "grad_norm": 7.5511306699872, "learning_rate": 7.869415807560138e-06, "loss": 2.5735924243927, "step": 230 }, { "epoch": 0.23839009287925697, "grad_norm": 10.257255964053067, "learning_rate": 7.903780068728523e-06, "loss": 2.8336105346679688, "step": 231 }, { "epoch": 0.239422084623323, "grad_norm": 20.642576922687905, "learning_rate": 7.938144329896907e-06, "loss": 2.992154836654663, "step": 232 }, { "epoch": 0.24045407636738905, "grad_norm": 6.970555634840997, "learning_rate": 7.972508591065293e-06, "loss": 2.60292911529541, "step": 233 }, { "epoch": 0.24148606811145512, "grad_norm": 14.079903341685629, "learning_rate": 8.006872852233678e-06, "loss": 2.760301113128662, "step": 234 }, { "epoch": 0.24251805985552116, "grad_norm": 13.243679329890357, "learning_rate": 8.041237113402063e-06, "loss": 2.595752239227295, "step": 235 }, { "epoch": 0.2435500515995872, "grad_norm": 9.004451209153824, "learning_rate": 8.075601374570448e-06, "loss": 3.067572593688965, "step": 236 }, { "epoch": 0.24458204334365324, "grad_norm": 8.942965479095568, "learning_rate": 8.109965635738832e-06, "loss": 2.9863030910491943, "step": 237 }, { "epoch": 0.24561403508771928, "grad_norm": 10.513165239313906, "learning_rate": 8.144329896907216e-06, "loss": 2.3195133209228516, "step": 238 }, { "epoch": 0.24664602683178535, "grad_norm": 7.156933615533272, "learning_rate": 8.178694158075601e-06, "loss": 2.628549814224243, "step": 239 }, { "epoch": 0.2476780185758514, "grad_norm": 13.868964538667283, "learning_rate": 8.213058419243986e-06, "loss": 2.612640619277954, "step": 240 }, { "epoch": 0.24871001031991744, "grad_norm": 30.38753833791798, "learning_rate": 8.247422680412371e-06, "loss": 2.8055434226989746, "step": 241 }, { "epoch": 0.24974200206398348, "grad_norm": 13.238825374613219, "learning_rate": 8.281786941580758e-06, "loss": 3.2974088191986084, "step": 242 }, { "epoch": 0.25077399380804954, "grad_norm": 28.315627867846285, "learning_rate": 8.316151202749141e-06, "loss": 2.8685109615325928, "step": 243 }, { "epoch": 0.2518059855521156, "grad_norm": 9.305515908821278, "learning_rate": 8.350515463917526e-06, "loss": 2.907991886138916, "step": 244 }, { "epoch": 0.2528379772961816, "grad_norm": 6.222471020559953, "learning_rate": 8.384879725085911e-06, "loss": 2.106353282928467, "step": 245 }, { "epoch": 0.25386996904024767, "grad_norm": 10.045321472502492, "learning_rate": 8.419243986254296e-06, "loss": 2.985814332962036, "step": 246 }, { "epoch": 0.2549019607843137, "grad_norm": 21.398479137508037, "learning_rate": 8.453608247422681e-06, "loss": 2.043405294418335, "step": 247 }, { "epoch": 0.25593395252837975, "grad_norm": 10.274413900421287, "learning_rate": 8.487972508591066e-06, "loss": 2.8896231651306152, "step": 248 }, { "epoch": 0.25696594427244585, "grad_norm": 8.087671858243937, "learning_rate": 8.522336769759451e-06, "loss": 3.0149636268615723, "step": 249 }, { "epoch": 0.2579979360165119, "grad_norm": 10.698229808587508, "learning_rate": 8.556701030927836e-06, "loss": 2.5276103019714355, "step": 250 }, { "epoch": 0.2590299277605779, "grad_norm": 9.667088415612039, "learning_rate": 8.591065292096221e-06, "loss": 2.8755786418914795, "step": 251 }, { "epoch": 0.26006191950464397, "grad_norm": 14.564032491233844, "learning_rate": 8.625429553264606e-06, "loss": 3.011821746826172, "step": 252 }, { "epoch": 0.26109391124871, "grad_norm": 11.007757031318695, "learning_rate": 8.65979381443299e-06, "loss": 3.234562873840332, "step": 253 }, { "epoch": 0.26212590299277605, "grad_norm": 10.321254117575847, "learning_rate": 8.694158075601376e-06, "loss": 2.898616075515747, "step": 254 }, { "epoch": 0.2631578947368421, "grad_norm": 13.245489550571538, "learning_rate": 8.72852233676976e-06, "loss": 3.1038565635681152, "step": 255 }, { "epoch": 0.26418988648090813, "grad_norm": 11.376158862004893, "learning_rate": 8.762886597938146e-06, "loss": 3.243223190307617, "step": 256 }, { "epoch": 0.26522187822497423, "grad_norm": 39.21634222490642, "learning_rate": 8.797250859106529e-06, "loss": 3.3660576343536377, "step": 257 }, { "epoch": 0.26625386996904027, "grad_norm": 15.732748791203472, "learning_rate": 8.831615120274914e-06, "loss": 2.6311941146850586, "step": 258 }, { "epoch": 0.2672858617131063, "grad_norm": 8.307884590331216, "learning_rate": 8.865979381443299e-06, "loss": 3.070228338241577, "step": 259 }, { "epoch": 0.26831785345717235, "grad_norm": 20.763674204739655, "learning_rate": 8.900343642611684e-06, "loss": 3.5172173976898193, "step": 260 }, { "epoch": 0.2693498452012384, "grad_norm": 22.557605282511833, "learning_rate": 8.93470790378007e-06, "loss": 2.4271297454833984, "step": 261 }, { "epoch": 0.27038183694530443, "grad_norm": 14.37219569989753, "learning_rate": 8.969072164948455e-06, "loss": 2.866842746734619, "step": 262 }, { "epoch": 0.2714138286893705, "grad_norm": 8.68631119483259, "learning_rate": 9.003436426116839e-06, "loss": 2.546846389770508, "step": 263 }, { "epoch": 0.2724458204334365, "grad_norm": 10.697039335181826, "learning_rate": 9.037800687285224e-06, "loss": 3.255836009979248, "step": 264 }, { "epoch": 0.27347781217750256, "grad_norm": 12.364532427739105, "learning_rate": 9.072164948453609e-06, "loss": 2.631788730621338, "step": 265 }, { "epoch": 0.27450980392156865, "grad_norm": 8.89024681407495, "learning_rate": 9.106529209621994e-06, "loss": 2.0813612937927246, "step": 266 }, { "epoch": 0.2755417956656347, "grad_norm": 12.714578000012677, "learning_rate": 9.140893470790379e-06, "loss": 3.210515022277832, "step": 267 }, { "epoch": 0.27657378740970073, "grad_norm": 14.202532100418049, "learning_rate": 9.175257731958764e-06, "loss": 2.2079906463623047, "step": 268 }, { "epoch": 0.2776057791537668, "grad_norm": 10.81421745806548, "learning_rate": 9.209621993127148e-06, "loss": 2.2032227516174316, "step": 269 }, { "epoch": 0.2786377708978328, "grad_norm": 13.109956618269027, "learning_rate": 9.243986254295533e-06, "loss": 3.036953926086426, "step": 270 }, { "epoch": 0.27966976264189886, "grad_norm": 9.630762729673748, "learning_rate": 9.278350515463918e-06, "loss": 2.6770286560058594, "step": 271 }, { "epoch": 0.2807017543859649, "grad_norm": 11.626984563836794, "learning_rate": 9.312714776632303e-06, "loss": 2.9310131072998047, "step": 272 }, { "epoch": 0.28173374613003094, "grad_norm": 12.749203348361391, "learning_rate": 9.347079037800688e-06, "loss": 2.9083001613616943, "step": 273 }, { "epoch": 0.28276573787409703, "grad_norm": 15.838893812041686, "learning_rate": 9.381443298969073e-06, "loss": 2.9983253479003906, "step": 274 }, { "epoch": 0.2837977296181631, "grad_norm": 13.8172980151992, "learning_rate": 9.415807560137458e-06, "loss": 2.9297034740448, "step": 275 }, { "epoch": 0.2848297213622291, "grad_norm": 9.755184721368341, "learning_rate": 9.450171821305843e-06, "loss": 3.3235621452331543, "step": 276 }, { "epoch": 0.28586171310629516, "grad_norm": 10.544935847627189, "learning_rate": 9.484536082474226e-06, "loss": 2.7880706787109375, "step": 277 }, { "epoch": 0.2868937048503612, "grad_norm": 16.007278048200753, "learning_rate": 9.518900343642611e-06, "loss": 2.841766834259033, "step": 278 }, { "epoch": 0.28792569659442724, "grad_norm": 7.504369408272682, "learning_rate": 9.553264604810998e-06, "loss": 2.679216146469116, "step": 279 }, { "epoch": 0.2889576883384933, "grad_norm": 21.208047797032336, "learning_rate": 9.587628865979383e-06, "loss": 2.819913387298584, "step": 280 }, { "epoch": 0.2899896800825593, "grad_norm": 10.024430545679552, "learning_rate": 9.621993127147768e-06, "loss": 2.6728343963623047, "step": 281 }, { "epoch": 0.29102167182662536, "grad_norm": 32.63663115821379, "learning_rate": 9.656357388316153e-06, "loss": 2.7835605144500732, "step": 282 }, { "epoch": 0.29205366357069146, "grad_norm": 17.371321854452674, "learning_rate": 9.690721649484536e-06, "loss": 2.8734326362609863, "step": 283 }, { "epoch": 0.2930856553147575, "grad_norm": 11.588865129366797, "learning_rate": 9.725085910652921e-06, "loss": 2.65207576751709, "step": 284 }, { "epoch": 0.29411764705882354, "grad_norm": 8.213931559350788, "learning_rate": 9.759450171821306e-06, "loss": 3.1204328536987305, "step": 285 }, { "epoch": 0.2951496388028896, "grad_norm": 9.921519173515582, "learning_rate": 9.793814432989691e-06, "loss": 2.9491803646087646, "step": 286 }, { "epoch": 0.2961816305469556, "grad_norm": 9.509914069257567, "learning_rate": 9.828178694158076e-06, "loss": 2.441004991531372, "step": 287 }, { "epoch": 0.29721362229102166, "grad_norm": 7.715460900071529, "learning_rate": 9.862542955326461e-06, "loss": 2.7245256900787354, "step": 288 }, { "epoch": 0.2982456140350877, "grad_norm": 8.822780073205767, "learning_rate": 9.896907216494846e-06, "loss": 2.8065128326416016, "step": 289 }, { "epoch": 0.29927760577915374, "grad_norm": 19.18711691630486, "learning_rate": 9.931271477663231e-06, "loss": 3.9420652389526367, "step": 290 }, { "epoch": 0.30030959752321984, "grad_norm": 27.577856706805292, "learning_rate": 9.965635738831616e-06, "loss": 3.3128182888031006, "step": 291 }, { "epoch": 0.3013415892672859, "grad_norm": 10.735643267608587, "learning_rate": 1e-05, "loss": 2.7581353187561035, "step": 292 }, { "epoch": 0.3023735810113519, "grad_norm": 10.827298953198202, "learning_rate": 9.999996394510604e-06, "loss": 2.6423697471618652, "step": 293 }, { "epoch": 0.30340557275541796, "grad_norm": 52.09810992012433, "learning_rate": 9.999985578047618e-06, "loss": 2.7856717109680176, "step": 294 }, { "epoch": 0.304437564499484, "grad_norm": 8.015444030673175, "learning_rate": 9.99996755062664e-06, "loss": 2.587672233581543, "step": 295 }, { "epoch": 0.30546955624355004, "grad_norm": 10.412156584943583, "learning_rate": 9.999942312273667e-06, "loss": 2.918144702911377, "step": 296 }, { "epoch": 0.3065015479876161, "grad_norm": 15.334737859529346, "learning_rate": 9.9999098630251e-06, "loss": 3.476226329803467, "step": 297 }, { "epoch": 0.3075335397316821, "grad_norm": 21.535339149470804, "learning_rate": 9.999870202927739e-06, "loss": 2.8158390522003174, "step": 298 }, { "epoch": 0.30856553147574817, "grad_norm": 15.501940000678802, "learning_rate": 9.999823332038779e-06, "loss": 3.293022871017456, "step": 299 }, { "epoch": 0.30959752321981426, "grad_norm": 8.473506114508018, "learning_rate": 9.999769250425817e-06, "loss": 3.0215983390808105, "step": 300 }, { "epoch": 0.3106295149638803, "grad_norm": 12.507129335204262, "learning_rate": 9.999707958166849e-06, "loss": 2.851978063583374, "step": 301 }, { "epoch": 0.31166150670794635, "grad_norm": 11.4112811515218, "learning_rate": 9.999639455350272e-06, "loss": 3.32511043548584, "step": 302 }, { "epoch": 0.3126934984520124, "grad_norm": 13.739752311404821, "learning_rate": 9.999563742074881e-06, "loss": 2.245746612548828, "step": 303 }, { "epoch": 0.3137254901960784, "grad_norm": 7.6278657739069775, "learning_rate": 9.999480818449868e-06, "loss": 2.3384673595428467, "step": 304 }, { "epoch": 0.31475748194014447, "grad_norm": 9.983488457149306, "learning_rate": 9.999390684594824e-06, "loss": 3.0112438201904297, "step": 305 }, { "epoch": 0.3157894736842105, "grad_norm": 7.467844366957447, "learning_rate": 9.99929334063974e-06, "loss": 2.795377254486084, "step": 306 }, { "epoch": 0.31682146542827655, "grad_norm": 16.83206766868716, "learning_rate": 9.999188786725007e-06, "loss": 1.893045425415039, "step": 307 }, { "epoch": 0.31785345717234265, "grad_norm": 25.176127063159374, "learning_rate": 9.999077023001411e-06, "loss": 2.9917497634887695, "step": 308 }, { "epoch": 0.3188854489164087, "grad_norm": 7.922777368099187, "learning_rate": 9.998958049630138e-06, "loss": 2.824535369873047, "step": 309 }, { "epoch": 0.31991744066047473, "grad_norm": 13.433952072522445, "learning_rate": 9.998831866782769e-06, "loss": 1.974698781967163, "step": 310 }, { "epoch": 0.32094943240454077, "grad_norm": 8.553809514306295, "learning_rate": 9.998698474641286e-06, "loss": 2.8490941524505615, "step": 311 }, { "epoch": 0.3219814241486068, "grad_norm": 20.312633204656144, "learning_rate": 9.998557873398066e-06, "loss": 3.004058361053467, "step": 312 }, { "epoch": 0.32301341589267285, "grad_norm": 8.697705945748643, "learning_rate": 9.998410063255883e-06, "loss": 2.712806463241577, "step": 313 }, { "epoch": 0.3240454076367389, "grad_norm": 18.603506945443936, "learning_rate": 9.998255044427912e-06, "loss": 3.0143351554870605, "step": 314 }, { "epoch": 0.32507739938080493, "grad_norm": 14.783978465463441, "learning_rate": 9.998092817137714e-06, "loss": 2.8690080642700195, "step": 315 }, { "epoch": 0.32610939112487103, "grad_norm": 8.454737221004214, "learning_rate": 9.997923381619257e-06, "loss": 2.493039131164551, "step": 316 }, { "epoch": 0.32714138286893707, "grad_norm": 9.462248941946614, "learning_rate": 9.997746738116897e-06, "loss": 3.4006752967834473, "step": 317 }, { "epoch": 0.3281733746130031, "grad_norm": 13.039488480718953, "learning_rate": 9.997562886885393e-06, "loss": 2.318763256072998, "step": 318 }, { "epoch": 0.32920536635706915, "grad_norm": 10.280603609185906, "learning_rate": 9.997371828189892e-06, "loss": 2.6687698364257812, "step": 319 }, { "epoch": 0.3302373581011352, "grad_norm": 10.304930139142693, "learning_rate": 9.997173562305937e-06, "loss": 2.9789810180664062, "step": 320 }, { "epoch": 0.33126934984520123, "grad_norm": 10.891861523702152, "learning_rate": 9.996968089519468e-06, "loss": 2.615915298461914, "step": 321 }, { "epoch": 0.3323013415892673, "grad_norm": 8.947955043261503, "learning_rate": 9.996755410126815e-06, "loss": 2.811088800430298, "step": 322 }, { "epoch": 0.3333333333333333, "grad_norm": 15.51212907567898, "learning_rate": 9.996535524434705e-06, "loss": 3.112473249435425, "step": 323 }, { "epoch": 0.33436532507739936, "grad_norm": 11.932511086933177, "learning_rate": 9.996308432760257e-06, "loss": 2.898390531539917, "step": 324 }, { "epoch": 0.33539731682146545, "grad_norm": 20.972719472534482, "learning_rate": 9.99607413543098e-06, "loss": 3.2577900886535645, "step": 325 }, { "epoch": 0.3364293085655315, "grad_norm": 9.17129674530525, "learning_rate": 9.995832632784777e-06, "loss": 2.911431312561035, "step": 326 }, { "epoch": 0.33746130030959753, "grad_norm": 7.9024820720274755, "learning_rate": 9.99558392516994e-06, "loss": 3.40252685546875, "step": 327 }, { "epoch": 0.3384932920536636, "grad_norm": 9.29051423881268, "learning_rate": 9.995328012945158e-06, "loss": 2.979201316833496, "step": 328 }, { "epoch": 0.3395252837977296, "grad_norm": 12.543344461822567, "learning_rate": 9.995064896479505e-06, "loss": 3.4100840091705322, "step": 329 }, { "epoch": 0.34055727554179566, "grad_norm": 21.478041304262256, "learning_rate": 9.994794576152444e-06, "loss": 1.7510769367218018, "step": 330 }, { "epoch": 0.3415892672858617, "grad_norm": 17.186995192634996, "learning_rate": 9.994517052353835e-06, "loss": 2.7779769897460938, "step": 331 }, { "epoch": 0.34262125902992774, "grad_norm": 14.884852678001963, "learning_rate": 9.994232325483917e-06, "loss": 3.062211513519287, "step": 332 }, { "epoch": 0.34365325077399383, "grad_norm": 11.56452242003458, "learning_rate": 9.993940395953324e-06, "loss": 3.7257943153381348, "step": 333 }, { "epoch": 0.3446852425180599, "grad_norm": 12.805093366422206, "learning_rate": 9.993641264183074e-06, "loss": 2.962735176086426, "step": 334 }, { "epoch": 0.3457172342621259, "grad_norm": 55.42169115560981, "learning_rate": 9.993334930604575e-06, "loss": 2.8952317237854004, "step": 335 }, { "epoch": 0.34674922600619196, "grad_norm": 15.80184371734487, "learning_rate": 9.99302139565962e-06, "loss": 2.249530792236328, "step": 336 }, { "epoch": 0.347781217750258, "grad_norm": 6.869909846606394, "learning_rate": 9.992700659800387e-06, "loss": 3.177823066711426, "step": 337 }, { "epoch": 0.34881320949432404, "grad_norm": 10.887638824184588, "learning_rate": 9.99237272348944e-06, "loss": 2.578218936920166, "step": 338 }, { "epoch": 0.3498452012383901, "grad_norm": 11.668756836891024, "learning_rate": 9.992037587199729e-06, "loss": 2.8234119415283203, "step": 339 }, { "epoch": 0.3508771929824561, "grad_norm": 10.975725441437913, "learning_rate": 9.991695251414584e-06, "loss": 2.981606960296631, "step": 340 }, { "epoch": 0.35190918472652216, "grad_norm": 14.630846465595496, "learning_rate": 9.99134571662772e-06, "loss": 2.5231080055236816, "step": 341 }, { "epoch": 0.35294117647058826, "grad_norm": 12.244846502375566, "learning_rate": 9.990988983343237e-06, "loss": 2.4090816974639893, "step": 342 }, { "epoch": 0.3539731682146543, "grad_norm": 16.87537596953493, "learning_rate": 9.990625052075612e-06, "loss": 3.5845909118652344, "step": 343 }, { "epoch": 0.35500515995872034, "grad_norm": 13.78946221844984, "learning_rate": 9.990253923349706e-06, "loss": 2.855722427368164, "step": 344 }, { "epoch": 0.3560371517027864, "grad_norm": 7.35317353541043, "learning_rate": 9.98987559770076e-06, "loss": 2.6833579540252686, "step": 345 }, { "epoch": 0.3570691434468524, "grad_norm": 8.228648765687204, "learning_rate": 9.98949007567439e-06, "loss": 2.6026723384857178, "step": 346 }, { "epoch": 0.35810113519091846, "grad_norm": 12.63270371305149, "learning_rate": 9.989097357826601e-06, "loss": 3.161715507507324, "step": 347 }, { "epoch": 0.3591331269349845, "grad_norm": 10.945191581908023, "learning_rate": 9.988697444723763e-06, "loss": 2.507702589035034, "step": 348 }, { "epoch": 0.36016511867905054, "grad_norm": 8.265290958176653, "learning_rate": 9.98829033694263e-06, "loss": 2.746232271194458, "step": 349 }, { "epoch": 0.36119711042311664, "grad_norm": 10.712443634776584, "learning_rate": 9.987876035070334e-06, "loss": 2.7389914989471436, "step": 350 }, { "epoch": 0.3622291021671827, "grad_norm": 60.73029115548872, "learning_rate": 9.987454539704377e-06, "loss": 2.6853432655334473, "step": 351 }, { "epoch": 0.3632610939112487, "grad_norm": 14.57137989089784, "learning_rate": 9.98702585145264e-06, "loss": 2.959132671356201, "step": 352 }, { "epoch": 0.36429308565531476, "grad_norm": 7.288243249285789, "learning_rate": 9.986589970933371e-06, "loss": 2.905642032623291, "step": 353 }, { "epoch": 0.3653250773993808, "grad_norm": 17.16941316169951, "learning_rate": 9.986146898775198e-06, "loss": 2.7748100757598877, "step": 354 }, { "epoch": 0.36635706914344685, "grad_norm": 14.12324828442973, "learning_rate": 9.985696635617119e-06, "loss": 3.2043676376342773, "step": 355 }, { "epoch": 0.3673890608875129, "grad_norm": 18.325239784089757, "learning_rate": 9.9852391821085e-06, "loss": 2.4523096084594727, "step": 356 }, { "epoch": 0.3684210526315789, "grad_norm": 19.21617188187594, "learning_rate": 9.984774538909078e-06, "loss": 3.257173776626587, "step": 357 }, { "epoch": 0.36945304437564497, "grad_norm": 11.81568205276225, "learning_rate": 9.984302706688962e-06, "loss": 3.214928150177002, "step": 358 }, { "epoch": 0.37048503611971106, "grad_norm": 11.173774018769517, "learning_rate": 9.983823686128623e-06, "loss": 3.023054599761963, "step": 359 }, { "epoch": 0.3715170278637771, "grad_norm": 13.600635783261229, "learning_rate": 9.983337477918904e-06, "loss": 2.7034380435943604, "step": 360 }, { "epoch": 0.37254901960784315, "grad_norm": 12.108451404270347, "learning_rate": 9.982844082761012e-06, "loss": 2.837873935699463, "step": 361 }, { "epoch": 0.3735810113519092, "grad_norm": 16.766925295407642, "learning_rate": 9.98234350136652e-06, "loss": 2.9200439453125, "step": 362 }, { "epoch": 0.3746130030959752, "grad_norm": 7.704863020577384, "learning_rate": 9.981835734457367e-06, "loss": 2.867964744567871, "step": 363 }, { "epoch": 0.37564499484004127, "grad_norm": 22.36103217474955, "learning_rate": 9.981320782765847e-06, "loss": 2.6212334632873535, "step": 364 }, { "epoch": 0.3766769865841073, "grad_norm": 19.199321631797922, "learning_rate": 9.980798647034623e-06, "loss": 2.153306245803833, "step": 365 }, { "epoch": 0.37770897832817335, "grad_norm": 17.3777675791743, "learning_rate": 9.98026932801672e-06, "loss": 3.090249538421631, "step": 366 }, { "epoch": 0.37874097007223945, "grad_norm": 19.630282766187502, "learning_rate": 9.979732826475515e-06, "loss": 3.281906843185425, "step": 367 }, { "epoch": 0.3797729618163055, "grad_norm": 19.11095646451305, "learning_rate": 9.97918914318475e-06, "loss": 2.9486136436462402, "step": 368 }, { "epoch": 0.38080495356037153, "grad_norm": 9.262762771838458, "learning_rate": 9.978638278928526e-06, "loss": 2.7943458557128906, "step": 369 }, { "epoch": 0.38183694530443757, "grad_norm": 21.806333096154297, "learning_rate": 9.978080234501292e-06, "loss": 1.732006549835205, "step": 370 }, { "epoch": 0.3828689370485036, "grad_norm": 6.763172190678009, "learning_rate": 9.977515010707862e-06, "loss": 2.133474349975586, "step": 371 }, { "epoch": 0.38390092879256965, "grad_norm": 19.366115574534298, "learning_rate": 9.976942608363394e-06, "loss": 3.236906051635742, "step": 372 }, { "epoch": 0.3849329205366357, "grad_norm": 9.249879875236504, "learning_rate": 9.976363028293408e-06, "loss": 2.3459606170654297, "step": 373 }, { "epoch": 0.38596491228070173, "grad_norm": 11.205019811117323, "learning_rate": 9.975776271333772e-06, "loss": 2.967754602432251, "step": 374 }, { "epoch": 0.38699690402476783, "grad_norm": 8.937431500238974, "learning_rate": 9.975182338330704e-06, "loss": 2.5639843940734863, "step": 375 }, { "epoch": 0.38802889576883387, "grad_norm": 8.040846314018593, "learning_rate": 9.97458123014077e-06, "loss": 2.6252379417419434, "step": 376 }, { "epoch": 0.3890608875128999, "grad_norm": 13.693624667041957, "learning_rate": 9.973972947630886e-06, "loss": 3.164388656616211, "step": 377 }, { "epoch": 0.39009287925696595, "grad_norm": 18.601866471494823, "learning_rate": 9.973357491678317e-06, "loss": 3.2664146423339844, "step": 378 }, { "epoch": 0.391124871001032, "grad_norm": 13.00798986403027, "learning_rate": 9.972734863170668e-06, "loss": 2.738879680633545, "step": 379 }, { "epoch": 0.39215686274509803, "grad_norm": 9.189801321181857, "learning_rate": 9.972105063005895e-06, "loss": 2.6619622707366943, "step": 380 }, { "epoch": 0.3931888544891641, "grad_norm": 20.273889494348328, "learning_rate": 9.971468092092289e-06, "loss": 3.262800693511963, "step": 381 }, { "epoch": 0.3942208462332301, "grad_norm": 13.562348770998613, "learning_rate": 9.970823951348488e-06, "loss": 2.8759002685546875, "step": 382 }, { "epoch": 0.39525283797729616, "grad_norm": 10.472991979915335, "learning_rate": 9.970172641703469e-06, "loss": 2.684480905532837, "step": 383 }, { "epoch": 0.39628482972136225, "grad_norm": 12.988097260817664, "learning_rate": 9.969514164096548e-06, "loss": 2.765899658203125, "step": 384 }, { "epoch": 0.3973168214654283, "grad_norm": 14.082206700480766, "learning_rate": 9.968848519477382e-06, "loss": 3.019839286804199, "step": 385 }, { "epoch": 0.39834881320949433, "grad_norm": 22.037728509038477, "learning_rate": 9.968175708805954e-06, "loss": 3.36822509765625, "step": 386 }, { "epoch": 0.3993808049535604, "grad_norm": 32.96293580792141, "learning_rate": 9.967495733052594e-06, "loss": 2.3022584915161133, "step": 387 }, { "epoch": 0.4004127966976264, "grad_norm": 16.977515540691243, "learning_rate": 9.966808593197959e-06, "loss": 3.093107223510742, "step": 388 }, { "epoch": 0.40144478844169246, "grad_norm": 13.576846821861261, "learning_rate": 9.96611429023304e-06, "loss": 3.0452375411987305, "step": 389 }, { "epoch": 0.4024767801857585, "grad_norm": 22.21723302108591, "learning_rate": 9.965412825159156e-06, "loss": 2.6012535095214844, "step": 390 }, { "epoch": 0.40350877192982454, "grad_norm": 14.197779554582961, "learning_rate": 9.964704198987955e-06, "loss": 2.974013328552246, "step": 391 }, { "epoch": 0.40454076367389064, "grad_norm": 15.990117282812127, "learning_rate": 9.96398841274142e-06, "loss": 3.539915084838867, "step": 392 }, { "epoch": 0.4055727554179567, "grad_norm": 16.322394164585692, "learning_rate": 9.963265467451853e-06, "loss": 3.248749017715454, "step": 393 }, { "epoch": 0.4066047471620227, "grad_norm": 9.828830418023808, "learning_rate": 9.962535364161879e-06, "loss": 2.8553173542022705, "step": 394 }, { "epoch": 0.40763673890608876, "grad_norm": 13.454736718150299, "learning_rate": 9.961798103924454e-06, "loss": 2.443535566329956, "step": 395 }, { "epoch": 0.4086687306501548, "grad_norm": 13.941255314244941, "learning_rate": 9.96105368780285e-06, "loss": 2.379136085510254, "step": 396 }, { "epoch": 0.40970072239422084, "grad_norm": 13.117699185931533, "learning_rate": 9.96030211687066e-06, "loss": 3.2327611446380615, "step": 397 }, { "epoch": 0.4107327141382869, "grad_norm": 9.77972211250318, "learning_rate": 9.9595433922118e-06, "loss": 2.840574264526367, "step": 398 }, { "epoch": 0.4117647058823529, "grad_norm": 12.77728328287478, "learning_rate": 9.958777514920498e-06, "loss": 2.725296974182129, "step": 399 }, { "epoch": 0.41279669762641896, "grad_norm": 9.363684373456174, "learning_rate": 9.958004486101293e-06, "loss": 2.8668479919433594, "step": 400 }, { "epoch": 0.41382868937048506, "grad_norm": 7.830675919094182, "learning_rate": 9.957224306869053e-06, "loss": 2.924373149871826, "step": 401 }, { "epoch": 0.4148606811145511, "grad_norm": 11.583533095118902, "learning_rate": 9.956436978348943e-06, "loss": 3.06062650680542, "step": 402 }, { "epoch": 0.41589267285861714, "grad_norm": 13.137555464833737, "learning_rate": 9.955642501676447e-06, "loss": 2.592439651489258, "step": 403 }, { "epoch": 0.4169246646026832, "grad_norm": 6.498482284005015, "learning_rate": 9.954840877997356e-06, "loss": 2.9468114376068115, "step": 404 }, { "epoch": 0.4179566563467492, "grad_norm": 16.005423084911904, "learning_rate": 9.954032108467769e-06, "loss": 2.9270787239074707, "step": 405 }, { "epoch": 0.41898864809081526, "grad_norm": 9.22312215745454, "learning_rate": 9.953216194254088e-06, "loss": 2.422769784927368, "step": 406 }, { "epoch": 0.4200206398348813, "grad_norm": 16.90201457779728, "learning_rate": 9.952393136533021e-06, "loss": 2.977414131164551, "step": 407 }, { "epoch": 0.42105263157894735, "grad_norm": 10.986524956754087, "learning_rate": 9.95156293649158e-06, "loss": 2.8430471420288086, "step": 408 }, { "epoch": 0.42208462332301344, "grad_norm": 8.196133786488467, "learning_rate": 9.950725595327076e-06, "loss": 2.8283088207244873, "step": 409 }, { "epoch": 0.4231166150670795, "grad_norm": 9.997912814132267, "learning_rate": 9.949881114247117e-06, "loss": 3.180129051208496, "step": 410 }, { "epoch": 0.4241486068111455, "grad_norm": 22.911756115565684, "learning_rate": 9.949029494469613e-06, "loss": 2.9284815788269043, "step": 411 }, { "epoch": 0.42518059855521156, "grad_norm": 16.846511577170734, "learning_rate": 9.948170737222763e-06, "loss": 2.946915864944458, "step": 412 }, { "epoch": 0.4262125902992776, "grad_norm": 10.890074775505424, "learning_rate": 9.947304843745065e-06, "loss": 2.7982027530670166, "step": 413 }, { "epoch": 0.42724458204334365, "grad_norm": 16.493152057559257, "learning_rate": 9.946431815285307e-06, "loss": 2.9675230979919434, "step": 414 }, { "epoch": 0.4282765737874097, "grad_norm": 23.323596296720044, "learning_rate": 9.945551653102566e-06, "loss": 2.308884859085083, "step": 415 }, { "epoch": 0.4293085655314757, "grad_norm": 13.906035502172923, "learning_rate": 9.94466435846621e-06, "loss": 2.8153557777404785, "step": 416 }, { "epoch": 0.43034055727554177, "grad_norm": 13.717983567538138, "learning_rate": 9.943769932655889e-06, "loss": 2.911931037902832, "step": 417 }, { "epoch": 0.43137254901960786, "grad_norm": 9.5030223166851, "learning_rate": 9.942868376961542e-06, "loss": 2.2906694412231445, "step": 418 }, { "epoch": 0.4324045407636739, "grad_norm": 14.25839596238375, "learning_rate": 9.941959692683387e-06, "loss": 3.677119016647339, "step": 419 }, { "epoch": 0.43343653250773995, "grad_norm": 11.318600737070227, "learning_rate": 9.941043881131928e-06, "loss": 2.7575297355651855, "step": 420 }, { "epoch": 0.434468524251806, "grad_norm": 8.694948519786168, "learning_rate": 9.94012094362794e-06, "loss": 2.9048759937286377, "step": 421 }, { "epoch": 0.43550051599587203, "grad_norm": 8.718078750395678, "learning_rate": 9.939190881502484e-06, "loss": 2.418113946914673, "step": 422 }, { "epoch": 0.43653250773993807, "grad_norm": 11.446167858175109, "learning_rate": 9.93825369609689e-06, "loss": 2.754148006439209, "step": 423 }, { "epoch": 0.4375644994840041, "grad_norm": 15.718887678684286, "learning_rate": 9.93730938876276e-06, "loss": 2.9343578815460205, "step": 424 }, { "epoch": 0.43859649122807015, "grad_norm": 40.31476387336908, "learning_rate": 9.936357960861972e-06, "loss": 4.214080810546875, "step": 425 }, { "epoch": 0.43962848297213625, "grad_norm": 11.539263308149694, "learning_rate": 9.935399413766672e-06, "loss": 2.7992300987243652, "step": 426 }, { "epoch": 0.4406604747162023, "grad_norm": 10.944370426001614, "learning_rate": 9.934433748859275e-06, "loss": 2.484590530395508, "step": 427 }, { "epoch": 0.44169246646026833, "grad_norm": 8.26905923788389, "learning_rate": 9.933460967532454e-06, "loss": 2.95884370803833, "step": 428 }, { "epoch": 0.44272445820433437, "grad_norm": 10.549063851492853, "learning_rate": 9.932481071189153e-06, "loss": 3.0373470783233643, "step": 429 }, { "epoch": 0.4437564499484004, "grad_norm": 8.449015598464936, "learning_rate": 9.931494061242573e-06, "loss": 2.8038151264190674, "step": 430 }, { "epoch": 0.44478844169246645, "grad_norm": 8.938806234909366, "learning_rate": 9.930499939116176e-06, "loss": 2.8492302894592285, "step": 431 }, { "epoch": 0.4458204334365325, "grad_norm": 15.990285713057496, "learning_rate": 9.929498706243681e-06, "loss": 3.027756929397583, "step": 432 }, { "epoch": 0.44685242518059853, "grad_norm": 8.636867273214362, "learning_rate": 9.928490364069061e-06, "loss": 2.9714016914367676, "step": 433 }, { "epoch": 0.44788441692466463, "grad_norm": 19.47087373298744, "learning_rate": 9.927474914046543e-06, "loss": 2.5500893592834473, "step": 434 }, { "epoch": 0.44891640866873067, "grad_norm": 11.726357779590334, "learning_rate": 9.926452357640606e-06, "loss": 2.5415332317352295, "step": 435 }, { "epoch": 0.4499484004127967, "grad_norm": 12.640200129799714, "learning_rate": 9.925422696325976e-06, "loss": 3.4519457817077637, "step": 436 }, { "epoch": 0.45098039215686275, "grad_norm": 12.751227264561491, "learning_rate": 9.924385931587625e-06, "loss": 2.9185168743133545, "step": 437 }, { "epoch": 0.4520123839009288, "grad_norm": 14.350642201270977, "learning_rate": 9.923342064920771e-06, "loss": 3.1892685890197754, "step": 438 }, { "epoch": 0.45304437564499483, "grad_norm": 17.349109352530736, "learning_rate": 9.922291097830876e-06, "loss": 3.497467041015625, "step": 439 }, { "epoch": 0.4540763673890609, "grad_norm": 8.882606386936544, "learning_rate": 9.921233031833639e-06, "loss": 2.8938982486724854, "step": 440 }, { "epoch": 0.4551083591331269, "grad_norm": 12.00104895756575, "learning_rate": 9.920167868454997e-06, "loss": 2.568723201751709, "step": 441 }, { "epoch": 0.45614035087719296, "grad_norm": 12.24197775502783, "learning_rate": 9.919095609231125e-06, "loss": 2.752882957458496, "step": 442 }, { "epoch": 0.45717234262125905, "grad_norm": 10.373362572382495, "learning_rate": 9.918016255708431e-06, "loss": 2.4421474933624268, "step": 443 }, { "epoch": 0.4582043343653251, "grad_norm": 10.161332440181559, "learning_rate": 9.916929809443555e-06, "loss": 2.9970295429229736, "step": 444 }, { "epoch": 0.45923632610939114, "grad_norm": 9.34236510344498, "learning_rate": 9.915836272003365e-06, "loss": 3.05275559425354, "step": 445 }, { "epoch": 0.4602683178534572, "grad_norm": 9.96739412039589, "learning_rate": 9.914735644964955e-06, "loss": 2.963543653488159, "step": 446 }, { "epoch": 0.4613003095975232, "grad_norm": 18.452664752564154, "learning_rate": 9.913627929915643e-06, "loss": 3.117292642593384, "step": 447 }, { "epoch": 0.46233230134158926, "grad_norm": 10.880262131715845, "learning_rate": 9.912513128452974e-06, "loss": 3.0254008769989014, "step": 448 }, { "epoch": 0.4633642930856553, "grad_norm": 12.030161973072362, "learning_rate": 9.911391242184709e-06, "loss": 3.374577045440674, "step": 449 }, { "epoch": 0.46439628482972134, "grad_norm": 10.04357223387812, "learning_rate": 9.910262272728827e-06, "loss": 3.246830463409424, "step": 450 }, { "epoch": 0.46542827657378744, "grad_norm": 12.972716308863506, "learning_rate": 9.909126221713523e-06, "loss": 2.814098834991455, "step": 451 }, { "epoch": 0.4664602683178535, "grad_norm": 15.11624739030851, "learning_rate": 9.907983090777206e-06, "loss": 2.862546920776367, "step": 452 }, { "epoch": 0.4674922600619195, "grad_norm": 8.309522945339996, "learning_rate": 9.906832881568495e-06, "loss": 2.7970046997070312, "step": 453 }, { "epoch": 0.46852425180598556, "grad_norm": 8.832127930749055, "learning_rate": 9.905675595746214e-06, "loss": 2.7871201038360596, "step": 454 }, { "epoch": 0.4695562435500516, "grad_norm": 8.994774891105193, "learning_rate": 9.9045112349794e-06, "loss": 2.843500852584839, "step": 455 }, { "epoch": 0.47058823529411764, "grad_norm": 9.434601040395169, "learning_rate": 9.903339800947284e-06, "loss": 2.8510613441467285, "step": 456 }, { "epoch": 0.4716202270381837, "grad_norm": 13.487737414745894, "learning_rate": 9.902161295339306e-06, "loss": 2.999211311340332, "step": 457 }, { "epoch": 0.4726522187822497, "grad_norm": 11.537976864473686, "learning_rate": 9.900975719855103e-06, "loss": 2.8772103786468506, "step": 458 }, { "epoch": 0.47368421052631576, "grad_norm": 6.8020753807677625, "learning_rate": 9.899783076204505e-06, "loss": 2.6091156005859375, "step": 459 }, { "epoch": 0.47471620227038186, "grad_norm": 13.189197539765583, "learning_rate": 9.898583366107539e-06, "loss": 3.0862064361572266, "step": 460 }, { "epoch": 0.4757481940144479, "grad_norm": 12.492595777939266, "learning_rate": 9.897376591294419e-06, "loss": 3.177717685699463, "step": 461 }, { "epoch": 0.47678018575851394, "grad_norm": 8.163142685905079, "learning_rate": 9.896162753505554e-06, "loss": 2.645430564880371, "step": 462 }, { "epoch": 0.47781217750258, "grad_norm": 8.182725255858564, "learning_rate": 9.894941854491533e-06, "loss": 2.8407297134399414, "step": 463 }, { "epoch": 0.478844169246646, "grad_norm": 14.663125083277464, "learning_rate": 9.893713896013134e-06, "loss": 3.1082820892333984, "step": 464 }, { "epoch": 0.47987616099071206, "grad_norm": 14.128620127185377, "learning_rate": 9.892478879841312e-06, "loss": 2.8026363849639893, "step": 465 }, { "epoch": 0.4809081527347781, "grad_norm": 11.79057657992109, "learning_rate": 9.891236807757201e-06, "loss": 3.1938395500183105, "step": 466 }, { "epoch": 0.48194014447884415, "grad_norm": 20.61347482442454, "learning_rate": 9.889987681552116e-06, "loss": 2.888730049133301, "step": 467 }, { "epoch": 0.48297213622291024, "grad_norm": 9.126516869757138, "learning_rate": 9.888731503027535e-06, "loss": 2.6922078132629395, "step": 468 }, { "epoch": 0.4840041279669763, "grad_norm": 7.061744400580414, "learning_rate": 9.88746827399512e-06, "loss": 3.191213369369507, "step": 469 }, { "epoch": 0.4850361197110423, "grad_norm": 21.416335420332835, "learning_rate": 9.88619799627669e-06, "loss": 3.262622833251953, "step": 470 }, { "epoch": 0.48606811145510836, "grad_norm": 12.192570025373612, "learning_rate": 9.884920671704236e-06, "loss": 2.7565722465515137, "step": 471 }, { "epoch": 0.4871001031991744, "grad_norm": 10.743234544996307, "learning_rate": 9.883636302119911e-06, "loss": 2.4965720176696777, "step": 472 }, { "epoch": 0.48813209494324045, "grad_norm": 6.832289564849522, "learning_rate": 9.882344889376025e-06, "loss": 2.9415411949157715, "step": 473 }, { "epoch": 0.4891640866873065, "grad_norm": 14.449776531612711, "learning_rate": 9.881046435335051e-06, "loss": 3.0663790702819824, "step": 474 }, { "epoch": 0.49019607843137253, "grad_norm": 16.313898001218128, "learning_rate": 9.879740941869611e-06, "loss": 3.389232873916626, "step": 475 }, { "epoch": 0.49122807017543857, "grad_norm": 28.323859556398297, "learning_rate": 9.878428410862484e-06, "loss": 2.3527965545654297, "step": 476 }, { "epoch": 0.49226006191950467, "grad_norm": 16.000154869678152, "learning_rate": 9.877108844206596e-06, "loss": 2.9908742904663086, "step": 477 }, { "epoch": 0.4932920536635707, "grad_norm": 8.010781348355478, "learning_rate": 9.875782243805019e-06, "loss": 2.6792373657226562, "step": 478 }, { "epoch": 0.49432404540763675, "grad_norm": 15.59977702072723, "learning_rate": 9.874448611570972e-06, "loss": 2.5156683921813965, "step": 479 }, { "epoch": 0.4953560371517028, "grad_norm": 20.37273895188341, "learning_rate": 9.873107949427815e-06, "loss": 3.209371566772461, "step": 480 }, { "epoch": 0.49638802889576883, "grad_norm": 10.036822409380285, "learning_rate": 9.871760259309043e-06, "loss": 2.7667236328125, "step": 481 }, { "epoch": 0.49742002063983487, "grad_norm": 9.753320357829859, "learning_rate": 9.87040554315829e-06, "loss": 1.9972810745239258, "step": 482 }, { "epoch": 0.4984520123839009, "grad_norm": 10.272132278927545, "learning_rate": 9.869043802929322e-06, "loss": 2.9918630123138428, "step": 483 }, { "epoch": 0.49948400412796695, "grad_norm": 7.60379661096887, "learning_rate": 9.867675040586035e-06, "loss": 2.4964921474456787, "step": 484 }, { "epoch": 0.500515995872033, "grad_norm": 7.939425539221603, "learning_rate": 9.866299258102452e-06, "loss": 2.7059545516967773, "step": 485 }, { "epoch": 0.5015479876160991, "grad_norm": 9.333404161929746, "learning_rate": 9.864916457462718e-06, "loss": 2.2699975967407227, "step": 486 }, { "epoch": 0.5025799793601651, "grad_norm": 23.79614363690678, "learning_rate": 9.863526640661107e-06, "loss": 2.6487884521484375, "step": 487 }, { "epoch": 0.5036119711042312, "grad_norm": 12.805837891171706, "learning_rate": 9.862129809702006e-06, "loss": 2.6330082416534424, "step": 488 }, { "epoch": 0.5046439628482973, "grad_norm": 10.301925185020982, "learning_rate": 9.860725966599915e-06, "loss": 2.554631233215332, "step": 489 }, { "epoch": 0.5056759545923633, "grad_norm": 37.98629229944275, "learning_rate": 9.859315113379455e-06, "loss": 3.3101139068603516, "step": 490 }, { "epoch": 0.5067079463364293, "grad_norm": 7.326811124309165, "learning_rate": 9.857897252075348e-06, "loss": 2.6415815353393555, "step": 491 }, { "epoch": 0.5077399380804953, "grad_norm": 15.800853756638167, "learning_rate": 9.856472384732432e-06, "loss": 3.3660855293273926, "step": 492 }, { "epoch": 0.5087719298245614, "grad_norm": 17.332722973199935, "learning_rate": 9.855040513405642e-06, "loss": 3.0009288787841797, "step": 493 }, { "epoch": 0.5098039215686274, "grad_norm": 10.309930138772124, "learning_rate": 9.853601640160018e-06, "loss": 2.8668737411499023, "step": 494 }, { "epoch": 0.5108359133126935, "grad_norm": 9.955824429005219, "learning_rate": 9.852155767070696e-06, "loss": 2.70121169090271, "step": 495 }, { "epoch": 0.5118679050567595, "grad_norm": 14.293014023725297, "learning_rate": 9.850702896222908e-06, "loss": 2.6304683685302734, "step": 496 }, { "epoch": 0.5128998968008256, "grad_norm": 10.16377770776268, "learning_rate": 9.84924302971198e-06, "loss": 2.697707176208496, "step": 497 }, { "epoch": 0.5139318885448917, "grad_norm": 17.313925209639383, "learning_rate": 9.847776169643322e-06, "loss": 3.2866413593292236, "step": 498 }, { "epoch": 0.5149638802889577, "grad_norm": 13.041656264112651, "learning_rate": 9.846302318132437e-06, "loss": 2.739035129547119, "step": 499 }, { "epoch": 0.5159958720330238, "grad_norm": 25.977909711367786, "learning_rate": 9.844821477304904e-06, "loss": 2.803387403488159, "step": 500 }, { "epoch": 0.5170278637770898, "grad_norm": 11.25826916709719, "learning_rate": 9.843333649296387e-06, "loss": 3.2589616775512695, "step": 501 }, { "epoch": 0.5180598555211559, "grad_norm": 10.61990540469556, "learning_rate": 9.841838836252627e-06, "loss": 2.7748382091522217, "step": 502 }, { "epoch": 0.5190918472652218, "grad_norm": 12.272307422950709, "learning_rate": 9.840337040329433e-06, "loss": 2.3776865005493164, "step": 503 }, { "epoch": 0.5201238390092879, "grad_norm": 15.454489179514884, "learning_rate": 9.838828263692693e-06, "loss": 3.4385757446289062, "step": 504 }, { "epoch": 0.5211558307533539, "grad_norm": 10.462179292307349, "learning_rate": 9.837312508518355e-06, "loss": 2.903876543045044, "step": 505 }, { "epoch": 0.52218782249742, "grad_norm": 9.146943786633209, "learning_rate": 9.835789776992436e-06, "loss": 2.5005409717559814, "step": 506 }, { "epoch": 0.5232198142414861, "grad_norm": 12.200395261029715, "learning_rate": 9.834260071311013e-06, "loss": 2.875609874725342, "step": 507 }, { "epoch": 0.5242518059855521, "grad_norm": 9.905509897971436, "learning_rate": 9.832723393680222e-06, "loss": 2.530484437942505, "step": 508 }, { "epoch": 0.5252837977296182, "grad_norm": 14.932339890770075, "learning_rate": 9.83117974631625e-06, "loss": 3.1532692909240723, "step": 509 }, { "epoch": 0.5263157894736842, "grad_norm": 11.517270882974753, "learning_rate": 9.829629131445342e-06, "loss": 3.3132500648498535, "step": 510 }, { "epoch": 0.5273477812177503, "grad_norm": 10.007374901505056, "learning_rate": 9.828071551303786e-06, "loss": 3.384342670440674, "step": 511 }, { "epoch": 0.5283797729618163, "grad_norm": 14.941295583083766, "learning_rate": 9.826507008137919e-06, "loss": 2.9540984630584717, "step": 512 }, { "epoch": 0.5294117647058824, "grad_norm": 14.270089534040753, "learning_rate": 9.824935504204118e-06, "loss": 2.96120023727417, "step": 513 }, { "epoch": 0.5304437564499485, "grad_norm": 9.60285781526636, "learning_rate": 9.823357041768798e-06, "loss": 2.6130409240722656, "step": 514 }, { "epoch": 0.5314757481940144, "grad_norm": 17.84592940119971, "learning_rate": 9.82177162310841e-06, "loss": 3.43778657913208, "step": 515 }, { "epoch": 0.5325077399380805, "grad_norm": 15.641732375671388, "learning_rate": 9.820179250509442e-06, "loss": 2.6619691848754883, "step": 516 }, { "epoch": 0.5335397316821465, "grad_norm": 21.653610476568268, "learning_rate": 9.818579926268406e-06, "loss": 2.7543861865997314, "step": 517 }, { "epoch": 0.5345717234262126, "grad_norm": 16.735112469367458, "learning_rate": 9.81697365269184e-06, "loss": 2.566066265106201, "step": 518 }, { "epoch": 0.5356037151702786, "grad_norm": 9.928716464268096, "learning_rate": 9.8153604320963e-06, "loss": 2.8962950706481934, "step": 519 }, { "epoch": 0.5366357069143447, "grad_norm": 7.929089403898088, "learning_rate": 9.813740266808375e-06, "loss": 2.634678363800049, "step": 520 }, { "epoch": 0.5376676986584107, "grad_norm": 24.63429174148746, "learning_rate": 9.812113159164654e-06, "loss": 2.4516327381134033, "step": 521 }, { "epoch": 0.5386996904024768, "grad_norm": 10.602652369185357, "learning_rate": 9.810479111511748e-06, "loss": 2.5806102752685547, "step": 522 }, { "epoch": 0.5397316821465429, "grad_norm": 12.360924872138455, "learning_rate": 9.80883812620627e-06, "loss": 2.667391300201416, "step": 523 }, { "epoch": 0.5407636738906089, "grad_norm": 11.213683273229366, "learning_rate": 9.807190205614847e-06, "loss": 2.6906814575195312, "step": 524 }, { "epoch": 0.541795665634675, "grad_norm": 11.312769983405103, "learning_rate": 9.805535352114097e-06, "loss": 2.9441957473754883, "step": 525 }, { "epoch": 0.542827657378741, "grad_norm": 11.414090265004733, "learning_rate": 9.80387356809065e-06, "loss": 3.042667865753174, "step": 526 }, { "epoch": 0.543859649122807, "grad_norm": 24.401446685942332, "learning_rate": 9.802204855941118e-06, "loss": 3.2037558555603027, "step": 527 }, { "epoch": 0.544891640866873, "grad_norm": 14.234503967293701, "learning_rate": 9.800529218072112e-06, "loss": 3.2438836097717285, "step": 528 }, { "epoch": 0.5459236326109391, "grad_norm": 19.941491292892934, "learning_rate": 9.79884665690023e-06, "loss": 2.996436595916748, "step": 529 }, { "epoch": 0.5469556243550051, "grad_norm": 17.39429990283107, "learning_rate": 9.797157174852057e-06, "loss": 3.27510404586792, "step": 530 }, { "epoch": 0.5479876160990712, "grad_norm": 12.33719755107959, "learning_rate": 9.795460774364153e-06, "loss": 2.511228561401367, "step": 531 }, { "epoch": 0.5490196078431373, "grad_norm": 12.39665151922383, "learning_rate": 9.793757457883062e-06, "loss": 2.7313032150268555, "step": 532 }, { "epoch": 0.5500515995872033, "grad_norm": 10.597746898974815, "learning_rate": 9.7920472278653e-06, "loss": 3.2101497650146484, "step": 533 }, { "epoch": 0.5510835913312694, "grad_norm": 14.269140980065217, "learning_rate": 9.79033008677735e-06, "loss": 3.409496307373047, "step": 534 }, { "epoch": 0.5521155830753354, "grad_norm": 16.87834227035807, "learning_rate": 9.788606037095672e-06, "loss": 2.925279140472412, "step": 535 }, { "epoch": 0.5531475748194015, "grad_norm": 12.890765154675877, "learning_rate": 9.786875081306677e-06, "loss": 2.770125389099121, "step": 536 }, { "epoch": 0.5541795665634675, "grad_norm": 8.253261202188625, "learning_rate": 9.785137221906744e-06, "loss": 2.789903163909912, "step": 537 }, { "epoch": 0.5552115583075335, "grad_norm": 12.283379998713215, "learning_rate": 9.783392461402208e-06, "loss": 2.7387213706970215, "step": 538 }, { "epoch": 0.5562435500515995, "grad_norm": 9.957027587195357, "learning_rate": 9.781640802309356e-06, "loss": 2.5360267162323, "step": 539 }, { "epoch": 0.5572755417956656, "grad_norm": 11.516373350987816, "learning_rate": 9.779882247154419e-06, "loss": 2.6707615852355957, "step": 540 }, { "epoch": 0.5583075335397317, "grad_norm": 10.237296608456246, "learning_rate": 9.778116798473581e-06, "loss": 2.4297256469726562, "step": 541 }, { "epoch": 0.5593395252837977, "grad_norm": 12.673097956290409, "learning_rate": 9.776344458812964e-06, "loss": 3.2397093772888184, "step": 542 }, { "epoch": 0.5603715170278638, "grad_norm": 10.07521278832074, "learning_rate": 9.774565230728628e-06, "loss": 2.525027275085449, "step": 543 }, { "epoch": 0.5614035087719298, "grad_norm": 10.57130148434467, "learning_rate": 9.772779116786568e-06, "loss": 2.574157238006592, "step": 544 }, { "epoch": 0.5624355005159959, "grad_norm": 12.124957814722062, "learning_rate": 9.770986119562714e-06, "loss": 2.9182310104370117, "step": 545 }, { "epoch": 0.5634674922600619, "grad_norm": 8.965146386103498, "learning_rate": 9.769186241642912e-06, "loss": 2.629945755004883, "step": 546 }, { "epoch": 0.564499484004128, "grad_norm": 13.426207572012682, "learning_rate": 9.767379485622943e-06, "loss": 3.2727980613708496, "step": 547 }, { "epoch": 0.5655314757481941, "grad_norm": 9.160205572760614, "learning_rate": 9.765565854108503e-06, "loss": 2.322061538696289, "step": 548 }, { "epoch": 0.56656346749226, "grad_norm": 10.994969899026037, "learning_rate": 9.763745349715202e-06, "loss": 2.6975133419036865, "step": 549 }, { "epoch": 0.5675954592363261, "grad_norm": 11.994858883920255, "learning_rate": 9.761917975068564e-06, "loss": 2.7160282135009766, "step": 550 }, { "epoch": 0.5686274509803921, "grad_norm": 9.176623905737621, "learning_rate": 9.760083732804022e-06, "loss": 2.919581174850464, "step": 551 }, { "epoch": 0.5696594427244582, "grad_norm": 9.35943521250712, "learning_rate": 9.758242625566912e-06, "loss": 2.583207130432129, "step": 552 }, { "epoch": 0.5706914344685242, "grad_norm": 20.653688605226197, "learning_rate": 9.75639465601247e-06, "loss": 2.9091479778289795, "step": 553 }, { "epoch": 0.5717234262125903, "grad_norm": 10.35374830404143, "learning_rate": 9.754539826805829e-06, "loss": 2.736656904220581, "step": 554 }, { "epoch": 0.5727554179566563, "grad_norm": 8.128894062796498, "learning_rate": 9.75267814062202e-06, "loss": 2.8653979301452637, "step": 555 }, { "epoch": 0.5737874097007224, "grad_norm": 11.714637714032797, "learning_rate": 9.750809600145955e-06, "loss": 2.816713809967041, "step": 556 }, { "epoch": 0.5748194014447885, "grad_norm": 84.19076077455068, "learning_rate": 9.748934208072436e-06, "loss": 3.234724283218384, "step": 557 }, { "epoch": 0.5758513931888545, "grad_norm": 7.046513409283844, "learning_rate": 9.747051967106147e-06, "loss": 2.851330041885376, "step": 558 }, { "epoch": 0.5768833849329206, "grad_norm": 9.692926498760864, "learning_rate": 9.745162879961647e-06, "loss": 2.6581106185913086, "step": 559 }, { "epoch": 0.5779153766769866, "grad_norm": 17.793573180527815, "learning_rate": 9.743266949363368e-06, "loss": 2.938344717025757, "step": 560 }, { "epoch": 0.5789473684210527, "grad_norm": 15.080146501460122, "learning_rate": 9.741364178045615e-06, "loss": 2.7053351402282715, "step": 561 }, { "epoch": 0.5799793601651186, "grad_norm": 13.655409599016627, "learning_rate": 9.739454568752556e-06, "loss": 3.4648630619049072, "step": 562 }, { "epoch": 0.5810113519091847, "grad_norm": 9.057246477179403, "learning_rate": 9.737538124238222e-06, "loss": 2.716965436935425, "step": 563 }, { "epoch": 0.5820433436532507, "grad_norm": 13.906963767108351, "learning_rate": 9.735614847266502e-06, "loss": 3.076453924179077, "step": 564 }, { "epoch": 0.5830753353973168, "grad_norm": 15.381126441512325, "learning_rate": 9.733684740611134e-06, "loss": 3.1801598072052, "step": 565 }, { "epoch": 0.5841073271413829, "grad_norm": 9.80808696167901, "learning_rate": 9.731747807055713e-06, "loss": 2.517850875854492, "step": 566 }, { "epoch": 0.5851393188854489, "grad_norm": 9.828092721350625, "learning_rate": 9.729804049393677e-06, "loss": 3.1751341819763184, "step": 567 }, { "epoch": 0.586171310629515, "grad_norm": 12.043242343137528, "learning_rate": 9.727853470428301e-06, "loss": 2.8291687965393066, "step": 568 }, { "epoch": 0.587203302373581, "grad_norm": 23.52905268157183, "learning_rate": 9.725896072972707e-06, "loss": 3.2142086029052734, "step": 569 }, { "epoch": 0.5882352941176471, "grad_norm": 10.451472602985962, "learning_rate": 9.723931859849842e-06, "loss": 2.8336706161499023, "step": 570 }, { "epoch": 0.5892672858617131, "grad_norm": 10.202907257116511, "learning_rate": 9.721960833892485e-06, "loss": 2.876638412475586, "step": 571 }, { "epoch": 0.5902992776057792, "grad_norm": 10.20554245805355, "learning_rate": 9.719982997943245e-06, "loss": 2.641693115234375, "step": 572 }, { "epoch": 0.5913312693498453, "grad_norm": 8.864986065419998, "learning_rate": 9.717998354854545e-06, "loss": 3.199042797088623, "step": 573 }, { "epoch": 0.5923632610939112, "grad_norm": 30.978260310733006, "learning_rate": 9.716006907488629e-06, "loss": 4.51623010635376, "step": 574 }, { "epoch": 0.5933952528379773, "grad_norm": 14.544403846601538, "learning_rate": 9.714008658717558e-06, "loss": 3.3321194648742676, "step": 575 }, { "epoch": 0.5944272445820433, "grad_norm": 15.335917515161709, "learning_rate": 9.712003611423194e-06, "loss": 2.279353618621826, "step": 576 }, { "epoch": 0.5954592363261094, "grad_norm": 17.30521345042098, "learning_rate": 9.709991768497208e-06, "loss": 3.110170364379883, "step": 577 }, { "epoch": 0.5964912280701754, "grad_norm": 13.0244368033095, "learning_rate": 9.707973132841072e-06, "loss": 3.0601935386657715, "step": 578 }, { "epoch": 0.5975232198142415, "grad_norm": 17.783099723375052, "learning_rate": 9.705947707366054e-06, "loss": 2.3580307960510254, "step": 579 }, { "epoch": 0.5985552115583075, "grad_norm": 11.222725042910128, "learning_rate": 9.703915494993215e-06, "loss": 2.43118953704834, "step": 580 }, { "epoch": 0.5995872033023736, "grad_norm": 20.058317947787405, "learning_rate": 9.701876498653402e-06, "loss": 3.334968090057373, "step": 581 }, { "epoch": 0.6006191950464397, "grad_norm": 16.270739740240145, "learning_rate": 9.699830721287246e-06, "loss": 2.9842143058776855, "step": 582 }, { "epoch": 0.6016511867905057, "grad_norm": 11.484139834647996, "learning_rate": 9.69777816584516e-06, "loss": 2.535402297973633, "step": 583 }, { "epoch": 0.6026831785345718, "grad_norm": 12.510489500637012, "learning_rate": 9.695718835287328e-06, "loss": 2.87693452835083, "step": 584 }, { "epoch": 0.6037151702786377, "grad_norm": 7.376993753654825, "learning_rate": 9.69365273258371e-06, "loss": 3.2447357177734375, "step": 585 }, { "epoch": 0.6047471620227038, "grad_norm": 9.555009770863176, "learning_rate": 9.691579860714033e-06, "loss": 2.7599704265594482, "step": 586 }, { "epoch": 0.6057791537667698, "grad_norm": 10.740584867704138, "learning_rate": 9.689500222667782e-06, "loss": 3.2146971225738525, "step": 587 }, { "epoch": 0.6068111455108359, "grad_norm": 7.871154664025923, "learning_rate": 9.6874138214442e-06, "loss": 2.678419589996338, "step": 588 }, { "epoch": 0.6078431372549019, "grad_norm": 10.949030762287666, "learning_rate": 9.685320660052286e-06, "loss": 2.680488109588623, "step": 589 }, { "epoch": 0.608875128998968, "grad_norm": 10.246851497890871, "learning_rate": 9.683220741510793e-06, "loss": 2.916963577270508, "step": 590 }, { "epoch": 0.6099071207430341, "grad_norm": 18.75558796180508, "learning_rate": 9.68111406884821e-06, "loss": 3.0434699058532715, "step": 591 }, { "epoch": 0.6109391124871001, "grad_norm": 15.779253328657708, "learning_rate": 9.679000645102771e-06, "loss": 4.273288726806641, "step": 592 }, { "epoch": 0.6119711042311662, "grad_norm": 38.01520037711664, "learning_rate": 9.676880473322452e-06, "loss": 2.3172545433044434, "step": 593 }, { "epoch": 0.6130030959752322, "grad_norm": 8.482705813777587, "learning_rate": 9.67475355656495e-06, "loss": 3.028085470199585, "step": 594 }, { "epoch": 0.6140350877192983, "grad_norm": 7.786458035214588, "learning_rate": 9.6726198978977e-06, "loss": 2.7112159729003906, "step": 595 }, { "epoch": 0.6150670794633643, "grad_norm": 7.547208333658604, "learning_rate": 9.670479500397854e-06, "loss": 2.4657599925994873, "step": 596 }, { "epoch": 0.6160990712074303, "grad_norm": 17.276597522101635, "learning_rate": 9.668332367152282e-06, "loss": 2.915532350540161, "step": 597 }, { "epoch": 0.6171310629514963, "grad_norm": 11.229378917400053, "learning_rate": 9.666178501257573e-06, "loss": 2.6651906967163086, "step": 598 }, { "epoch": 0.6181630546955624, "grad_norm": 11.092754648834564, "learning_rate": 9.664017905820021e-06, "loss": 2.791090965270996, "step": 599 }, { "epoch": 0.6191950464396285, "grad_norm": 8.366193431753203, "learning_rate": 9.66185058395563e-06, "loss": 2.9052670001983643, "step": 600 }, { "epoch": 0.6202270381836945, "grad_norm": 9.084523683371547, "learning_rate": 9.6596765387901e-06, "loss": 2.0730695724487305, "step": 601 }, { "epoch": 0.6212590299277606, "grad_norm": 18.34019433810752, "learning_rate": 9.657495773458832e-06, "loss": 2.617827892303467, "step": 602 }, { "epoch": 0.6222910216718266, "grad_norm": 12.29576293105994, "learning_rate": 9.655308291106915e-06, "loss": 2.3276329040527344, "step": 603 }, { "epoch": 0.6233230134158927, "grad_norm": 14.840325418207792, "learning_rate": 9.653114094889128e-06, "loss": 3.239396095275879, "step": 604 }, { "epoch": 0.6243550051599587, "grad_norm": 9.56777055262741, "learning_rate": 9.65091318796993e-06, "loss": 2.225588321685791, "step": 605 }, { "epoch": 0.6253869969040248, "grad_norm": 17.408599977515152, "learning_rate": 9.64870557352346e-06, "loss": 4.101033687591553, "step": 606 }, { "epoch": 0.6264189886480909, "grad_norm": 12.6799104424994, "learning_rate": 9.646491254733533e-06, "loss": 3.09356689453125, "step": 607 }, { "epoch": 0.6274509803921569, "grad_norm": 10.671217767975829, "learning_rate": 9.644270234793625e-06, "loss": 2.3732972145080566, "step": 608 }, { "epoch": 0.628482972136223, "grad_norm": 37.52455438840146, "learning_rate": 9.642042516906884e-06, "loss": 2.3487610816955566, "step": 609 }, { "epoch": 0.6295149638802889, "grad_norm": 9.04452611714464, "learning_rate": 9.639808104286118e-06, "loss": 2.822605609893799, "step": 610 }, { "epoch": 0.630546955624355, "grad_norm": 7.354451143517812, "learning_rate": 9.637567000153783e-06, "loss": 2.935185194015503, "step": 611 }, { "epoch": 0.631578947368421, "grad_norm": 7.332551011890475, "learning_rate": 9.63531920774199e-06, "loss": 2.9632339477539062, "step": 612 }, { "epoch": 0.6326109391124871, "grad_norm": 15.10098765191982, "learning_rate": 9.6330647302925e-06, "loss": 2.4484119415283203, "step": 613 }, { "epoch": 0.6336429308565531, "grad_norm": 7.87243647045685, "learning_rate": 9.630803571056709e-06, "loss": 2.841092109680176, "step": 614 }, { "epoch": 0.6346749226006192, "grad_norm": 17.622392235945, "learning_rate": 9.62853573329565e-06, "loss": 3.339503526687622, "step": 615 }, { "epoch": 0.6357069143446853, "grad_norm": 16.27308797381183, "learning_rate": 9.62626122027999e-06, "loss": 2.700296401977539, "step": 616 }, { "epoch": 0.6367389060887513, "grad_norm": 15.908772084211096, "learning_rate": 9.62398003529002e-06, "loss": 2.5863516330718994, "step": 617 }, { "epoch": 0.6377708978328174, "grad_norm": 7.9769771271762675, "learning_rate": 9.621692181615657e-06, "loss": 2.5605409145355225, "step": 618 }, { "epoch": 0.6388028895768834, "grad_norm": 17.97539267026468, "learning_rate": 9.619397662556434e-06, "loss": 1.9493972063064575, "step": 619 }, { "epoch": 0.6398348813209495, "grad_norm": 11.543738479270724, "learning_rate": 9.617096481421498e-06, "loss": 2.926856756210327, "step": 620 }, { "epoch": 0.6408668730650154, "grad_norm": 19.54677543508397, "learning_rate": 9.6147886415296e-06, "loss": 3.1197805404663086, "step": 621 }, { "epoch": 0.6418988648090815, "grad_norm": 7.376107725428685, "learning_rate": 9.612474146209097e-06, "loss": 2.92832612991333, "step": 622 }, { "epoch": 0.6429308565531475, "grad_norm": 12.572511377197603, "learning_rate": 9.610152998797946e-06, "loss": 2.574267864227295, "step": 623 }, { "epoch": 0.6439628482972136, "grad_norm": 10.73391934702385, "learning_rate": 9.607825202643696e-06, "loss": 2.7154815196990967, "step": 624 }, { "epoch": 0.6449948400412797, "grad_norm": 25.74792746698063, "learning_rate": 9.605490761103485e-06, "loss": 3.2330567836761475, "step": 625 }, { "epoch": 0.6460268317853457, "grad_norm": 8.879203460607043, "learning_rate": 9.60314967754403e-06, "loss": 2.8523783683776855, "step": 626 }, { "epoch": 0.6470588235294118, "grad_norm": 20.30571917980229, "learning_rate": 9.600801955341638e-06, "loss": 3.3165574073791504, "step": 627 }, { "epoch": 0.6480908152734778, "grad_norm": 9.494600192580265, "learning_rate": 9.598447597882181e-06, "loss": 2.7943115234375, "step": 628 }, { "epoch": 0.6491228070175439, "grad_norm": 14.614501521119585, "learning_rate": 9.596086608561105e-06, "loss": 3.095608711242676, "step": 629 }, { "epoch": 0.6501547987616099, "grad_norm": 11.426320902321967, "learning_rate": 9.593718990783415e-06, "loss": 3.028874397277832, "step": 630 }, { "epoch": 0.651186790505676, "grad_norm": 9.551960197143908, "learning_rate": 9.591344747963685e-06, "loss": 2.879570245742798, "step": 631 }, { "epoch": 0.6522187822497421, "grad_norm": 10.273847483188757, "learning_rate": 9.588963883526033e-06, "loss": 2.807219982147217, "step": 632 }, { "epoch": 0.653250773993808, "grad_norm": 10.330349921653404, "learning_rate": 9.586576400904135e-06, "loss": 2.7486634254455566, "step": 633 }, { "epoch": 0.6542827657378741, "grad_norm": 8.93371321322733, "learning_rate": 9.584182303541205e-06, "loss": 2.0055630207061768, "step": 634 }, { "epoch": 0.6553147574819401, "grad_norm": 16.71564616123394, "learning_rate": 9.581781594890002e-06, "loss": 2.5305252075195312, "step": 635 }, { "epoch": 0.6563467492260062, "grad_norm": 19.66138410376843, "learning_rate": 9.579374278412819e-06, "loss": 2.6674296855926514, "step": 636 }, { "epoch": 0.6573787409700722, "grad_norm": 7.878556702040047, "learning_rate": 9.576960357581475e-06, "loss": 2.5877861976623535, "step": 637 }, { "epoch": 0.6584107327141383, "grad_norm": 21.148865665043417, "learning_rate": 9.574539835877316e-06, "loss": 2.3358216285705566, "step": 638 }, { "epoch": 0.6594427244582043, "grad_norm": 27.193231781342472, "learning_rate": 9.572112716791214e-06, "loss": 3.3498458862304688, "step": 639 }, { "epoch": 0.6604747162022704, "grad_norm": 11.406472675869308, "learning_rate": 9.569679003823542e-06, "loss": 3.41530704498291, "step": 640 }, { "epoch": 0.6615067079463365, "grad_norm": 8.61944457154882, "learning_rate": 9.567238700484195e-06, "loss": 2.8429856300354004, "step": 641 }, { "epoch": 0.6625386996904025, "grad_norm": 8.399879511973081, "learning_rate": 9.564791810292569e-06, "loss": 2.522237777709961, "step": 642 }, { "epoch": 0.6635706914344686, "grad_norm": 7.102928350396439, "learning_rate": 9.562338336777556e-06, "loss": 2.6794557571411133, "step": 643 }, { "epoch": 0.6646026831785345, "grad_norm": 9.367704921473894, "learning_rate": 9.559878283477546e-06, "loss": 2.757814407348633, "step": 644 }, { "epoch": 0.6656346749226006, "grad_norm": 11.857632395196745, "learning_rate": 9.557411653940416e-06, "loss": 1.3053548336029053, "step": 645 }, { "epoch": 0.6666666666666666, "grad_norm": 8.264877117521738, "learning_rate": 9.554938451723533e-06, "loss": 2.681445837020874, "step": 646 }, { "epoch": 0.6676986584107327, "grad_norm": 11.91672847995356, "learning_rate": 9.552458680393732e-06, "loss": 3.016225576400757, "step": 647 }, { "epoch": 0.6687306501547987, "grad_norm": 13.22285021053636, "learning_rate": 9.549972343527336e-06, "loss": 2.237546920776367, "step": 648 }, { "epoch": 0.6697626418988648, "grad_norm": 13.890816027625009, "learning_rate": 9.547479444710125e-06, "loss": 2.1632239818573, "step": 649 }, { "epoch": 0.6707946336429309, "grad_norm": 13.968055221199034, "learning_rate": 9.544979987537348e-06, "loss": 2.8767595291137695, "step": 650 }, { "epoch": 0.6718266253869969, "grad_norm": 10.572301879287217, "learning_rate": 9.54247397561371e-06, "loss": 3.2392611503601074, "step": 651 }, { "epoch": 0.672858617131063, "grad_norm": 8.69083114193443, "learning_rate": 9.539961412553375e-06, "loss": 3.211719512939453, "step": 652 }, { "epoch": 0.673890608875129, "grad_norm": 15.854699726954683, "learning_rate": 9.537442301979947e-06, "loss": 2.769451379776001, "step": 653 }, { "epoch": 0.6749226006191951, "grad_norm": 9.123769838722273, "learning_rate": 9.534916647526477e-06, "loss": 2.761396646499634, "step": 654 }, { "epoch": 0.675954592363261, "grad_norm": 15.534876797351453, "learning_rate": 9.532384452835457e-06, "loss": 2.5450034141540527, "step": 655 }, { "epoch": 0.6769865841073271, "grad_norm": 19.84524192746136, "learning_rate": 9.529845721558802e-06, "loss": 2.847796678543091, "step": 656 }, { "epoch": 0.6780185758513931, "grad_norm": 22.76040744388083, "learning_rate": 9.527300457357862e-06, "loss": 3.6643857955932617, "step": 657 }, { "epoch": 0.6790505675954592, "grad_norm": 13.739512021459273, "learning_rate": 9.524748663903408e-06, "loss": 2.9481019973754883, "step": 658 }, { "epoch": 0.6800825593395253, "grad_norm": 8.044318217381289, "learning_rate": 9.52219034487562e-06, "loss": 3.0928149223327637, "step": 659 }, { "epoch": 0.6811145510835913, "grad_norm": 10.299202871378856, "learning_rate": 9.5196255039641e-06, "loss": 3.110884666442871, "step": 660 }, { "epoch": 0.6821465428276574, "grad_norm": 9.923669131740587, "learning_rate": 9.517054144867852e-06, "loss": 2.8850812911987305, "step": 661 }, { "epoch": 0.6831785345717234, "grad_norm": 8.06974951581709, "learning_rate": 9.514476271295274e-06, "loss": 3.0151658058166504, "step": 662 }, { "epoch": 0.6842105263157895, "grad_norm": 10.70998071656242, "learning_rate": 9.511891886964167e-06, "loss": 2.7311177253723145, "step": 663 }, { "epoch": 0.6852425180598555, "grad_norm": 21.25500131920381, "learning_rate": 9.50930099560172e-06, "loss": 2.797929286956787, "step": 664 }, { "epoch": 0.6862745098039216, "grad_norm": 12.353219706840553, "learning_rate": 9.506703600944504e-06, "loss": 2.719322681427002, "step": 665 }, { "epoch": 0.6873065015479877, "grad_norm": 9.37526977212679, "learning_rate": 9.504099706738472e-06, "loss": 2.0463595390319824, "step": 666 }, { "epoch": 0.6883384932920537, "grad_norm": 12.137096463786149, "learning_rate": 9.501489316738945e-06, "loss": 2.929328441619873, "step": 667 }, { "epoch": 0.6893704850361198, "grad_norm": 17.03028609691427, "learning_rate": 9.498872434710624e-06, "loss": 2.685189723968506, "step": 668 }, { "epoch": 0.6904024767801857, "grad_norm": 11.813969893205455, "learning_rate": 9.496249064427557e-06, "loss": 2.399085521697998, "step": 669 }, { "epoch": 0.6914344685242518, "grad_norm": 10.433948381325353, "learning_rate": 9.493619209673164e-06, "loss": 2.962214708328247, "step": 670 }, { "epoch": 0.6924664602683178, "grad_norm": 10.924256879246604, "learning_rate": 9.490982874240206e-06, "loss": 2.994148015975952, "step": 671 }, { "epoch": 0.6934984520123839, "grad_norm": 10.777410886774046, "learning_rate": 9.488340061930797e-06, "loss": 3.059415340423584, "step": 672 }, { "epoch": 0.6945304437564499, "grad_norm": 30.990766907842698, "learning_rate": 9.485690776556388e-06, "loss": 2.916910409927368, "step": 673 }, { "epoch": 0.695562435500516, "grad_norm": 15.468140122707947, "learning_rate": 9.48303502193777e-06, "loss": 2.9391396045684814, "step": 674 }, { "epoch": 0.6965944272445821, "grad_norm": 14.007942004032593, "learning_rate": 9.48037280190506e-06, "loss": 3.11044979095459, "step": 675 }, { "epoch": 0.6976264189886481, "grad_norm": 8.4366885996248, "learning_rate": 9.477704120297698e-06, "loss": 2.9738566875457764, "step": 676 }, { "epoch": 0.6986584107327142, "grad_norm": 8.798963190200652, "learning_rate": 9.475028980964447e-06, "loss": 2.7472972869873047, "step": 677 }, { "epoch": 0.6996904024767802, "grad_norm": 20.733085450347474, "learning_rate": 9.472347387763382e-06, "loss": 3.2058520317077637, "step": 678 }, { "epoch": 0.7007223942208463, "grad_norm": 10.073546129911945, "learning_rate": 9.469659344561886e-06, "loss": 2.298720359802246, "step": 679 }, { "epoch": 0.7017543859649122, "grad_norm": 9.078790896884078, "learning_rate": 9.46696485523664e-06, "loss": 3.1304984092712402, "step": 680 }, { "epoch": 0.7027863777089783, "grad_norm": 18.362405319861182, "learning_rate": 9.464263923673629e-06, "loss": 2.650181293487549, "step": 681 }, { "epoch": 0.7038183694530443, "grad_norm": 19.699414812583893, "learning_rate": 9.461556553768124e-06, "loss": 3.0006284713745117, "step": 682 }, { "epoch": 0.7048503611971104, "grad_norm": 8.446438400335564, "learning_rate": 9.458842749424682e-06, "loss": 2.7441043853759766, "step": 683 }, { "epoch": 0.7058823529411765, "grad_norm": 26.742234328847115, "learning_rate": 9.45612251455714e-06, "loss": 3.2146198749542236, "step": 684 }, { "epoch": 0.7069143446852425, "grad_norm": 13.584554757600017, "learning_rate": 9.453395853088609e-06, "loss": 3.1827893257141113, "step": 685 }, { "epoch": 0.7079463364293086, "grad_norm": 6.7388482588659695, "learning_rate": 9.450662768951468e-06, "loss": 2.9448766708374023, "step": 686 }, { "epoch": 0.7089783281733746, "grad_norm": 11.127788465095271, "learning_rate": 9.447923266087361e-06, "loss": 3.136505603790283, "step": 687 }, { "epoch": 0.7100103199174407, "grad_norm": 14.989815281265162, "learning_rate": 9.445177348447187e-06, "loss": 3.103586196899414, "step": 688 }, { "epoch": 0.7110423116615067, "grad_norm": 14.83033037187033, "learning_rate": 9.442425019991097e-06, "loss": 2.857605218887329, "step": 689 }, { "epoch": 0.7120743034055728, "grad_norm": 16.11429528022689, "learning_rate": 9.439666284688486e-06, "loss": 3.428617000579834, "step": 690 }, { "epoch": 0.7131062951496389, "grad_norm": 10.602945033932661, "learning_rate": 9.436901146517991e-06, "loss": 2.8772683143615723, "step": 691 }, { "epoch": 0.7141382868937048, "grad_norm": 8.166056489516002, "learning_rate": 9.434129609467484e-06, "loss": 2.5252692699432373, "step": 692 }, { "epoch": 0.7151702786377709, "grad_norm": 9.608848273921893, "learning_rate": 9.43135167753406e-06, "loss": 2.7914929389953613, "step": 693 }, { "epoch": 0.7162022703818369, "grad_norm": 9.334774312998807, "learning_rate": 9.428567354724047e-06, "loss": 2.8287720680236816, "step": 694 }, { "epoch": 0.717234262125903, "grad_norm": 10.683021997652697, "learning_rate": 9.425776645052977e-06, "loss": 2.9784929752349854, "step": 695 }, { "epoch": 0.718266253869969, "grad_norm": 22.0759217459894, "learning_rate": 9.422979552545604e-06, "loss": 2.3638341426849365, "step": 696 }, { "epoch": 0.7192982456140351, "grad_norm": 17.421648915610042, "learning_rate": 9.420176081235882e-06, "loss": 3.194939374923706, "step": 697 }, { "epoch": 0.7203302373581011, "grad_norm": 10.263567225837505, "learning_rate": 9.417366235166962e-06, "loss": 2.8977842330932617, "step": 698 }, { "epoch": 0.7213622291021672, "grad_norm": 62.23893520113241, "learning_rate": 9.414550018391197e-06, "loss": 2.399980068206787, "step": 699 }, { "epoch": 0.7223942208462333, "grad_norm": 43.01592458636682, "learning_rate": 9.411727434970121e-06, "loss": 2.2690107822418213, "step": 700 }, { "epoch": 0.7234262125902993, "grad_norm": 9.09760035353303, "learning_rate": 9.408898488974453e-06, "loss": 1.8195793628692627, "step": 701 }, { "epoch": 0.7244582043343654, "grad_norm": 20.82039863664745, "learning_rate": 9.406063184484084e-06, "loss": 3.671936273574829, "step": 702 }, { "epoch": 0.7254901960784313, "grad_norm": 22.26006983128521, "learning_rate": 9.40322152558808e-06, "loss": 2.808476448059082, "step": 703 }, { "epoch": 0.7265221878224974, "grad_norm": 12.786160596812799, "learning_rate": 9.400373516384671e-06, "loss": 3.3824217319488525, "step": 704 }, { "epoch": 0.7275541795665634, "grad_norm": 13.227887605235063, "learning_rate": 9.397519160981239e-06, "loss": 3.272808313369751, "step": 705 }, { "epoch": 0.7285861713106295, "grad_norm": 21.125694529711843, "learning_rate": 9.394658463494328e-06, "loss": 2.741298198699951, "step": 706 }, { "epoch": 0.7296181630546955, "grad_norm": 9.597190596105376, "learning_rate": 9.391791428049622e-06, "loss": 2.583601951599121, "step": 707 }, { "epoch": 0.7306501547987616, "grad_norm": 13.673553563405285, "learning_rate": 9.388918058781947e-06, "loss": 2.6293039321899414, "step": 708 }, { "epoch": 0.7316821465428277, "grad_norm": 14.809515080215478, "learning_rate": 9.386038359835265e-06, "loss": 2.608527660369873, "step": 709 }, { "epoch": 0.7327141382868937, "grad_norm": 20.749093596054855, "learning_rate": 9.383152335362664e-06, "loss": 3.150191307067871, "step": 710 }, { "epoch": 0.7337461300309598, "grad_norm": 12.70554197774949, "learning_rate": 9.380259989526358e-06, "loss": 2.7562756538391113, "step": 711 }, { "epoch": 0.7347781217750258, "grad_norm": 7.911315504138644, "learning_rate": 9.377361326497673e-06, "loss": 3.000636100769043, "step": 712 }, { "epoch": 0.7358101135190919, "grad_norm": 18.777551104783374, "learning_rate": 9.374456350457052e-06, "loss": 3.5504672527313232, "step": 713 }, { "epoch": 0.7368421052631579, "grad_norm": 15.56092683279138, "learning_rate": 9.371545065594038e-06, "loss": 2.754519462585449, "step": 714 }, { "epoch": 0.737874097007224, "grad_norm": 14.257959884490688, "learning_rate": 9.368627476107275e-06, "loss": 3.421858310699463, "step": 715 }, { "epoch": 0.7389060887512899, "grad_norm": 11.738492800659309, "learning_rate": 9.365703586204495e-06, "loss": 2.8414125442504883, "step": 716 }, { "epoch": 0.739938080495356, "grad_norm": 7.684516652147754, "learning_rate": 9.36277340010252e-06, "loss": 2.511709690093994, "step": 717 }, { "epoch": 0.7409700722394221, "grad_norm": 7.673149105995348, "learning_rate": 9.359836922027255e-06, "loss": 2.693768262863159, "step": 718 }, { "epoch": 0.7420020639834881, "grad_norm": 12.645958239160871, "learning_rate": 9.356894156213674e-06, "loss": 2.4622414112091064, "step": 719 }, { "epoch": 0.7430340557275542, "grad_norm": 10.063934028161487, "learning_rate": 9.353945106905822e-06, "loss": 2.7610793113708496, "step": 720 }, { "epoch": 0.7440660474716202, "grad_norm": 28.87732507966042, "learning_rate": 9.350989778356804e-06, "loss": 2.6496834754943848, "step": 721 }, { "epoch": 0.7450980392156863, "grad_norm": 16.04099161626254, "learning_rate": 9.348028174828785e-06, "loss": 2.8012263774871826, "step": 722 }, { "epoch": 0.7461300309597523, "grad_norm": 18.863090632580292, "learning_rate": 9.345060300592976e-06, "loss": 2.623215675354004, "step": 723 }, { "epoch": 0.7471620227038184, "grad_norm": 12.326353277057443, "learning_rate": 9.342086159929629e-06, "loss": 2.9217588901519775, "step": 724 }, { "epoch": 0.7481940144478845, "grad_norm": 8.179220092438687, "learning_rate": 9.339105757128042e-06, "loss": 2.6204776763916016, "step": 725 }, { "epoch": 0.7492260061919505, "grad_norm": 7.219516548076823, "learning_rate": 9.336119096486538e-06, "loss": 2.5338289737701416, "step": 726 }, { "epoch": 0.7502579979360166, "grad_norm": 7.4288592447235064, "learning_rate": 9.333126182312466e-06, "loss": 2.7727599143981934, "step": 727 }, { "epoch": 0.7512899896800825, "grad_norm": 8.727310813003266, "learning_rate": 9.330127018922195e-06, "loss": 2.846273899078369, "step": 728 }, { "epoch": 0.7523219814241486, "grad_norm": 13.12477588101624, "learning_rate": 9.327121610641102e-06, "loss": 2.754586696624756, "step": 729 }, { "epoch": 0.7533539731682146, "grad_norm": 7.191021167663748, "learning_rate": 9.324109961803578e-06, "loss": 2.8355979919433594, "step": 730 }, { "epoch": 0.7543859649122807, "grad_norm": 9.763856623533446, "learning_rate": 9.321092076753009e-06, "loss": 2.9621317386627197, "step": 731 }, { "epoch": 0.7554179566563467, "grad_norm": 14.955015033358134, "learning_rate": 9.318067959841776e-06, "loss": 3.0515987873077393, "step": 732 }, { "epoch": 0.7564499484004128, "grad_norm": 12.869674986241153, "learning_rate": 9.315037615431247e-06, "loss": 3.477961540222168, "step": 733 }, { "epoch": 0.7574819401444789, "grad_norm": 33.030189464635995, "learning_rate": 9.312001047891772e-06, "loss": 3.993717670440674, "step": 734 }, { "epoch": 0.7585139318885449, "grad_norm": 11.639652244898626, "learning_rate": 9.308958261602677e-06, "loss": 3.2335915565490723, "step": 735 }, { "epoch": 0.759545923632611, "grad_norm": 10.336777347057009, "learning_rate": 9.305909260952255e-06, "loss": 1.841538429260254, "step": 736 }, { "epoch": 0.760577915376677, "grad_norm": 10.528463414308874, "learning_rate": 9.302854050337761e-06, "loss": 2.6561760902404785, "step": 737 }, { "epoch": 0.7616099071207431, "grad_norm": 11.574953224265704, "learning_rate": 9.299792634165407e-06, "loss": 2.8268561363220215, "step": 738 }, { "epoch": 0.762641898864809, "grad_norm": 10.342309275853001, "learning_rate": 9.296725016850354e-06, "loss": 2.692314624786377, "step": 739 }, { "epoch": 0.7636738906088751, "grad_norm": 11.830732967318072, "learning_rate": 9.29365120281671e-06, "loss": 2.5520989894866943, "step": 740 }, { "epoch": 0.7647058823529411, "grad_norm": 10.084933673579913, "learning_rate": 9.290571196497511e-06, "loss": 2.9406180381774902, "step": 741 }, { "epoch": 0.7657378740970072, "grad_norm": 19.141054972203882, "learning_rate": 9.287485002334732e-06, "loss": 2.550870418548584, "step": 742 }, { "epoch": 0.7667698658410733, "grad_norm": 12.137785304288148, "learning_rate": 9.284392624779271e-06, "loss": 2.932023763656616, "step": 743 }, { "epoch": 0.7678018575851393, "grad_norm": 12.319251681417937, "learning_rate": 9.28129406829094e-06, "loss": 2.9180939197540283, "step": 744 }, { "epoch": 0.7688338493292054, "grad_norm": 17.67587880952499, "learning_rate": 9.278189337338461e-06, "loss": 2.494802236557007, "step": 745 }, { "epoch": 0.7698658410732714, "grad_norm": 17.043019392132898, "learning_rate": 9.275078436399469e-06, "loss": 2.905942440032959, "step": 746 }, { "epoch": 0.7708978328173375, "grad_norm": 10.736717086056796, "learning_rate": 9.27196136996049e-06, "loss": 2.4120588302612305, "step": 747 }, { "epoch": 0.7719298245614035, "grad_norm": 14.60363403718915, "learning_rate": 9.268838142516943e-06, "loss": 2.5547168254852295, "step": 748 }, { "epoch": 0.7729618163054696, "grad_norm": 22.435801897090034, "learning_rate": 9.265708758573135e-06, "loss": 2.870389699935913, "step": 749 }, { "epoch": 0.7739938080495357, "grad_norm": 11.20773230740112, "learning_rate": 9.262573222642249e-06, "loss": 3.0618457794189453, "step": 750 }, { "epoch": 0.7750257997936016, "grad_norm": 37.50198419802748, "learning_rate": 9.259431539246343e-06, "loss": 2.7579920291900635, "step": 751 }, { "epoch": 0.7760577915376677, "grad_norm": 8.921780765895253, "learning_rate": 9.256283712916337e-06, "loss": 3.263230323791504, "step": 752 }, { "epoch": 0.7770897832817337, "grad_norm": 51.1999059027526, "learning_rate": 9.253129748192016e-06, "loss": 2.0034947395324707, "step": 753 }, { "epoch": 0.7781217750257998, "grad_norm": 14.747805473660467, "learning_rate": 9.249969649622013e-06, "loss": 3.345240592956543, "step": 754 }, { "epoch": 0.7791537667698658, "grad_norm": 16.75723048704436, "learning_rate": 9.246803421763806e-06, "loss": 3.153355598449707, "step": 755 }, { "epoch": 0.7801857585139319, "grad_norm": 9.834318076373075, "learning_rate": 9.24363106918372e-06, "loss": 2.8869364261627197, "step": 756 }, { "epoch": 0.7812177502579979, "grad_norm": 6.540745571633765, "learning_rate": 9.240452596456906e-06, "loss": 2.6238231658935547, "step": 757 }, { "epoch": 0.782249742002064, "grad_norm": 11.314123597573413, "learning_rate": 9.237268008167345e-06, "loss": 2.992520570755005, "step": 758 }, { "epoch": 0.7832817337461301, "grad_norm": 12.923401981866071, "learning_rate": 9.234077308907836e-06, "loss": 2.7931909561157227, "step": 759 }, { "epoch": 0.7843137254901961, "grad_norm": 16.65804955953843, "learning_rate": 9.230880503279991e-06, "loss": 2.8888728618621826, "step": 760 }, { "epoch": 0.7853457172342622, "grad_norm": 10.421526610811561, "learning_rate": 9.227677595894231e-06, "loss": 2.6950912475585938, "step": 761 }, { "epoch": 0.7863777089783281, "grad_norm": 14.617189434664802, "learning_rate": 9.224468591369774e-06, "loss": 2.90584135055542, "step": 762 }, { "epoch": 0.7874097007223942, "grad_norm": 8.58675097542137, "learning_rate": 9.221253494334636e-06, "loss": 2.7167434692382812, "step": 763 }, { "epoch": 0.7884416924664602, "grad_norm": 16.15135541389231, "learning_rate": 9.218032309425613e-06, "loss": 2.9132118225097656, "step": 764 }, { "epoch": 0.7894736842105263, "grad_norm": 19.154768503387693, "learning_rate": 9.214805041288285e-06, "loss": 3.2092108726501465, "step": 765 }, { "epoch": 0.7905056759545923, "grad_norm": 7.661840634293321, "learning_rate": 9.211571694577006e-06, "loss": 2.8506178855895996, "step": 766 }, { "epoch": 0.7915376676986584, "grad_norm": 8.591767633922984, "learning_rate": 9.208332273954892e-06, "loss": 2.538100481033325, "step": 767 }, { "epoch": 0.7925696594427245, "grad_norm": 7.594303407760444, "learning_rate": 9.205086784093823e-06, "loss": 3.1037511825561523, "step": 768 }, { "epoch": 0.7936016511867905, "grad_norm": 9.033662868228994, "learning_rate": 9.201835229674432e-06, "loss": 3.0107996463775635, "step": 769 }, { "epoch": 0.7946336429308566, "grad_norm": 9.686825296599345, "learning_rate": 9.198577615386095e-06, "loss": 2.7799291610717773, "step": 770 }, { "epoch": 0.7956656346749226, "grad_norm": 10.644985773301196, "learning_rate": 9.195313945926931e-06, "loss": 3.0491931438446045, "step": 771 }, { "epoch": 0.7966976264189887, "grad_norm": 14.458806185486146, "learning_rate": 9.19204422600379e-06, "loss": 2.0217819213867188, "step": 772 }, { "epoch": 0.7977296181630547, "grad_norm": 16.938469522017233, "learning_rate": 9.188768460332246e-06, "loss": 2.975667953491211, "step": 773 }, { "epoch": 0.7987616099071208, "grad_norm": 13.266256845510885, "learning_rate": 9.185486653636598e-06, "loss": 3.2247445583343506, "step": 774 }, { "epoch": 0.7997936016511867, "grad_norm": 14.677831907052576, "learning_rate": 9.182198810649851e-06, "loss": 2.5141379833221436, "step": 775 }, { "epoch": 0.8008255933952528, "grad_norm": 15.422936627055057, "learning_rate": 9.178904936113719e-06, "loss": 2.432544231414795, "step": 776 }, { "epoch": 0.8018575851393189, "grad_norm": 17.434839804300076, "learning_rate": 9.175605034778615e-06, "loss": 3.105786085128784, "step": 777 }, { "epoch": 0.8028895768833849, "grad_norm": 15.705879400216705, "learning_rate": 9.172299111403643e-06, "loss": 2.8137431144714355, "step": 778 }, { "epoch": 0.803921568627451, "grad_norm": 11.88271321999419, "learning_rate": 9.168987170756586e-06, "loss": 3.2288758754730225, "step": 779 }, { "epoch": 0.804953560371517, "grad_norm": 13.066687170197715, "learning_rate": 9.165669217613919e-06, "loss": 2.6433792114257812, "step": 780 }, { "epoch": 0.8059855521155831, "grad_norm": 9.733580696355473, "learning_rate": 9.162345256760776e-06, "loss": 2.9955153465270996, "step": 781 }, { "epoch": 0.8070175438596491, "grad_norm": 8.965843698513766, "learning_rate": 9.159015292990959e-06, "loss": 2.703097343444824, "step": 782 }, { "epoch": 0.8080495356037152, "grad_norm": 8.866794936332639, "learning_rate": 9.155679331106926e-06, "loss": 2.5528955459594727, "step": 783 }, { "epoch": 0.8090815273477813, "grad_norm": 21.82383745566159, "learning_rate": 9.152337375919792e-06, "loss": 2.804586887359619, "step": 784 }, { "epoch": 0.8101135190918473, "grad_norm": 18.653243489377278, "learning_rate": 9.148989432249305e-06, "loss": 2.9573066234588623, "step": 785 }, { "epoch": 0.8111455108359134, "grad_norm": 24.105591024363626, "learning_rate": 9.145635504923858e-06, "loss": 2.6056911945343018, "step": 786 }, { "epoch": 0.8121775025799793, "grad_norm": 7.220666512171809, "learning_rate": 9.142275598780473e-06, "loss": 2.29858660697937, "step": 787 }, { "epoch": 0.8132094943240454, "grad_norm": 12.514739396755783, "learning_rate": 9.138909718664788e-06, "loss": 2.4496383666992188, "step": 788 }, { "epoch": 0.8142414860681114, "grad_norm": 8.833426493794931, "learning_rate": 9.135537869431064e-06, "loss": 2.2068653106689453, "step": 789 }, { "epoch": 0.8152734778121775, "grad_norm": 9.530428925497914, "learning_rate": 9.132160055942165e-06, "loss": 2.7950196266174316, "step": 790 }, { "epoch": 0.8163054695562435, "grad_norm": 9.98582960206115, "learning_rate": 9.128776283069562e-06, "loss": 2.9262535572052, "step": 791 }, { "epoch": 0.8173374613003096, "grad_norm": 12.900268842724707, "learning_rate": 9.125386555693316e-06, "loss": 2.8006927967071533, "step": 792 }, { "epoch": 0.8183694530443757, "grad_norm": 18.760962708945037, "learning_rate": 9.12199087870208e-06, "loss": 3.037771224975586, "step": 793 }, { "epoch": 0.8194014447884417, "grad_norm": 16.019875579967447, "learning_rate": 9.118589256993082e-06, "loss": 3.1076667308807373, "step": 794 }, { "epoch": 0.8204334365325078, "grad_norm": 10.999401596643924, "learning_rate": 9.115181695472129e-06, "loss": 2.5742170810699463, "step": 795 }, { "epoch": 0.8214654282765738, "grad_norm": 29.56667660008175, "learning_rate": 9.111768199053588e-06, "loss": 3.0342397689819336, "step": 796 }, { "epoch": 0.8224974200206399, "grad_norm": 29.123927498016464, "learning_rate": 9.108348772660393e-06, "loss": 2.8449482917785645, "step": 797 }, { "epoch": 0.8235294117647058, "grad_norm": 9.976520193375213, "learning_rate": 9.104923421224026e-06, "loss": 2.592133045196533, "step": 798 }, { "epoch": 0.8245614035087719, "grad_norm": 14.0583828864681, "learning_rate": 9.10149214968451e-06, "loss": 2.886117458343506, "step": 799 }, { "epoch": 0.8255933952528379, "grad_norm": 7.531701327580056, "learning_rate": 9.098054962990415e-06, "loss": 3.031280279159546, "step": 800 }, { "epoch": 0.826625386996904, "grad_norm": 19.666778618237757, "learning_rate": 9.094611866098835e-06, "loss": 2.9728336334228516, "step": 801 }, { "epoch": 0.8276573787409701, "grad_norm": 22.18837308676401, "learning_rate": 9.09116286397539e-06, "loss": 2.537578582763672, "step": 802 }, { "epoch": 0.8286893704850361, "grad_norm": 14.866057727210325, "learning_rate": 9.087707961594216e-06, "loss": 2.3121652603149414, "step": 803 }, { "epoch": 0.8297213622291022, "grad_norm": 6.831833832253876, "learning_rate": 9.084247163937959e-06, "loss": 2.6835837364196777, "step": 804 }, { "epoch": 0.8307533539731682, "grad_norm": 6.98251442833819, "learning_rate": 9.080780475997767e-06, "loss": 2.783241033554077, "step": 805 }, { "epoch": 0.8317853457172343, "grad_norm": 7.313666585431364, "learning_rate": 9.077307902773283e-06, "loss": 2.0943210124969482, "step": 806 }, { "epoch": 0.8328173374613003, "grad_norm": 10.593290702908797, "learning_rate": 9.073829449272635e-06, "loss": 2.4133317470550537, "step": 807 }, { "epoch": 0.8338493292053664, "grad_norm": 9.71450579879684, "learning_rate": 9.070345120512436e-06, "loss": 2.853483200073242, "step": 808 }, { "epoch": 0.8348813209494325, "grad_norm": 20.568941834325773, "learning_rate": 9.066854921517769e-06, "loss": 3.0913448333740234, "step": 809 }, { "epoch": 0.8359133126934984, "grad_norm": 13.234746631937494, "learning_rate": 9.063358857322186e-06, "loss": 2.83183217048645, "step": 810 }, { "epoch": 0.8369453044375645, "grad_norm": 36.08377425601343, "learning_rate": 9.059856932967693e-06, "loss": 4.695616245269775, "step": 811 }, { "epoch": 0.8379772961816305, "grad_norm": 20.79566564325163, "learning_rate": 9.056349153504753e-06, "loss": 2.8295822143554688, "step": 812 }, { "epoch": 0.8390092879256966, "grad_norm": 13.818243787580341, "learning_rate": 9.052835523992272e-06, "loss": 3.0946226119995117, "step": 813 }, { "epoch": 0.8400412796697626, "grad_norm": 13.897038800922802, "learning_rate": 9.049316049497587e-06, "loss": 2.604569673538208, "step": 814 }, { "epoch": 0.8410732714138287, "grad_norm": 9.520643622186595, "learning_rate": 9.045790735096471e-06, "loss": 1.6071749925613403, "step": 815 }, { "epoch": 0.8421052631578947, "grad_norm": 17.614931222808963, "learning_rate": 9.042259585873119e-06, "loss": 3.564199447631836, "step": 816 }, { "epoch": 0.8431372549019608, "grad_norm": 15.952214274549668, "learning_rate": 9.03872260692014e-06, "loss": 2.104039192199707, "step": 817 }, { "epoch": 0.8441692466460269, "grad_norm": 24.93531895357169, "learning_rate": 9.035179803338548e-06, "loss": 2.5762863159179688, "step": 818 }, { "epoch": 0.8452012383900929, "grad_norm": 11.328967064867303, "learning_rate": 9.031631180237759e-06, "loss": 3.1360249519348145, "step": 819 }, { "epoch": 0.846233230134159, "grad_norm": 8.769885949562484, "learning_rate": 9.028076742735583e-06, "loss": 3.7183291912078857, "step": 820 }, { "epoch": 0.847265221878225, "grad_norm": 14.997316368327947, "learning_rate": 9.024516495958216e-06, "loss": 2.3527259826660156, "step": 821 }, { "epoch": 0.848297213622291, "grad_norm": 20.600644268125738, "learning_rate": 9.020950445040227e-06, "loss": 2.992480754852295, "step": 822 }, { "epoch": 0.849329205366357, "grad_norm": 13.053281780210115, "learning_rate": 9.017378595124564e-06, "loss": 3.188293218612671, "step": 823 }, { "epoch": 0.8503611971104231, "grad_norm": 9.553879881471575, "learning_rate": 9.013800951362532e-06, "loss": 2.5585105419158936, "step": 824 }, { "epoch": 0.8513931888544891, "grad_norm": 9.68787522067668, "learning_rate": 9.010217518913793e-06, "loss": 2.2665152549743652, "step": 825 }, { "epoch": 0.8524251805985552, "grad_norm": 16.14020671978396, "learning_rate": 9.00662830294636e-06, "loss": 2.4879815578460693, "step": 826 }, { "epoch": 0.8534571723426213, "grad_norm": 6.4954863969066645, "learning_rate": 9.00303330863658e-06, "loss": 2.7422685623168945, "step": 827 }, { "epoch": 0.8544891640866873, "grad_norm": 15.10528478082292, "learning_rate": 8.999432541169145e-06, "loss": 2.557654619216919, "step": 828 }, { "epoch": 0.8555211558307534, "grad_norm": 8.986901556021547, "learning_rate": 8.995826005737063e-06, "loss": 2.8331565856933594, "step": 829 }, { "epoch": 0.8565531475748194, "grad_norm": 7.2493056927751764, "learning_rate": 8.992213707541666e-06, "loss": 3.2016687393188477, "step": 830 }, { "epoch": 0.8575851393188855, "grad_norm": 10.894871102064524, "learning_rate": 8.988595651792594e-06, "loss": 2.877350091934204, "step": 831 }, { "epoch": 0.8586171310629515, "grad_norm": 9.323882331158536, "learning_rate": 8.984971843707787e-06, "loss": 2.4523348808288574, "step": 832 }, { "epoch": 0.8596491228070176, "grad_norm": 16.354976278586676, "learning_rate": 8.981342288513496e-06, "loss": 3.012882709503174, "step": 833 }, { "epoch": 0.8606811145510835, "grad_norm": 19.164383219243817, "learning_rate": 8.977706991444242e-06, "loss": 2.7856948375701904, "step": 834 }, { "epoch": 0.8617131062951496, "grad_norm": 10.40447845595958, "learning_rate": 8.974065957742837e-06, "loss": 2.9169929027557373, "step": 835 }, { "epoch": 0.8627450980392157, "grad_norm": 8.940861320158776, "learning_rate": 8.970419192660366e-06, "loss": 3.0807642936706543, "step": 836 }, { "epoch": 0.8637770897832817, "grad_norm": 9.175133924656317, "learning_rate": 8.966766701456177e-06, "loss": 3.258143901824951, "step": 837 }, { "epoch": 0.8648090815273478, "grad_norm": 8.944273683325898, "learning_rate": 8.963108489397875e-06, "loss": 2.918156147003174, "step": 838 }, { "epoch": 0.8658410732714138, "grad_norm": 9.104926562136558, "learning_rate": 8.959444561761324e-06, "loss": 2.6537275314331055, "step": 839 }, { "epoch": 0.8668730650154799, "grad_norm": 13.03635629491526, "learning_rate": 8.955774923830618e-06, "loss": 2.762986183166504, "step": 840 }, { "epoch": 0.8679050567595459, "grad_norm": 11.774222353342637, "learning_rate": 8.952099580898096e-06, "loss": 3.0888192653656006, "step": 841 }, { "epoch": 0.868937048503612, "grad_norm": 8.07404672692578, "learning_rate": 8.948418538264323e-06, "loss": 2.560690402984619, "step": 842 }, { "epoch": 0.8699690402476781, "grad_norm": 13.903023771407138, "learning_rate": 8.944731801238083e-06, "loss": 2.9453420639038086, "step": 843 }, { "epoch": 0.8710010319917441, "grad_norm": 13.19580799047363, "learning_rate": 8.94103937513637e-06, "loss": 2.3148603439331055, "step": 844 }, { "epoch": 0.8720330237358102, "grad_norm": 13.8684670671641, "learning_rate": 8.93734126528439e-06, "loss": 3.250168800354004, "step": 845 }, { "epoch": 0.8730650154798761, "grad_norm": 13.64497681612564, "learning_rate": 8.933637477015537e-06, "loss": 2.5707225799560547, "step": 846 }, { "epoch": 0.8740970072239422, "grad_norm": 7.418807578985766, "learning_rate": 8.929928015671401e-06, "loss": 2.7249460220336914, "step": 847 }, { "epoch": 0.8751289989680082, "grad_norm": 10.36606069634186, "learning_rate": 8.92621288660175e-06, "loss": 2.62217116355896, "step": 848 }, { "epoch": 0.8761609907120743, "grad_norm": 11.539302627011725, "learning_rate": 8.922492095164527e-06, "loss": 2.7988717555999756, "step": 849 }, { "epoch": 0.8771929824561403, "grad_norm": 7.625280626172235, "learning_rate": 8.918765646725845e-06, "loss": 2.7753400802612305, "step": 850 }, { "epoch": 0.8782249742002064, "grad_norm": 7.584634902926666, "learning_rate": 8.915033546659967e-06, "loss": 2.5195202827453613, "step": 851 }, { "epoch": 0.8792569659442725, "grad_norm": 9.294927755289535, "learning_rate": 8.911295800349316e-06, "loss": 2.447134256362915, "step": 852 }, { "epoch": 0.8802889576883385, "grad_norm": 8.842077696389328, "learning_rate": 8.907552413184452e-06, "loss": 2.7952306270599365, "step": 853 }, { "epoch": 0.8813209494324046, "grad_norm": 9.10276076863987, "learning_rate": 8.903803390564072e-06, "loss": 3.282439708709717, "step": 854 }, { "epoch": 0.8823529411764706, "grad_norm": 7.0807723439984, "learning_rate": 8.900048737895003e-06, "loss": 2.9339287281036377, "step": 855 }, { "epoch": 0.8833849329205367, "grad_norm": 16.063943118954818, "learning_rate": 8.896288460592187e-06, "loss": 2.1572723388671875, "step": 856 }, { "epoch": 0.8844169246646026, "grad_norm": 13.46757538425171, "learning_rate": 8.892522564078678e-06, "loss": 3.067239761352539, "step": 857 }, { "epoch": 0.8854489164086687, "grad_norm": 12.986747589632044, "learning_rate": 8.88875105378564e-06, "loss": 2.712153196334839, "step": 858 }, { "epoch": 0.8864809081527347, "grad_norm": 23.618370549207103, "learning_rate": 8.884973935152327e-06, "loss": 3.7660300731658936, "step": 859 }, { "epoch": 0.8875128998968008, "grad_norm": 10.310150562761834, "learning_rate": 8.881191213626084e-06, "loss": 2.520407199859619, "step": 860 }, { "epoch": 0.8885448916408669, "grad_norm": 13.958521350174198, "learning_rate": 8.877402894662334e-06, "loss": 3.23856520652771, "step": 861 }, { "epoch": 0.8895768833849329, "grad_norm": 18.806192980822665, "learning_rate": 8.87360898372458e-06, "loss": 2.57643461227417, "step": 862 }, { "epoch": 0.890608875128999, "grad_norm": 19.548747147689593, "learning_rate": 8.869809486284377e-06, "loss": 2.895467758178711, "step": 863 }, { "epoch": 0.891640866873065, "grad_norm": 6.280566713179624, "learning_rate": 8.86600440782135e-06, "loss": 2.423025608062744, "step": 864 }, { "epoch": 0.8926728586171311, "grad_norm": 19.11416840071934, "learning_rate": 8.862193753823164e-06, "loss": 2.310001850128174, "step": 865 }, { "epoch": 0.8937048503611971, "grad_norm": 9.654187002962859, "learning_rate": 8.858377529785529e-06, "loss": 2.8082315921783447, "step": 866 }, { "epoch": 0.8947368421052632, "grad_norm": 8.32153198104575, "learning_rate": 8.854555741212184e-06, "loss": 3.2019920349121094, "step": 867 }, { "epoch": 0.8957688338493293, "grad_norm": 13.34983541223451, "learning_rate": 8.850728393614903e-06, "loss": 2.907245397567749, "step": 868 }, { "epoch": 0.8968008255933952, "grad_norm": 12.955888022748075, "learning_rate": 8.846895492513465e-06, "loss": 2.7559356689453125, "step": 869 }, { "epoch": 0.8978328173374613, "grad_norm": 15.360527468351282, "learning_rate": 8.843057043435666e-06, "loss": 1.923850178718567, "step": 870 }, { "epoch": 0.8988648090815273, "grad_norm": 10.388487298597354, "learning_rate": 8.8392130519173e-06, "loss": 2.542006492614746, "step": 871 }, { "epoch": 0.8998968008255934, "grad_norm": 51.63958659191007, "learning_rate": 8.835363523502154e-06, "loss": 3.031914234161377, "step": 872 }, { "epoch": 0.9009287925696594, "grad_norm": 7.615013687284186, "learning_rate": 8.831508463742004e-06, "loss": 2.4690163135528564, "step": 873 }, { "epoch": 0.9019607843137255, "grad_norm": 13.23651290719279, "learning_rate": 8.8276478781966e-06, "loss": 2.684204339981079, "step": 874 }, { "epoch": 0.9029927760577915, "grad_norm": 14.717871417125446, "learning_rate": 8.823781772433664e-06, "loss": 2.8591437339782715, "step": 875 }, { "epoch": 0.9040247678018576, "grad_norm": 13.566775670141093, "learning_rate": 8.819910152028872e-06, "loss": 4.115355491638184, "step": 876 }, { "epoch": 0.9050567595459237, "grad_norm": 9.568980566918885, "learning_rate": 8.816033022565864e-06, "loss": 2.0554182529449463, "step": 877 }, { "epoch": 0.9060887512899897, "grad_norm": 16.0602903812488, "learning_rate": 8.812150389636216e-06, "loss": 3.273054361343384, "step": 878 }, { "epoch": 0.9071207430340558, "grad_norm": 8.207226227741794, "learning_rate": 8.808262258839448e-06, "loss": 3.1750121116638184, "step": 879 }, { "epoch": 0.9081527347781218, "grad_norm": 11.382393925896498, "learning_rate": 8.804368635783002e-06, "loss": 2.556485414505005, "step": 880 }, { "epoch": 0.9091847265221878, "grad_norm": 14.683025935907603, "learning_rate": 8.80046952608225e-06, "loss": 2.406846523284912, "step": 881 }, { "epoch": 0.9102167182662538, "grad_norm": 7.074443534407077, "learning_rate": 8.796564935360465e-06, "loss": 3.083665609359741, "step": 882 }, { "epoch": 0.9112487100103199, "grad_norm": 13.03835762728328, "learning_rate": 8.792654869248835e-06, "loss": 2.1653966903686523, "step": 883 }, { "epoch": 0.9122807017543859, "grad_norm": 16.27216423441271, "learning_rate": 8.788739333386443e-06, "loss": 2.123908281326294, "step": 884 }, { "epoch": 0.913312693498452, "grad_norm": 10.41574771229284, "learning_rate": 8.78481833342025e-06, "loss": 2.6915431022644043, "step": 885 }, { "epoch": 0.9143446852425181, "grad_norm": 22.13468429161606, "learning_rate": 8.780891875005116e-06, "loss": 3.5780766010284424, "step": 886 }, { "epoch": 0.9153766769865841, "grad_norm": 12.878815073539679, "learning_rate": 8.776959963803754e-06, "loss": 2.8188726902008057, "step": 887 }, { "epoch": 0.9164086687306502, "grad_norm": 18.065810992789128, "learning_rate": 8.773022605486755e-06, "loss": 2.6013312339782715, "step": 888 }, { "epoch": 0.9174406604747162, "grad_norm": 13.15001228695871, "learning_rate": 8.769079805732557e-06, "loss": 3.426697254180908, "step": 889 }, { "epoch": 0.9184726522187823, "grad_norm": 9.92878291032441, "learning_rate": 8.765131570227452e-06, "loss": 2.7974376678466797, "step": 890 }, { "epoch": 0.9195046439628483, "grad_norm": 10.3135415025846, "learning_rate": 8.761177904665566e-06, "loss": 2.6423161029815674, "step": 891 }, { "epoch": 0.9205366357069144, "grad_norm": 17.41217869324895, "learning_rate": 8.75721881474886e-06, "loss": 2.153823137283325, "step": 892 }, { "epoch": 0.9215686274509803, "grad_norm": 8.579722943441634, "learning_rate": 8.753254306187119e-06, "loss": 2.7427873611450195, "step": 893 }, { "epoch": 0.9226006191950464, "grad_norm": 7.699573500485071, "learning_rate": 8.749284384697935e-06, "loss": 2.511962890625, "step": 894 }, { "epoch": 0.9236326109391125, "grad_norm": 11.268034755079555, "learning_rate": 8.745309056006717e-06, "loss": 2.595938205718994, "step": 895 }, { "epoch": 0.9246646026831785, "grad_norm": 10.003038343239702, "learning_rate": 8.741328325846663e-06, "loss": 2.515320062637329, "step": 896 }, { "epoch": 0.9256965944272446, "grad_norm": 11.981229081636926, "learning_rate": 8.73734219995877e-06, "loss": 2.6038193702697754, "step": 897 }, { "epoch": 0.9267285861713106, "grad_norm": 8.241540490814092, "learning_rate": 8.733350684091806e-06, "loss": 2.2989137172698975, "step": 898 }, { "epoch": 0.9277605779153767, "grad_norm": 23.262335469826844, "learning_rate": 8.729353784002323e-06, "loss": 3.0240113735198975, "step": 899 }, { "epoch": 0.9287925696594427, "grad_norm": 8.095825962933413, "learning_rate": 8.725351505454631e-06, "loss": 2.9244229793548584, "step": 900 }, { "epoch": 0.9298245614035088, "grad_norm": 11.340469002970913, "learning_rate": 8.721343854220801e-06, "loss": 2.842160224914551, "step": 901 }, { "epoch": 0.9308565531475749, "grad_norm": 11.539814264733689, "learning_rate": 8.71733083608065e-06, "loss": 2.8664965629577637, "step": 902 }, { "epoch": 0.9318885448916409, "grad_norm": 18.287543283260945, "learning_rate": 8.713312456821734e-06, "loss": 3.327446460723877, "step": 903 }, { "epoch": 0.932920536635707, "grad_norm": 12.875215844396848, "learning_rate": 8.709288722239345e-06, "loss": 3.116183042526245, "step": 904 }, { "epoch": 0.9339525283797729, "grad_norm": 8.275011313688877, "learning_rate": 8.705259638136492e-06, "loss": 2.486152410507202, "step": 905 }, { "epoch": 0.934984520123839, "grad_norm": 16.24739963302216, "learning_rate": 8.701225210323908e-06, "loss": 2.340258836746216, "step": 906 }, { "epoch": 0.936016511867905, "grad_norm": 15.184465335082342, "learning_rate": 8.697185444620024e-06, "loss": 2.975020408630371, "step": 907 }, { "epoch": 0.9370485036119711, "grad_norm": 15.631464693784698, "learning_rate": 8.693140346850975e-06, "loss": 2.2340219020843506, "step": 908 }, { "epoch": 0.9380804953560371, "grad_norm": 13.90806035469735, "learning_rate": 8.689089922850585e-06, "loss": 3.165771484375, "step": 909 }, { "epoch": 0.9391124871001032, "grad_norm": 14.76401235221072, "learning_rate": 8.685034178460354e-06, "loss": 3.132847309112549, "step": 910 }, { "epoch": 0.9401444788441693, "grad_norm": 10.903854932092292, "learning_rate": 8.680973119529462e-06, "loss": 3.366068124771118, "step": 911 }, { "epoch": 0.9411764705882353, "grad_norm": 13.776301491737472, "learning_rate": 8.67690675191475e-06, "loss": 2.9348413944244385, "step": 912 }, { "epoch": 0.9422084623323014, "grad_norm": 15.692440535098076, "learning_rate": 8.672835081480719e-06, "loss": 2.4257867336273193, "step": 913 }, { "epoch": 0.9432404540763674, "grad_norm": 14.417130218937615, "learning_rate": 8.668758114099513e-06, "loss": 3.141251564025879, "step": 914 }, { "epoch": 0.9442724458204335, "grad_norm": 8.435093461118733, "learning_rate": 8.664675855650916e-06, "loss": 2.9332737922668457, "step": 915 }, { "epoch": 0.9453044375644994, "grad_norm": 10.648097845063, "learning_rate": 8.660588312022345e-06, "loss": 2.5100889205932617, "step": 916 }, { "epoch": 0.9463364293085655, "grad_norm": 10.312899822318727, "learning_rate": 8.656495489108835e-06, "loss": 2.524362802505493, "step": 917 }, { "epoch": 0.9473684210526315, "grad_norm": 11.368498557398345, "learning_rate": 8.652397392813043e-06, "loss": 2.685992956161499, "step": 918 }, { "epoch": 0.9484004127966976, "grad_norm": 16.85664501625052, "learning_rate": 8.648294029045224e-06, "loss": 2.5421335697174072, "step": 919 }, { "epoch": 0.9494324045407637, "grad_norm": 21.739808082272482, "learning_rate": 8.644185403723231e-06, "loss": 3.15808367729187, "step": 920 }, { "epoch": 0.9504643962848297, "grad_norm": 9.331946667414577, "learning_rate": 8.640071522772507e-06, "loss": 3.1691818237304688, "step": 921 }, { "epoch": 0.9514963880288958, "grad_norm": 8.736477908392848, "learning_rate": 8.635952392126072e-06, "loss": 2.8568482398986816, "step": 922 }, { "epoch": 0.9525283797729618, "grad_norm": 13.175497418723833, "learning_rate": 8.631828017724521e-06, "loss": 3.022144317626953, "step": 923 }, { "epoch": 0.9535603715170279, "grad_norm": 4.929151400144564, "learning_rate": 8.627698405516007e-06, "loss": 2.628474473953247, "step": 924 }, { "epoch": 0.9545923632610939, "grad_norm": 7.92168979450254, "learning_rate": 8.62356356145624e-06, "loss": 2.601992130279541, "step": 925 }, { "epoch": 0.95562435500516, "grad_norm": 8.612265586231148, "learning_rate": 8.619423491508478e-06, "loss": 2.1208882331848145, "step": 926 }, { "epoch": 0.9566563467492261, "grad_norm": 10.53109930567618, "learning_rate": 8.615278201643507e-06, "loss": 2.7668910026550293, "step": 927 }, { "epoch": 0.957688338493292, "grad_norm": 10.42126479592771, "learning_rate": 8.611127697839649e-06, "loss": 2.4703214168548584, "step": 928 }, { "epoch": 0.9587203302373581, "grad_norm": 20.957195531331426, "learning_rate": 8.606971986082741e-06, "loss": 2.511150360107422, "step": 929 }, { "epoch": 0.9597523219814241, "grad_norm": 11.823633225194214, "learning_rate": 8.602811072366138e-06, "loss": 3.3425581455230713, "step": 930 }, { "epoch": 0.9607843137254902, "grad_norm": 10.336285112955688, "learning_rate": 8.598644962690685e-06, "loss": 3.0630111694335938, "step": 931 }, { "epoch": 0.9618163054695562, "grad_norm": 10.713794321165828, "learning_rate": 8.594473663064735e-06, "loss": 2.752450704574585, "step": 932 }, { "epoch": 0.9628482972136223, "grad_norm": 19.139918427211896, "learning_rate": 8.590297179504113e-06, "loss": 2.7772939205169678, "step": 933 }, { "epoch": 0.9638802889576883, "grad_norm": 17.944538225658494, "learning_rate": 8.586115518032128e-06, "loss": 2.5406405925750732, "step": 934 }, { "epoch": 0.9649122807017544, "grad_norm": 11.022292060071564, "learning_rate": 8.581928684679555e-06, "loss": 2.357140064239502, "step": 935 }, { "epoch": 0.9659442724458205, "grad_norm": 14.86874145409155, "learning_rate": 8.577736685484626e-06, "loss": 2.384441375732422, "step": 936 }, { "epoch": 0.9669762641898865, "grad_norm": 10.324453002839654, "learning_rate": 8.573539526493024e-06, "loss": 2.4222421646118164, "step": 937 }, { "epoch": 0.9680082559339526, "grad_norm": 9.723529992760021, "learning_rate": 8.569337213757877e-06, "loss": 2.5892581939697266, "step": 938 }, { "epoch": 0.9690402476780186, "grad_norm": 7.600976083096801, "learning_rate": 8.56512975333974e-06, "loss": 2.39567232131958, "step": 939 }, { "epoch": 0.9700722394220846, "grad_norm": 12.092137993169, "learning_rate": 8.560917151306594e-06, "loss": 2.8416504859924316, "step": 940 }, { "epoch": 0.9711042311661506, "grad_norm": 12.333962742823834, "learning_rate": 8.556699413733837e-06, "loss": 2.095158576965332, "step": 941 }, { "epoch": 0.9721362229102167, "grad_norm": 8.925591336397723, "learning_rate": 8.552476546704274e-06, "loss": 2.5353894233703613, "step": 942 }, { "epoch": 0.9731682146542827, "grad_norm": 8.049927028903152, "learning_rate": 8.548248556308102e-06, "loss": 2.8934223651885986, "step": 943 }, { "epoch": 0.9742002063983488, "grad_norm": 11.08149431174175, "learning_rate": 8.544015448642916e-06, "loss": 2.9796085357666016, "step": 944 }, { "epoch": 0.9752321981424149, "grad_norm": 15.938377902327213, "learning_rate": 8.539777229813679e-06, "loss": 2.9126100540161133, "step": 945 }, { "epoch": 0.9762641898864809, "grad_norm": 24.04700421888887, "learning_rate": 8.535533905932739e-06, "loss": 2.5258841514587402, "step": 946 }, { "epoch": 0.977296181630547, "grad_norm": 14.212766990310266, "learning_rate": 8.531285483119795e-06, "loss": 3.0223984718322754, "step": 947 }, { "epoch": 0.978328173374613, "grad_norm": 37.61050293165456, "learning_rate": 8.527031967501906e-06, "loss": 2.8660964965820312, "step": 948 }, { "epoch": 0.9793601651186791, "grad_norm": 14.403917310713943, "learning_rate": 8.522773365213475e-06, "loss": 3.0697875022888184, "step": 949 }, { "epoch": 0.9803921568627451, "grad_norm": 7.098727469496524, "learning_rate": 8.518509682396239e-06, "loss": 2.850987434387207, "step": 950 }, { "epoch": 0.9814241486068112, "grad_norm": 22.572887139198077, "learning_rate": 8.514240925199264e-06, "loss": 2.3839073181152344, "step": 951 }, { "epoch": 0.9824561403508771, "grad_norm": 8.788082983526527, "learning_rate": 8.509967099778934e-06, "loss": 3.274752140045166, "step": 952 }, { "epoch": 0.9834881320949432, "grad_norm": 20.559290641214933, "learning_rate": 8.505688212298941e-06, "loss": 2.112039804458618, "step": 953 }, { "epoch": 0.9845201238390093, "grad_norm": 22.912915721133384, "learning_rate": 8.501404268930278e-06, "loss": 2.4956905841827393, "step": 954 }, { "epoch": 0.9855521155830753, "grad_norm": 13.126124723791293, "learning_rate": 8.497115275851229e-06, "loss": 3.32060170173645, "step": 955 }, { "epoch": 0.9865841073271414, "grad_norm": 11.190620519311915, "learning_rate": 8.492821239247365e-06, "loss": 1.7453281879425049, "step": 956 }, { "epoch": 0.9876160990712074, "grad_norm": 13.0991058336918, "learning_rate": 8.488522165311524e-06, "loss": 3.2108659744262695, "step": 957 }, { "epoch": 0.9886480908152735, "grad_norm": 6.930979842489424, "learning_rate": 8.484218060243816e-06, "loss": 2.2504026889801025, "step": 958 }, { "epoch": 0.9896800825593395, "grad_norm": 8.244851270837673, "learning_rate": 8.479908930251598e-06, "loss": 2.60968279838562, "step": 959 }, { "epoch": 0.9907120743034056, "grad_norm": 10.240611873801234, "learning_rate": 8.475594781549483e-06, "loss": 2.1658976078033447, "step": 960 }, { "epoch": 0.9917440660474717, "grad_norm": 28.864480536810515, "learning_rate": 8.471275620359317e-06, "loss": 3.141274929046631, "step": 961 }, { "epoch": 0.9927760577915377, "grad_norm": 12.113857550322448, "learning_rate": 8.466951452910175e-06, "loss": 3.094350814819336, "step": 962 }, { "epoch": 0.9938080495356038, "grad_norm": 12.264594375162037, "learning_rate": 8.462622285438353e-06, "loss": 2.9558591842651367, "step": 963 }, { "epoch": 0.9948400412796697, "grad_norm": 18.61406866971962, "learning_rate": 8.45828812418736e-06, "loss": 2.1471381187438965, "step": 964 }, { "epoch": 0.9958720330237358, "grad_norm": 10.57186468452118, "learning_rate": 8.453948975407902e-06, "loss": 2.4173901081085205, "step": 965 }, { "epoch": 0.9969040247678018, "grad_norm": 22.749642951004663, "learning_rate": 8.449604845357884e-06, "loss": 3.100821018218994, "step": 966 }, { "epoch": 0.9979360165118679, "grad_norm": 8.295297615285332, "learning_rate": 8.44525574030239e-06, "loss": 2.519862174987793, "step": 967 }, { "epoch": 0.9989680082559339, "grad_norm": 11.978450605385294, "learning_rate": 8.44090166651368e-06, "loss": 2.934941530227661, "step": 968 }, { "epoch": 1.0, "grad_norm": 18.050324887327026, "learning_rate": 8.436542630271185e-06, "loss": 2.6186721324920654, "step": 969 }, { "epoch": 1.001031991744066, "grad_norm": 16.05476252261218, "learning_rate": 8.432178637861483e-06, "loss": 2.1563639640808105, "step": 970 }, { "epoch": 1.0020639834881322, "grad_norm": 9.594858198037516, "learning_rate": 8.427809695578308e-06, "loss": 1.5539755821228027, "step": 971 }, { "epoch": 1.0030959752321982, "grad_norm": 18.807824999148238, "learning_rate": 8.42343580972253e-06, "loss": 1.7002182006835938, "step": 972 }, { "epoch": 1.0041279669762642, "grad_norm": 8.027778363641874, "learning_rate": 8.41905698660215e-06, "loss": 1.7681635618209839, "step": 973 }, { "epoch": 1.0051599587203301, "grad_norm": 8.623238509439696, "learning_rate": 8.414673232532286e-06, "loss": 1.373335838317871, "step": 974 }, { "epoch": 1.0061919504643964, "grad_norm": 8.021841163165114, "learning_rate": 8.41028455383517e-06, "loss": 1.8987131118774414, "step": 975 }, { "epoch": 1.0072239422084623, "grad_norm": 5.856433020017942, "learning_rate": 8.405890956840136e-06, "loss": 1.373334288597107, "step": 976 }, { "epoch": 1.0082559339525283, "grad_norm": 12.34788416810373, "learning_rate": 8.401492447883611e-06, "loss": 1.4636648893356323, "step": 977 }, { "epoch": 1.0092879256965945, "grad_norm": 11.49620424832148, "learning_rate": 8.397089033309106e-06, "loss": 1.7376048564910889, "step": 978 }, { "epoch": 1.0103199174406605, "grad_norm": 12.636249423602015, "learning_rate": 8.392680719467207e-06, "loss": 2.006678819656372, "step": 979 }, { "epoch": 1.0113519091847265, "grad_norm": 15.598006639972548, "learning_rate": 8.388267512715565e-06, "loss": 1.7946763038635254, "step": 980 }, { "epoch": 1.0123839009287925, "grad_norm": 11.693682446027942, "learning_rate": 8.383849419418889e-06, "loss": 1.8450850248336792, "step": 981 }, { "epoch": 1.0134158926728587, "grad_norm": 12.088922204874327, "learning_rate": 8.379426445948933e-06, "loss": 1.9440231323242188, "step": 982 }, { "epoch": 1.0144478844169247, "grad_norm": 12.647814682769102, "learning_rate": 8.374998598684491e-06, "loss": 2.2285561561584473, "step": 983 }, { "epoch": 1.0154798761609907, "grad_norm": 16.737139048202998, "learning_rate": 8.370565884011389e-06, "loss": 1.5640538930892944, "step": 984 }, { "epoch": 1.0165118679050567, "grad_norm": 13.230104870514074, "learning_rate": 8.366128308322464e-06, "loss": 1.3722457885742188, "step": 985 }, { "epoch": 1.0175438596491229, "grad_norm": 10.841490744693859, "learning_rate": 8.361685878017569e-06, "loss": 1.5551173686981201, "step": 986 }, { "epoch": 1.0185758513931888, "grad_norm": 11.799978525328118, "learning_rate": 8.357238599503561e-06, "loss": 1.8554892539978027, "step": 987 }, { "epoch": 1.0196078431372548, "grad_norm": 10.905843303872457, "learning_rate": 8.352786479194288e-06, "loss": 0.8869086503982544, "step": 988 }, { "epoch": 1.020639834881321, "grad_norm": 9.157020132242838, "learning_rate": 8.348329523510575e-06, "loss": 1.5887739658355713, "step": 989 }, { "epoch": 1.021671826625387, "grad_norm": 12.324235401608584, "learning_rate": 8.343867738880224e-06, "loss": 1.9491077661514282, "step": 990 }, { "epoch": 1.022703818369453, "grad_norm": 10.040335718777902, "learning_rate": 8.339401131738007e-06, "loss": 1.5371899604797363, "step": 991 }, { "epoch": 1.023735810113519, "grad_norm": 12.243061824958042, "learning_rate": 8.33492970852564e-06, "loss": 1.762153148651123, "step": 992 }, { "epoch": 1.0247678018575852, "grad_norm": 8.745195314678448, "learning_rate": 8.330453475691795e-06, "loss": 1.4221936464309692, "step": 993 }, { "epoch": 1.0257997936016512, "grad_norm": 10.920261519671197, "learning_rate": 8.325972439692075e-06, "loss": 2.0102250576019287, "step": 994 }, { "epoch": 1.0268317853457172, "grad_norm": 10.184593431575598, "learning_rate": 8.321486606989009e-06, "loss": 1.4635906219482422, "step": 995 }, { "epoch": 1.0278637770897834, "grad_norm": 7.472082964121502, "learning_rate": 8.316995984052048e-06, "loss": 1.3242697715759277, "step": 996 }, { "epoch": 1.0288957688338494, "grad_norm": 10.021648848752625, "learning_rate": 8.312500577357547e-06, "loss": 1.3507283926010132, "step": 997 }, { "epoch": 1.0299277605779154, "grad_norm": 12.209845924003377, "learning_rate": 8.308000393388766e-06, "loss": 1.1653996706008911, "step": 998 }, { "epoch": 1.0309597523219813, "grad_norm": 11.99835459000819, "learning_rate": 8.30349543863585e-06, "loss": 1.1610116958618164, "step": 999 }, { "epoch": 1.0319917440660475, "grad_norm": 17.474369680421233, "learning_rate": 8.298985719595824e-06, "loss": 1.5260579586029053, "step": 1000 }, { "epoch": 1.0330237358101135, "grad_norm": 9.295614465776053, "learning_rate": 8.294471242772588e-06, "loss": 1.3743215799331665, "step": 1001 }, { "epoch": 1.0340557275541795, "grad_norm": 12.80841059312557, "learning_rate": 8.289952014676896e-06, "loss": 1.5841118097305298, "step": 1002 }, { "epoch": 1.0350877192982457, "grad_norm": 9.224528441262, "learning_rate": 8.285428041826367e-06, "loss": 1.4197484254837036, "step": 1003 }, { "epoch": 1.0361197110423117, "grad_norm": 16.858592753583462, "learning_rate": 8.280899330745452e-06, "loss": 2.183623790740967, "step": 1004 }, { "epoch": 1.0371517027863777, "grad_norm": 21.383223442525296, "learning_rate": 8.276365887965439e-06, "loss": 1.8438667058944702, "step": 1005 }, { "epoch": 1.0381836945304437, "grad_norm": 10.825302703272476, "learning_rate": 8.27182772002444e-06, "loss": 1.3235054016113281, "step": 1006 }, { "epoch": 1.0392156862745099, "grad_norm": 20.316069030942174, "learning_rate": 8.26728483346738e-06, "loss": 2.006171703338623, "step": 1007 }, { "epoch": 1.0402476780185759, "grad_norm": 9.948994304742493, "learning_rate": 8.262737234845993e-06, "loss": 1.711141586303711, "step": 1008 }, { "epoch": 1.0412796697626419, "grad_norm": 10.158121460132765, "learning_rate": 8.258184930718806e-06, "loss": 1.515275001525879, "step": 1009 }, { "epoch": 1.0423116615067078, "grad_norm": 10.857642178327987, "learning_rate": 8.253627927651131e-06, "loss": 2.4347457885742188, "step": 1010 }, { "epoch": 1.043343653250774, "grad_norm": 12.3480224278066, "learning_rate": 8.24906623221506e-06, "loss": 1.3014485836029053, "step": 1011 }, { "epoch": 1.04437564499484, "grad_norm": 13.258087199229148, "learning_rate": 8.244499850989453e-06, "loss": 1.2108027935028076, "step": 1012 }, { "epoch": 1.045407636738906, "grad_norm": 10.772891424959182, "learning_rate": 8.239928790559921e-06, "loss": 1.0891926288604736, "step": 1013 }, { "epoch": 1.0464396284829722, "grad_norm": 12.063550652084645, "learning_rate": 8.235353057518832e-06, "loss": 0.9712181687355042, "step": 1014 }, { "epoch": 1.0474716202270382, "grad_norm": 9.839529947110902, "learning_rate": 8.230772658465284e-06, "loss": 1.5175083875656128, "step": 1015 }, { "epoch": 1.0485036119711042, "grad_norm": 11.211375364424866, "learning_rate": 8.226187600005116e-06, "loss": 1.9159111976623535, "step": 1016 }, { "epoch": 1.0495356037151702, "grad_norm": 10.97182298304798, "learning_rate": 8.221597888750873e-06, "loss": 1.3137034177780151, "step": 1017 }, { "epoch": 1.0505675954592364, "grad_norm": 8.588510298188089, "learning_rate": 8.21700353132182e-06, "loss": 1.6617357730865479, "step": 1018 }, { "epoch": 1.0515995872033024, "grad_norm": 13.201744584709031, "learning_rate": 8.212404534343923e-06, "loss": 1.9006850719451904, "step": 1019 }, { "epoch": 1.0526315789473684, "grad_norm": 9.323623275175217, "learning_rate": 8.207800904449829e-06, "loss": 1.5100172758102417, "step": 1020 }, { "epoch": 1.0536635706914346, "grad_norm": 11.913995314722836, "learning_rate": 8.20319264827888e-06, "loss": 2.788395881652832, "step": 1021 }, { "epoch": 1.0546955624355006, "grad_norm": 19.097847988506008, "learning_rate": 8.19857977247708e-06, "loss": 2.4118590354919434, "step": 1022 }, { "epoch": 1.0557275541795665, "grad_norm": 8.49565411469797, "learning_rate": 8.1939622836971e-06, "loss": 2.0495169162750244, "step": 1023 }, { "epoch": 1.0567595459236325, "grad_norm": 13.327628308023453, "learning_rate": 8.189340188598263e-06, "loss": 1.24650239944458, "step": 1024 }, { "epoch": 1.0577915376676987, "grad_norm": 13.415163634469513, "learning_rate": 8.184713493846533e-06, "loss": 1.9664095640182495, "step": 1025 }, { "epoch": 1.0588235294117647, "grad_norm": 9.849539930086326, "learning_rate": 8.180082206114511e-06, "loss": 1.2281479835510254, "step": 1026 }, { "epoch": 1.0598555211558307, "grad_norm": 16.920920109593283, "learning_rate": 8.17544633208142e-06, "loss": 1.5589534044265747, "step": 1027 }, { "epoch": 1.060887512899897, "grad_norm": 13.973934268345339, "learning_rate": 8.1708058784331e-06, "loss": 1.5565776824951172, "step": 1028 }, { "epoch": 1.061919504643963, "grad_norm": 9.237207321033726, "learning_rate": 8.166160851861991e-06, "loss": 1.1853022575378418, "step": 1029 }, { "epoch": 1.0629514963880289, "grad_norm": 17.927718371572553, "learning_rate": 8.161511259067132e-06, "loss": 1.247610092163086, "step": 1030 }, { "epoch": 1.0639834881320949, "grad_norm": 9.882845156111637, "learning_rate": 8.156857106754146e-06, "loss": 1.7039568424224854, "step": 1031 }, { "epoch": 1.065015479876161, "grad_norm": 20.898414415727924, "learning_rate": 8.15219840163523e-06, "loss": 1.7086340188980103, "step": 1032 }, { "epoch": 1.066047471620227, "grad_norm": 8.091855854808516, "learning_rate": 8.147535150429152e-06, "loss": 1.825638771057129, "step": 1033 }, { "epoch": 1.067079463364293, "grad_norm": 7.8479612013519455, "learning_rate": 8.142867359861229e-06, "loss": 1.5998353958129883, "step": 1034 }, { "epoch": 1.068111455108359, "grad_norm": 13.109224886594863, "learning_rate": 8.13819503666333e-06, "loss": 1.3819228410720825, "step": 1035 }, { "epoch": 1.0691434468524252, "grad_norm": 20.26012950910058, "learning_rate": 8.133518187573864e-06, "loss": 1.7897826433181763, "step": 1036 }, { "epoch": 1.0701754385964912, "grad_norm": 10.3800525344813, "learning_rate": 8.128836819337756e-06, "loss": 3.021141529083252, "step": 1037 }, { "epoch": 1.0712074303405572, "grad_norm": 8.243334160834076, "learning_rate": 8.124150938706462e-06, "loss": 2.0512983798980713, "step": 1038 }, { "epoch": 1.0722394220846234, "grad_norm": 8.471515219842848, "learning_rate": 8.119460552437934e-06, "loss": 1.7683520317077637, "step": 1039 }, { "epoch": 1.0732714138286894, "grad_norm": 12.009553013711418, "learning_rate": 8.114765667296628e-06, "loss": 1.4466801881790161, "step": 1040 }, { "epoch": 1.0743034055727554, "grad_norm": 13.497829932993618, "learning_rate": 8.110066290053493e-06, "loss": 1.7317613363265991, "step": 1041 }, { "epoch": 1.0753353973168214, "grad_norm": 45.88528225853127, "learning_rate": 8.105362427485942e-06, "loss": 1.4356427192687988, "step": 1042 }, { "epoch": 1.0763673890608876, "grad_norm": 7.778793676406035, "learning_rate": 8.100654086377875e-06, "loss": 1.7903071641921997, "step": 1043 }, { "epoch": 1.0773993808049536, "grad_norm": 14.255467890004203, "learning_rate": 8.095941273519634e-06, "loss": 1.3363776206970215, "step": 1044 }, { "epoch": 1.0784313725490196, "grad_norm": 9.036230547290165, "learning_rate": 8.09122399570802e-06, "loss": 1.202798843383789, "step": 1045 }, { "epoch": 1.0794633642930858, "grad_norm": 18.688989336597917, "learning_rate": 8.086502259746272e-06, "loss": 3.8349111080169678, "step": 1046 }, { "epoch": 1.0804953560371517, "grad_norm": 7.517142738194956, "learning_rate": 8.081776072444059e-06, "loss": 1.5295863151550293, "step": 1047 }, { "epoch": 1.0815273477812177, "grad_norm": 10.129619974724548, "learning_rate": 8.077045440617465e-06, "loss": 1.1850452423095703, "step": 1048 }, { "epoch": 1.0825593395252837, "grad_norm": 10.207207301825113, "learning_rate": 8.07231037108899e-06, "loss": 1.3872036933898926, "step": 1049 }, { "epoch": 1.08359133126935, "grad_norm": 16.408079543721303, "learning_rate": 8.067570870687527e-06, "loss": 1.0992767810821533, "step": 1050 }, { "epoch": 1.084623323013416, "grad_norm": 9.538420851034369, "learning_rate": 8.062826946248367e-06, "loss": 1.5989173650741577, "step": 1051 }, { "epoch": 1.085655314757482, "grad_norm": 14.905496947608839, "learning_rate": 8.058078604613178e-06, "loss": 1.1511602401733398, "step": 1052 }, { "epoch": 1.086687306501548, "grad_norm": 9.629087356335749, "learning_rate": 8.053325852629997e-06, "loss": 1.6529784202575684, "step": 1053 }, { "epoch": 1.087719298245614, "grad_norm": 10.085267441911933, "learning_rate": 8.048568697153222e-06, "loss": 1.2220031023025513, "step": 1054 }, { "epoch": 1.08875128998968, "grad_norm": 8.753348062316723, "learning_rate": 8.043807145043604e-06, "loss": 1.8311115503311157, "step": 1055 }, { "epoch": 1.089783281733746, "grad_norm": 12.3408445582175, "learning_rate": 8.039041203168233e-06, "loss": 1.4134912490844727, "step": 1056 }, { "epoch": 1.0908152734778123, "grad_norm": 11.783862781283819, "learning_rate": 8.034270878400529e-06, "loss": 2.0294742584228516, "step": 1057 }, { "epoch": 1.0918472652218782, "grad_norm": 15.95782287855318, "learning_rate": 8.029496177620235e-06, "loss": 2.1761271953582764, "step": 1058 }, { "epoch": 1.0928792569659442, "grad_norm": 19.40651300793911, "learning_rate": 8.024717107713402e-06, "loss": 2.606509208679199, "step": 1059 }, { "epoch": 1.0939112487100102, "grad_norm": 12.146765149401814, "learning_rate": 8.019933675572389e-06, "loss": 2.140730857849121, "step": 1060 }, { "epoch": 1.0949432404540764, "grad_norm": 15.050517998410367, "learning_rate": 8.015145888095838e-06, "loss": 1.610368013381958, "step": 1061 }, { "epoch": 1.0959752321981424, "grad_norm": 9.277932993782787, "learning_rate": 8.010353752188676e-06, "loss": 1.4251974821090698, "step": 1062 }, { "epoch": 1.0970072239422084, "grad_norm": 7.323597909570674, "learning_rate": 8.005557274762103e-06, "loss": 1.3800044059753418, "step": 1063 }, { "epoch": 1.0980392156862746, "grad_norm": 25.233200899043258, "learning_rate": 8.000756462733577e-06, "loss": 0.9251134395599365, "step": 1064 }, { "epoch": 1.0990712074303406, "grad_norm": 9.587282864681407, "learning_rate": 7.995951323026808e-06, "loss": 1.3711384534835815, "step": 1065 }, { "epoch": 1.1001031991744066, "grad_norm": 10.911019786869124, "learning_rate": 7.991141862571749e-06, "loss": 1.3407011032104492, "step": 1066 }, { "epoch": 1.1011351909184726, "grad_norm": 9.203525627848961, "learning_rate": 7.986328088304584e-06, "loss": 1.2513601779937744, "step": 1067 }, { "epoch": 1.1021671826625388, "grad_norm": 10.0827879606565, "learning_rate": 7.981510007167719e-06, "loss": 1.395066261291504, "step": 1068 }, { "epoch": 1.1031991744066048, "grad_norm": 24.008463216744232, "learning_rate": 7.976687626109765e-06, "loss": 2.0673134326934814, "step": 1069 }, { "epoch": 1.1042311661506707, "grad_norm": 22.233138519673105, "learning_rate": 7.971860952085546e-06, "loss": 1.3563206195831299, "step": 1070 }, { "epoch": 1.1052631578947367, "grad_norm": 10.308977883395446, "learning_rate": 7.967029992056066e-06, "loss": 1.195631742477417, "step": 1071 }, { "epoch": 1.106295149638803, "grad_norm": 7.768208205965082, "learning_rate": 7.962194752988519e-06, "loss": 1.311927318572998, "step": 1072 }, { "epoch": 1.107327141382869, "grad_norm": 11.206042051044388, "learning_rate": 7.957355241856261e-06, "loss": 1.8952860832214355, "step": 1073 }, { "epoch": 1.108359133126935, "grad_norm": 14.522936932286084, "learning_rate": 7.95251146563882e-06, "loss": 2.9905707836151123, "step": 1074 }, { "epoch": 1.109391124871001, "grad_norm": 8.330515845051611, "learning_rate": 7.947663431321866e-06, "loss": 0.9683359265327454, "step": 1075 }, { "epoch": 1.110423116615067, "grad_norm": 14.489243484006737, "learning_rate": 7.942811145897215e-06, "loss": 0.9986129999160767, "step": 1076 }, { "epoch": 1.111455108359133, "grad_norm": 12.075146700492658, "learning_rate": 7.937954616362813e-06, "loss": 1.6724216938018799, "step": 1077 }, { "epoch": 1.1124871001031993, "grad_norm": 20.295900640522916, "learning_rate": 7.933093849722724e-06, "loss": 1.9367952346801758, "step": 1078 }, { "epoch": 1.1135190918472653, "grad_norm": 20.886634414966576, "learning_rate": 7.928228852987126e-06, "loss": 1.3379758596420288, "step": 1079 }, { "epoch": 1.1145510835913313, "grad_norm": 15.391029180263173, "learning_rate": 7.923359633172299e-06, "loss": 1.158537745475769, "step": 1080 }, { "epoch": 1.1155830753353972, "grad_norm": 18.61127263155287, "learning_rate": 7.918486197300608e-06, "loss": 2.004901885986328, "step": 1081 }, { "epoch": 1.1166150670794635, "grad_norm": 10.01744099671902, "learning_rate": 7.913608552400504e-06, "loss": 1.117366909980774, "step": 1082 }, { "epoch": 1.1176470588235294, "grad_norm": 13.07772422800308, "learning_rate": 7.908726705506502e-06, "loss": 1.727134108543396, "step": 1083 }, { "epoch": 1.1186790505675954, "grad_norm": 9.572834550760705, "learning_rate": 7.903840663659186e-06, "loss": 1.0672117471694946, "step": 1084 }, { "epoch": 1.1197110423116614, "grad_norm": 9.409168030173149, "learning_rate": 7.89895043390518e-06, "loss": 1.5920839309692383, "step": 1085 }, { "epoch": 1.1207430340557276, "grad_norm": 21.583892474378278, "learning_rate": 7.894056023297156e-06, "loss": 2.1002070903778076, "step": 1086 }, { "epoch": 1.1217750257997936, "grad_norm": 7.55744359619947, "learning_rate": 7.889157438893813e-06, "loss": 0.7461848258972168, "step": 1087 }, { "epoch": 1.1228070175438596, "grad_norm": 25.813146959585897, "learning_rate": 7.884254687759863e-06, "loss": 2.137263536453247, "step": 1088 }, { "epoch": 1.1238390092879258, "grad_norm": 13.42741779404928, "learning_rate": 7.879347776966039e-06, "loss": 1.5984553098678589, "step": 1089 }, { "epoch": 1.1248710010319918, "grad_norm": 20.393007755113697, "learning_rate": 7.874436713589065e-06, "loss": 1.4474716186523438, "step": 1090 }, { "epoch": 1.1259029927760578, "grad_norm": 9.345956996474037, "learning_rate": 7.869521504711653e-06, "loss": 1.6039841175079346, "step": 1091 }, { "epoch": 1.1269349845201238, "grad_norm": 11.133766283828184, "learning_rate": 7.864602157422501e-06, "loss": 1.083439588546753, "step": 1092 }, { "epoch": 1.12796697626419, "grad_norm": 14.667634387278584, "learning_rate": 7.859678678816266e-06, "loss": 1.7140947580337524, "step": 1093 }, { "epoch": 1.128998968008256, "grad_norm": 10.461128558097727, "learning_rate": 7.854751075993572e-06, "loss": 1.3764314651489258, "step": 1094 }, { "epoch": 1.130030959752322, "grad_norm": 8.700167873438682, "learning_rate": 7.849819356060986e-06, "loss": 1.55415940284729, "step": 1095 }, { "epoch": 1.131062951496388, "grad_norm": 10.892723358166004, "learning_rate": 7.844883526131014e-06, "loss": 1.4459302425384521, "step": 1096 }, { "epoch": 1.1320949432404541, "grad_norm": 12.75716199523203, "learning_rate": 7.839943593322084e-06, "loss": 1.8245646953582764, "step": 1097 }, { "epoch": 1.13312693498452, "grad_norm": 13.312280216700708, "learning_rate": 7.834999564758553e-06, "loss": 2.2421915531158447, "step": 1098 }, { "epoch": 1.134158926728586, "grad_norm": 12.798983235555607, "learning_rate": 7.830051447570674e-06, "loss": 0.9207951426506042, "step": 1099 }, { "epoch": 1.1351909184726523, "grad_norm": 19.561513762469197, "learning_rate": 7.8250992488946e-06, "loss": 1.3448659181594849, "step": 1100 }, { "epoch": 1.1362229102167183, "grad_norm": 10.655264812219203, "learning_rate": 7.820142975872377e-06, "loss": 1.5575282573699951, "step": 1101 }, { "epoch": 1.1372549019607843, "grad_norm": 10.236688960938611, "learning_rate": 7.815182635651913e-06, "loss": 1.1294775009155273, "step": 1102 }, { "epoch": 1.1382868937048505, "grad_norm": 13.12609756584624, "learning_rate": 7.810218235386994e-06, "loss": 1.45517098903656, "step": 1103 }, { "epoch": 1.1393188854489165, "grad_norm": 12.890391208806918, "learning_rate": 7.805249782237256e-06, "loss": 1.4597437381744385, "step": 1104 }, { "epoch": 1.1403508771929824, "grad_norm": 20.154226114081528, "learning_rate": 7.800277283368184e-06, "loss": 1.961379885673523, "step": 1105 }, { "epoch": 1.1413828689370484, "grad_norm": 9.464678873571312, "learning_rate": 7.79530074595109e-06, "loss": 1.188375473022461, "step": 1106 }, { "epoch": 1.1424148606811146, "grad_norm": 14.154349605303679, "learning_rate": 7.790320177163116e-06, "loss": 1.73966383934021, "step": 1107 }, { "epoch": 1.1434468524251806, "grad_norm": 10.420904211191234, "learning_rate": 7.78533558418722e-06, "loss": 2.5877296924591064, "step": 1108 }, { "epoch": 1.1444788441692466, "grad_norm": 16.044920620511835, "learning_rate": 7.780346974212159e-06, "loss": 1.5162712335586548, "step": 1109 }, { "epoch": 1.1455108359133126, "grad_norm": 16.28795365818509, "learning_rate": 7.775354354432484e-06, "loss": 1.6514241695404053, "step": 1110 }, { "epoch": 1.1465428276573788, "grad_norm": 18.70420376563244, "learning_rate": 7.770357732048533e-06, "loss": 1.10786771774292, "step": 1111 }, { "epoch": 1.1475748194014448, "grad_norm": 10.780579837493953, "learning_rate": 7.765357114266409e-06, "loss": 1.5677838325500488, "step": 1112 }, { "epoch": 1.1486068111455108, "grad_norm": 17.00709242561451, "learning_rate": 7.760352508297988e-06, "loss": 2.3274734020233154, "step": 1113 }, { "epoch": 1.149638802889577, "grad_norm": 8.250265430352718, "learning_rate": 7.755343921360887e-06, "loss": 1.1954925060272217, "step": 1114 }, { "epoch": 1.150670794633643, "grad_norm": 12.032632061244854, "learning_rate": 7.750331360678471e-06, "loss": 0.9751878976821899, "step": 1115 }, { "epoch": 1.151702786377709, "grad_norm": 11.770457667292105, "learning_rate": 7.745314833479834e-06, "loss": 1.2305867671966553, "step": 1116 }, { "epoch": 1.152734778121775, "grad_norm": 14.257996247234294, "learning_rate": 7.740294346999786e-06, "loss": 1.3428764343261719, "step": 1117 }, { "epoch": 1.1537667698658411, "grad_norm": 6.682943553705904, "learning_rate": 7.735269908478856e-06, "loss": 1.1069071292877197, "step": 1118 }, { "epoch": 1.1547987616099071, "grad_norm": 12.012699080331593, "learning_rate": 7.730241525163266e-06, "loss": 1.4219002723693848, "step": 1119 }, { "epoch": 1.1558307533539731, "grad_norm": 9.403879667177996, "learning_rate": 7.72520920430493e-06, "loss": 1.0495020151138306, "step": 1120 }, { "epoch": 1.156862745098039, "grad_norm": 8.719460389688432, "learning_rate": 7.720172953161438e-06, "loss": 1.7092715501785278, "step": 1121 }, { "epoch": 1.1578947368421053, "grad_norm": 7.831380903960697, "learning_rate": 7.715132778996053e-06, "loss": 1.8763151168823242, "step": 1122 }, { "epoch": 1.1589267285861713, "grad_norm": 19.446465480773362, "learning_rate": 7.710088689077689e-06, "loss": 1.7434253692626953, "step": 1123 }, { "epoch": 1.1599587203302373, "grad_norm": 11.704881506960021, "learning_rate": 7.705040690680915e-06, "loss": 1.6507149934768677, "step": 1124 }, { "epoch": 1.1609907120743035, "grad_norm": 8.857711832262313, "learning_rate": 7.699988791085931e-06, "loss": 1.8820699453353882, "step": 1125 }, { "epoch": 1.1620227038183695, "grad_norm": 9.422720425465496, "learning_rate": 7.694932997578565e-06, "loss": 1.5133967399597168, "step": 1126 }, { "epoch": 1.1630546955624355, "grad_norm": 13.889002563598517, "learning_rate": 7.68987331745026e-06, "loss": 2.1303153038024902, "step": 1127 }, { "epoch": 1.1640866873065017, "grad_norm": 11.739488829961909, "learning_rate": 7.684809757998066e-06, "loss": 1.5680687427520752, "step": 1128 }, { "epoch": 1.1651186790505677, "grad_norm": 14.384496276363349, "learning_rate": 7.679742326524628e-06, "loss": 1.483288288116455, "step": 1129 }, { "epoch": 1.1661506707946336, "grad_norm": 10.874983943768116, "learning_rate": 7.674671030338176e-06, "loss": 2.03609561920166, "step": 1130 }, { "epoch": 1.1671826625386996, "grad_norm": 11.378154502477775, "learning_rate": 7.669595876752508e-06, "loss": 1.7551511526107788, "step": 1131 }, { "epoch": 1.1682146542827658, "grad_norm": 10.45131877825218, "learning_rate": 7.664516873086987e-06, "loss": 1.564146637916565, "step": 1132 }, { "epoch": 1.1692466460268318, "grad_norm": 14.201292073707153, "learning_rate": 7.659434026666536e-06, "loss": 2.13928484916687, "step": 1133 }, { "epoch": 1.1702786377708978, "grad_norm": 8.513928619367956, "learning_rate": 7.654347344821613e-06, "loss": 1.8708624839782715, "step": 1134 }, { "epoch": 1.1713106295149638, "grad_norm": 26.303106116968944, "learning_rate": 7.64925683488821e-06, "loss": 1.4692811965942383, "step": 1135 }, { "epoch": 1.17234262125903, "grad_norm": 26.79861905969612, "learning_rate": 7.644162504207834e-06, "loss": 1.458707571029663, "step": 1136 }, { "epoch": 1.173374613003096, "grad_norm": 14.432879402437464, "learning_rate": 7.639064360127512e-06, "loss": 1.3703393936157227, "step": 1137 }, { "epoch": 1.174406604747162, "grad_norm": 11.172595147665566, "learning_rate": 7.633962409999765e-06, "loss": 1.5748720169067383, "step": 1138 }, { "epoch": 1.1754385964912282, "grad_norm": 10.323351276905665, "learning_rate": 7.6288566611826e-06, "loss": 1.9944082498550415, "step": 1139 }, { "epoch": 1.1764705882352942, "grad_norm": 19.622276447625225, "learning_rate": 7.623747121039512e-06, "loss": 1.4969562292099, "step": 1140 }, { "epoch": 1.1775025799793601, "grad_norm": 10.575512895874743, "learning_rate": 7.618633796939454e-06, "loss": 1.993700623512268, "step": 1141 }, { "epoch": 1.1785345717234261, "grad_norm": 13.393720411744683, "learning_rate": 7.613516696256842e-06, "loss": 1.6115179061889648, "step": 1142 }, { "epoch": 1.1795665634674923, "grad_norm": 12.397493564889684, "learning_rate": 7.608395826371536e-06, "loss": 1.0056989192962646, "step": 1143 }, { "epoch": 1.1805985552115583, "grad_norm": 8.63445703443166, "learning_rate": 7.603271194668835e-06, "loss": 1.631241798400879, "step": 1144 }, { "epoch": 1.1816305469556243, "grad_norm": 9.960901745053299, "learning_rate": 7.598142808539458e-06, "loss": 1.2353228330612183, "step": 1145 }, { "epoch": 1.1826625386996903, "grad_norm": 11.90792893561612, "learning_rate": 7.593010675379542e-06, "loss": 1.3433904647827148, "step": 1146 }, { "epoch": 1.1836945304437565, "grad_norm": 14.96853874887359, "learning_rate": 7.5878748025906315e-06, "loss": 1.197488784790039, "step": 1147 }, { "epoch": 1.1847265221878225, "grad_norm": 17.738178329199055, "learning_rate": 7.582735197579657e-06, "loss": 1.4474613666534424, "step": 1148 }, { "epoch": 1.1857585139318885, "grad_norm": 10.212462753877391, "learning_rate": 7.577591867758937e-06, "loss": 1.2012611627578735, "step": 1149 }, { "epoch": 1.1867905056759547, "grad_norm": 15.941473391658032, "learning_rate": 7.572444820546157e-06, "loss": 1.5386306047439575, "step": 1150 }, { "epoch": 1.1878224974200207, "grad_norm": 13.83055155407034, "learning_rate": 7.567294063364369e-06, "loss": 1.314261794090271, "step": 1151 }, { "epoch": 1.1888544891640866, "grad_norm": 10.71943455414836, "learning_rate": 7.562139603641971e-06, "loss": 1.694484829902649, "step": 1152 }, { "epoch": 1.1898864809081529, "grad_norm": 9.764445754550207, "learning_rate": 7.556981448812707e-06, "loss": 1.5592496395111084, "step": 1153 }, { "epoch": 1.1909184726522188, "grad_norm": 12.283485449478526, "learning_rate": 7.551819606315644e-06, "loss": 1.2753419876098633, "step": 1154 }, { "epoch": 1.1919504643962848, "grad_norm": 10.19282029688353, "learning_rate": 7.546654083595167e-06, "loss": 2.5282106399536133, "step": 1155 }, { "epoch": 1.1929824561403508, "grad_norm": 9.945635665366675, "learning_rate": 7.541484888100974e-06, "loss": 1.256844162940979, "step": 1156 }, { "epoch": 1.194014447884417, "grad_norm": 7.5302923653532785, "learning_rate": 7.5363120272880554e-06, "loss": 1.311590552330017, "step": 1157 }, { "epoch": 1.195046439628483, "grad_norm": 14.701895071884568, "learning_rate": 7.531135508616689e-06, "loss": 1.5042333602905273, "step": 1158 }, { "epoch": 1.196078431372549, "grad_norm": 9.897244311338603, "learning_rate": 7.52595533955243e-06, "loss": 1.5542973279953003, "step": 1159 }, { "epoch": 1.197110423116615, "grad_norm": 6.868338379037202, "learning_rate": 7.520771527566093e-06, "loss": 1.6668894290924072, "step": 1160 }, { "epoch": 1.1981424148606812, "grad_norm": 8.953819262399154, "learning_rate": 7.515584080133753e-06, "loss": 1.67169189453125, "step": 1161 }, { "epoch": 1.1991744066047472, "grad_norm": 11.976800133954304, "learning_rate": 7.510393004736723e-06, "loss": 1.6002707481384277, "step": 1162 }, { "epoch": 1.2002063983488132, "grad_norm": 14.07389764280866, "learning_rate": 7.50519830886155e-06, "loss": 1.1405104398727417, "step": 1163 }, { "epoch": 1.2012383900928794, "grad_norm": 64.45087236802736, "learning_rate": 7.500000000000001e-06, "loss": 1.908927083015442, "step": 1164 }, { "epoch": 1.2022703818369453, "grad_norm": 9.506985345062768, "learning_rate": 7.494798085649058e-06, "loss": 1.78879976272583, "step": 1165 }, { "epoch": 1.2033023735810113, "grad_norm": 10.559746008230174, "learning_rate": 7.489592573310896e-06, "loss": 1.2638949155807495, "step": 1166 }, { "epoch": 1.2043343653250773, "grad_norm": 18.971862976066006, "learning_rate": 7.484383470492886e-06, "loss": 1.8479580879211426, "step": 1167 }, { "epoch": 1.2053663570691435, "grad_norm": 10.968772501481508, "learning_rate": 7.479170784707574e-06, "loss": 1.2037568092346191, "step": 1168 }, { "epoch": 1.2063983488132095, "grad_norm": 11.135611363827667, "learning_rate": 7.473954523472673e-06, "loss": 2.127514600753784, "step": 1169 }, { "epoch": 1.2074303405572755, "grad_norm": 21.45101625402494, "learning_rate": 7.468734694311051e-06, "loss": 1.5933599472045898, "step": 1170 }, { "epoch": 1.2084623323013415, "grad_norm": 12.58067929424832, "learning_rate": 7.463511304750724e-06, "loss": 1.6531916856765747, "step": 1171 }, { "epoch": 1.2094943240454077, "grad_norm": 14.649666157234222, "learning_rate": 7.458284362324844e-06, "loss": 1.17509126663208, "step": 1172 }, { "epoch": 1.2105263157894737, "grad_norm": 10.039116690840553, "learning_rate": 7.453053874571683e-06, "loss": 1.4780304431915283, "step": 1173 }, { "epoch": 1.2115583075335397, "grad_norm": 8.816563367174497, "learning_rate": 7.44781984903463e-06, "loss": 1.7202036380767822, "step": 1174 }, { "epoch": 1.2125902992776059, "grad_norm": 18.902257219854263, "learning_rate": 7.442582293262174e-06, "loss": 1.120159387588501, "step": 1175 }, { "epoch": 1.2136222910216719, "grad_norm": 16.571562799351348, "learning_rate": 7.437341214807895e-06, "loss": 1.8021618127822876, "step": 1176 }, { "epoch": 1.2146542827657378, "grad_norm": 10.904647120919584, "learning_rate": 7.432096621230455e-06, "loss": 1.6198031902313232, "step": 1177 }, { "epoch": 1.215686274509804, "grad_norm": 21.795007344934522, "learning_rate": 7.426848520093585e-06, "loss": 1.4299342632293701, "step": 1178 }, { "epoch": 1.21671826625387, "grad_norm": 11.04639195785765, "learning_rate": 7.421596918966072e-06, "loss": 1.6385784149169922, "step": 1179 }, { "epoch": 1.217750257997936, "grad_norm": 10.479625516242656, "learning_rate": 7.416341825421755e-06, "loss": 1.2281197309494019, "step": 1180 }, { "epoch": 1.218782249742002, "grad_norm": 12.249613732754618, "learning_rate": 7.411083247039506e-06, "loss": 1.3680484294891357, "step": 1181 }, { "epoch": 1.2198142414860682, "grad_norm": 16.349200785730165, "learning_rate": 7.4058211914032264e-06, "loss": 1.4355851411819458, "step": 1182 }, { "epoch": 1.2208462332301342, "grad_norm": 13.102412298483305, "learning_rate": 7.400555666101829e-06, "loss": 1.8079159259796143, "step": 1183 }, { "epoch": 1.2218782249742002, "grad_norm": 13.586305957723505, "learning_rate": 7.395286678729232e-06, "loss": 1.2523359060287476, "step": 1184 }, { "epoch": 1.2229102167182662, "grad_norm": 8.662082403874702, "learning_rate": 7.390014236884349e-06, "loss": 1.3039319515228271, "step": 1185 }, { "epoch": 1.2239422084623324, "grad_norm": 8.711755492904405, "learning_rate": 7.384738348171069e-06, "loss": 1.032116413116455, "step": 1186 }, { "epoch": 1.2249742002063984, "grad_norm": 14.906728364069968, "learning_rate": 7.379459020198261e-06, "loss": 1.8307032585144043, "step": 1187 }, { "epoch": 1.2260061919504643, "grad_norm": 15.7359508426536, "learning_rate": 7.374176260579746e-06, "loss": 1.6073552370071411, "step": 1188 }, { "epoch": 1.2270381836945305, "grad_norm": 26.584089550977485, "learning_rate": 7.368890076934298e-06, "loss": 1.3526015281677246, "step": 1189 }, { "epoch": 1.2280701754385965, "grad_norm": 13.134590698432364, "learning_rate": 7.36360047688563e-06, "loss": 2.4070539474487305, "step": 1190 }, { "epoch": 1.2291021671826625, "grad_norm": 51.80448653646729, "learning_rate": 7.35830746806238e-06, "loss": 2.3934192657470703, "step": 1191 }, { "epoch": 1.2301341589267285, "grad_norm": 18.212263393779182, "learning_rate": 7.353011058098104e-06, "loss": 1.1821274757385254, "step": 1192 }, { "epoch": 1.2311661506707947, "grad_norm": 18.405227075703046, "learning_rate": 7.34771125463126e-06, "loss": 1.1790523529052734, "step": 1193 }, { "epoch": 1.2321981424148607, "grad_norm": 19.446981285426606, "learning_rate": 7.342408065305202e-06, "loss": 2.165193557739258, "step": 1194 }, { "epoch": 1.2332301341589267, "grad_norm": 24.59132404411911, "learning_rate": 7.3371014977681685e-06, "loss": 1.4371821880340576, "step": 1195 }, { "epoch": 1.2342621259029927, "grad_norm": 15.429073098790294, "learning_rate": 7.33179155967327e-06, "loss": 1.5296125411987305, "step": 1196 }, { "epoch": 1.2352941176470589, "grad_norm": 13.347553476226281, "learning_rate": 7.326478258678474e-06, "loss": 1.9588744640350342, "step": 1197 }, { "epoch": 1.2363261093911249, "grad_norm": 14.260271910609688, "learning_rate": 7.321161602446601e-06, "loss": 1.2351356744766235, "step": 1198 }, { "epoch": 1.2373581011351908, "grad_norm": 7.802396516831153, "learning_rate": 7.315841598645313e-06, "loss": 1.22335946559906, "step": 1199 }, { "epoch": 1.238390092879257, "grad_norm": 14.03192012952255, "learning_rate": 7.310518254947092e-06, "loss": 1.795140027999878, "step": 1200 }, { "epoch": 1.239422084623323, "grad_norm": 12.82609528076698, "learning_rate": 7.305191579029246e-06, "loss": 1.3026559352874756, "step": 1201 }, { "epoch": 1.240454076367389, "grad_norm": 8.175138039549628, "learning_rate": 7.299861578573881e-06, "loss": 1.3581645488739014, "step": 1202 }, { "epoch": 1.2414860681114552, "grad_norm": 19.10804827967584, "learning_rate": 7.294528261267905e-06, "loss": 1.415238618850708, "step": 1203 }, { "epoch": 1.2425180598555212, "grad_norm": 16.335381882389054, "learning_rate": 7.289191634803002e-06, "loss": 1.8068633079528809, "step": 1204 }, { "epoch": 1.2435500515995872, "grad_norm": 13.151633240669845, "learning_rate": 7.283851706875633e-06, "loss": 2.057955741882324, "step": 1205 }, { "epoch": 1.2445820433436532, "grad_norm": 8.937106130577334, "learning_rate": 7.278508485187022e-06, "loss": 1.2385305166244507, "step": 1206 }, { "epoch": 1.2456140350877192, "grad_norm": 16.887133387666424, "learning_rate": 7.273161977443137e-06, "loss": 1.0469703674316406, "step": 1207 }, { "epoch": 1.2466460268317854, "grad_norm": 16.606881298783872, "learning_rate": 7.267812191354691e-06, "loss": 1.2680339813232422, "step": 1208 }, { "epoch": 1.2476780185758514, "grad_norm": 9.311173055553429, "learning_rate": 7.262459134637122e-06, "loss": 1.4323675632476807, "step": 1209 }, { "epoch": 1.2487100103199174, "grad_norm": 12.258511806079309, "learning_rate": 7.257102815010585e-06, "loss": 1.5210167169570923, "step": 1210 }, { "epoch": 1.2497420020639836, "grad_norm": 9.846658124763904, "learning_rate": 7.251743240199944e-06, "loss": 1.3966602087020874, "step": 1211 }, { "epoch": 1.2507739938080495, "grad_norm": 10.650172801865791, "learning_rate": 7.246380417934752e-06, "loss": 1.2398332357406616, "step": 1212 }, { "epoch": 1.2518059855521155, "grad_norm": 17.175116898809183, "learning_rate": 7.24101435594925e-06, "loss": 2.0594077110290527, "step": 1213 }, { "epoch": 1.2528379772961817, "grad_norm": 11.462008515081358, "learning_rate": 7.2356450619823495e-06, "loss": 1.7122313976287842, "step": 1214 }, { "epoch": 1.2538699690402477, "grad_norm": 8.87949827881755, "learning_rate": 7.230272543777625e-06, "loss": 1.1924419403076172, "step": 1215 }, { "epoch": 1.2549019607843137, "grad_norm": 9.21318932815073, "learning_rate": 7.224896809083297e-06, "loss": 1.6619291305541992, "step": 1216 }, { "epoch": 1.2559339525283797, "grad_norm": 7.207008807843295, "learning_rate": 7.219517865652228e-06, "loss": 1.6214816570281982, "step": 1217 }, { "epoch": 1.256965944272446, "grad_norm": 13.735589808619933, "learning_rate": 7.214135721241908e-06, "loss": 1.911083459854126, "step": 1218 }, { "epoch": 1.2579979360165119, "grad_norm": 12.344700121567975, "learning_rate": 7.208750383614442e-06, "loss": 1.6782649755477905, "step": 1219 }, { "epoch": 1.2590299277605779, "grad_norm": 9.30907810758885, "learning_rate": 7.203361860536544e-06, "loss": 1.339187502861023, "step": 1220 }, { "epoch": 1.2600619195046439, "grad_norm": 8.751624839086462, "learning_rate": 7.1979701597795145e-06, "loss": 1.91410493850708, "step": 1221 }, { "epoch": 1.26109391124871, "grad_norm": 9.909673616548863, "learning_rate": 7.192575289119246e-06, "loss": 1.1134378910064697, "step": 1222 }, { "epoch": 1.262125902992776, "grad_norm": 16.497933136012385, "learning_rate": 7.187177256336194e-06, "loss": 1.2252280712127686, "step": 1223 }, { "epoch": 1.263157894736842, "grad_norm": 20.610993611451047, "learning_rate": 7.181776069215382e-06, "loss": 0.9598990678787231, "step": 1224 }, { "epoch": 1.2641898864809082, "grad_norm": 13.830348435110258, "learning_rate": 7.176371735546377e-06, "loss": 1.5006102323532104, "step": 1225 }, { "epoch": 1.2652218782249742, "grad_norm": 13.15337113477755, "learning_rate": 7.170964263123286e-06, "loss": 0.9433538913726807, "step": 1226 }, { "epoch": 1.2662538699690402, "grad_norm": 25.898742835089134, "learning_rate": 7.165553659744744e-06, "loss": 1.5279262065887451, "step": 1227 }, { "epoch": 1.2672858617131064, "grad_norm": 24.6601728692259, "learning_rate": 7.160139933213899e-06, "loss": 0.6092150807380676, "step": 1228 }, { "epoch": 1.2683178534571724, "grad_norm": 11.514310521737785, "learning_rate": 7.154723091338404e-06, "loss": 1.605895757675171, "step": 1229 }, { "epoch": 1.2693498452012384, "grad_norm": 12.060484911728022, "learning_rate": 7.1493031419304095e-06, "loss": 1.384972333908081, "step": 1230 }, { "epoch": 1.2703818369453044, "grad_norm": 16.430377347078384, "learning_rate": 7.1438800928065385e-06, "loss": 1.4129787683486938, "step": 1231 }, { "epoch": 1.2714138286893704, "grad_norm": 11.185735193254, "learning_rate": 7.138453951787894e-06, "loss": 1.020602822303772, "step": 1232 }, { "epoch": 1.2724458204334366, "grad_norm": 14.645847466215377, "learning_rate": 7.133024726700027e-06, "loss": 1.4097436666488647, "step": 1233 }, { "epoch": 1.2734778121775026, "grad_norm": 10.044328600175684, "learning_rate": 7.12759242537295e-06, "loss": 1.802374243736267, "step": 1234 }, { "epoch": 1.2745098039215685, "grad_norm": 17.231549286448168, "learning_rate": 7.1221570556411005e-06, "loss": 1.298069953918457, "step": 1235 }, { "epoch": 1.2755417956656347, "grad_norm": 10.40438584029231, "learning_rate": 7.1167186253433474e-06, "loss": 1.9908943176269531, "step": 1236 }, { "epoch": 1.2765737874097007, "grad_norm": 10.094942538512901, "learning_rate": 7.111277142322971e-06, "loss": 1.542879581451416, "step": 1237 }, { "epoch": 1.2776057791537667, "grad_norm": 84.36926538241218, "learning_rate": 7.105832614427656e-06, "loss": 1.3311142921447754, "step": 1238 }, { "epoch": 1.278637770897833, "grad_norm": 16.046071866527804, "learning_rate": 7.100385049509477e-06, "loss": 1.7607979774475098, "step": 1239 }, { "epoch": 1.279669762641899, "grad_norm": 10.83846781923933, "learning_rate": 7.094934455424889e-06, "loss": 1.2249962091445923, "step": 1240 }, { "epoch": 1.280701754385965, "grad_norm": 15.256033188599012, "learning_rate": 7.089480840034715e-06, "loss": 1.9785404205322266, "step": 1241 }, { "epoch": 1.2817337461300309, "grad_norm": 7.75804045811503, "learning_rate": 7.084024211204136e-06, "loss": 1.1947821378707886, "step": 1242 }, { "epoch": 1.282765737874097, "grad_norm": 17.308231386990283, "learning_rate": 7.07856457680268e-06, "loss": 1.1516659259796143, "step": 1243 }, { "epoch": 1.283797729618163, "grad_norm": 13.440135470304948, "learning_rate": 7.073101944704209e-06, "loss": 1.3261773586273193, "step": 1244 }, { "epoch": 1.284829721362229, "grad_norm": 10.582432798186264, "learning_rate": 7.067636322786906e-06, "loss": 1.8862181901931763, "step": 1245 }, { "epoch": 1.285861713106295, "grad_norm": 9.287526448936454, "learning_rate": 7.06216771893327e-06, "loss": 1.8220220804214478, "step": 1246 }, { "epoch": 1.2868937048503613, "grad_norm": 11.547994179745336, "learning_rate": 7.056696141030095e-06, "loss": 0.8939683437347412, "step": 1247 }, { "epoch": 1.2879256965944272, "grad_norm": 10.68722215184878, "learning_rate": 7.051221596968471e-06, "loss": 1.5759246349334717, "step": 1248 }, { "epoch": 1.2889576883384932, "grad_norm": 7.647385646118011, "learning_rate": 7.0457440946437605e-06, "loss": 2.126826763153076, "step": 1249 }, { "epoch": 1.2899896800825594, "grad_norm": 9.860181991689545, "learning_rate": 7.040263641955594e-06, "loss": 1.167819857597351, "step": 1250 }, { "epoch": 1.2910216718266254, "grad_norm": 10.300885899118448, "learning_rate": 7.034780246807857e-06, "loss": 0.8613216280937195, "step": 1251 }, { "epoch": 1.2920536635706914, "grad_norm": 14.731011070999733, "learning_rate": 7.029293917108678e-06, "loss": 1.7457356452941895, "step": 1252 }, { "epoch": 1.2930856553147576, "grad_norm": 16.05965173219095, "learning_rate": 7.023804660770422e-06, "loss": 2.087299346923828, "step": 1253 }, { "epoch": 1.2941176470588236, "grad_norm": 19.98491427880135, "learning_rate": 7.0183124857096676e-06, "loss": 1.3274412155151367, "step": 1254 }, { "epoch": 1.2951496388028896, "grad_norm": 8.480002780443607, "learning_rate": 7.012817399847207e-06, "loss": 1.2816498279571533, "step": 1255 }, { "epoch": 1.2961816305469556, "grad_norm": 10.022827971317682, "learning_rate": 7.0073194111080315e-06, "loss": 1.2162542343139648, "step": 1256 }, { "epoch": 1.2972136222910216, "grad_norm": 6.871625539305346, "learning_rate": 7.001818527421314e-06, "loss": 0.9680448770523071, "step": 1257 }, { "epoch": 1.2982456140350878, "grad_norm": 7.317032162601178, "learning_rate": 6.996314756720409e-06, "loss": 1.8782672882080078, "step": 1258 }, { "epoch": 1.2992776057791537, "grad_norm": 11.160755976506538, "learning_rate": 6.9908081069428294e-06, "loss": 1.9667476415634155, "step": 1259 }, { "epoch": 1.3003095975232197, "grad_norm": 8.456022150247852, "learning_rate": 6.985298586030241e-06, "loss": 1.8856534957885742, "step": 1260 }, { "epoch": 1.301341589267286, "grad_norm": 8.939573953300108, "learning_rate": 6.979786201928455e-06, "loss": 1.4806193113327026, "step": 1261 }, { "epoch": 1.302373581011352, "grad_norm": 10.227571088009338, "learning_rate": 6.974270962587405e-06, "loss": 1.4667408466339111, "step": 1262 }, { "epoch": 1.303405572755418, "grad_norm": 13.231021950855867, "learning_rate": 6.968752875961149e-06, "loss": 1.3672292232513428, "step": 1263 }, { "epoch": 1.3044375644994841, "grad_norm": 17.13546379140691, "learning_rate": 6.963231950007845e-06, "loss": 1.7469172477722168, "step": 1264 }, { "epoch": 1.30546955624355, "grad_norm": 10.826702738897808, "learning_rate": 6.95770819268975e-06, "loss": 1.0989598035812378, "step": 1265 }, { "epoch": 1.306501547987616, "grad_norm": 13.088464704259879, "learning_rate": 6.952181611973203e-06, "loss": 1.5576810836791992, "step": 1266 }, { "epoch": 1.307533539731682, "grad_norm": 10.604025023103842, "learning_rate": 6.9466522158286175e-06, "loss": 1.8453103303909302, "step": 1267 }, { "epoch": 1.308565531475748, "grad_norm": 13.530398513450153, "learning_rate": 6.941120012230464e-06, "loss": 1.4260985851287842, "step": 1268 }, { "epoch": 1.3095975232198143, "grad_norm": 18.227518930179752, "learning_rate": 6.93558500915726e-06, "loss": 1.8812353610992432, "step": 1269 }, { "epoch": 1.3106295149638802, "grad_norm": 16.834162609499675, "learning_rate": 6.930047214591569e-06, "loss": 1.3378567695617676, "step": 1270 }, { "epoch": 1.3116615067079462, "grad_norm": 17.632777807359314, "learning_rate": 6.924506636519968e-06, "loss": 1.2886006832122803, "step": 1271 }, { "epoch": 1.3126934984520124, "grad_norm": 8.590505601569463, "learning_rate": 6.918963282933063e-06, "loss": 1.9369480609893799, "step": 1272 }, { "epoch": 1.3137254901960784, "grad_norm": 15.460248224270192, "learning_rate": 6.913417161825449e-06, "loss": 1.6706684827804565, "step": 1273 }, { "epoch": 1.3147574819401444, "grad_norm": 16.46819164452491, "learning_rate": 6.907868281195722e-06, "loss": 1.8951505422592163, "step": 1274 }, { "epoch": 1.3157894736842106, "grad_norm": 16.231207232177706, "learning_rate": 6.902316649046452e-06, "loss": 1.964847207069397, "step": 1275 }, { "epoch": 1.3168214654282766, "grad_norm": 13.918532385058198, "learning_rate": 6.896762273384179e-06, "loss": 1.9368600845336914, "step": 1276 }, { "epoch": 1.3178534571723426, "grad_norm": 14.031469949597836, "learning_rate": 6.891205162219402e-06, "loss": 1.7412488460540771, "step": 1277 }, { "epoch": 1.3188854489164088, "grad_norm": 11.144610201408431, "learning_rate": 6.885645323566561e-06, "loss": 1.5681676864624023, "step": 1278 }, { "epoch": 1.3199174406604748, "grad_norm": 11.79257157269013, "learning_rate": 6.880082765444034e-06, "loss": 1.7658888101577759, "step": 1279 }, { "epoch": 1.3209494324045408, "grad_norm": 13.902619176662753, "learning_rate": 6.8745174958741164e-06, "loss": 1.7245615720748901, "step": 1280 }, { "epoch": 1.3219814241486068, "grad_norm": 18.76369717340051, "learning_rate": 6.868949522883017e-06, "loss": 1.6578267812728882, "step": 1281 }, { "epoch": 1.3230134158926727, "grad_norm": 8.996167250954647, "learning_rate": 6.863378854500846e-06, "loss": 1.5728305578231812, "step": 1282 }, { "epoch": 1.324045407636739, "grad_norm": 9.57359831014415, "learning_rate": 6.857805498761593e-06, "loss": 1.375089168548584, "step": 1283 }, { "epoch": 1.325077399380805, "grad_norm": 9.066334633906381, "learning_rate": 6.852229463703131e-06, "loss": 1.8140184879302979, "step": 1284 }, { "epoch": 1.326109391124871, "grad_norm": 9.652084209720877, "learning_rate": 6.846650757367192e-06, "loss": 1.1467084884643555, "step": 1285 }, { "epoch": 1.3271413828689371, "grad_norm": 12.930430979530293, "learning_rate": 6.841069387799364e-06, "loss": 1.2440385818481445, "step": 1286 }, { "epoch": 1.328173374613003, "grad_norm": 9.645014089750466, "learning_rate": 6.835485363049075e-06, "loss": 1.2007393836975098, "step": 1287 }, { "epoch": 1.329205366357069, "grad_norm": 10.864773450057065, "learning_rate": 6.829898691169581e-06, "loss": 1.7485893964767456, "step": 1288 }, { "epoch": 1.3302373581011353, "grad_norm": 7.027777848617237, "learning_rate": 6.8243093802179574e-06, "loss": 1.6619250774383545, "step": 1289 }, { "epoch": 1.3312693498452013, "grad_norm": 14.918429364794484, "learning_rate": 6.81871743825508e-06, "loss": 1.067305088043213, "step": 1290 }, { "epoch": 1.3323013415892673, "grad_norm": 7.774349136796258, "learning_rate": 6.813122873345632e-06, "loss": 1.8407032489776611, "step": 1291 }, { "epoch": 1.3333333333333333, "grad_norm": 14.681779739650855, "learning_rate": 6.8075256935580655e-06, "loss": 2.130232334136963, "step": 1292 }, { "epoch": 1.3343653250773992, "grad_norm": 9.52858502084645, "learning_rate": 6.80192590696461e-06, "loss": 1.317871332168579, "step": 1293 }, { "epoch": 1.3353973168214655, "grad_norm": 15.393341342952034, "learning_rate": 6.796323521641257e-06, "loss": 0.9826754331588745, "step": 1294 }, { "epoch": 1.3364293085655314, "grad_norm": 12.804696585185201, "learning_rate": 6.790718545667738e-06, "loss": 1.7907086610794067, "step": 1295 }, { "epoch": 1.3374613003095974, "grad_norm": 11.551243689301163, "learning_rate": 6.78511098712753e-06, "loss": 1.8355216979980469, "step": 1296 }, { "epoch": 1.3384932920536636, "grad_norm": 12.067399492809944, "learning_rate": 6.779500854107828e-06, "loss": 0.6331468820571899, "step": 1297 }, { "epoch": 1.3395252837977296, "grad_norm": 13.245167552074978, "learning_rate": 6.773888154699543e-06, "loss": 1.709820032119751, "step": 1298 }, { "epoch": 1.3405572755417956, "grad_norm": 7.370036463533888, "learning_rate": 6.768272896997285e-06, "loss": 1.5491633415222168, "step": 1299 }, { "epoch": 1.3415892672858618, "grad_norm": 23.00598116208232, "learning_rate": 6.762655089099353e-06, "loss": 2.1439013481140137, "step": 1300 }, { "epoch": 1.3426212590299278, "grad_norm": 12.393604989350957, "learning_rate": 6.757034739107732e-06, "loss": 1.1984869241714478, "step": 1301 }, { "epoch": 1.3436532507739938, "grad_norm": 7.950299634053667, "learning_rate": 6.751411855128062e-06, "loss": 1.8228305578231812, "step": 1302 }, { "epoch": 1.34468524251806, "grad_norm": 8.48250505168335, "learning_rate": 6.745786445269644e-06, "loss": 1.5694873332977295, "step": 1303 }, { "epoch": 1.345717234262126, "grad_norm": 25.47218353712853, "learning_rate": 6.740158517645418e-06, "loss": 1.922268271446228, "step": 1304 }, { "epoch": 1.346749226006192, "grad_norm": 9.26806273282689, "learning_rate": 6.734528080371962e-06, "loss": 1.4681077003479004, "step": 1305 }, { "epoch": 1.347781217750258, "grad_norm": 8.276957138807358, "learning_rate": 6.728895141569464e-06, "loss": 1.7013589143753052, "step": 1306 }, { "epoch": 1.348813209494324, "grad_norm": 10.28492210292928, "learning_rate": 6.723259709361726e-06, "loss": 1.5537389516830444, "step": 1307 }, { "epoch": 1.3498452012383901, "grad_norm": 14.359998071626563, "learning_rate": 6.717621791876147e-06, "loss": 2.2924580574035645, "step": 1308 }, { "epoch": 1.3508771929824561, "grad_norm": 20.262716426358573, "learning_rate": 6.711981397243703e-06, "loss": 1.4611811637878418, "step": 1309 }, { "epoch": 1.351909184726522, "grad_norm": 18.62862713951369, "learning_rate": 6.706338533598951e-06, "loss": 1.7338206768035889, "step": 1310 }, { "epoch": 1.3529411764705883, "grad_norm": 8.391919709124968, "learning_rate": 6.700693209080003e-06, "loss": 1.2168724536895752, "step": 1311 }, { "epoch": 1.3539731682146543, "grad_norm": 19.2929608725374, "learning_rate": 6.695045431828524e-06, "loss": 1.4697058200836182, "step": 1312 }, { "epoch": 1.3550051599587203, "grad_norm": 25.873752000752564, "learning_rate": 6.689395209989713e-06, "loss": 1.6276439428329468, "step": 1313 }, { "epoch": 1.3560371517027865, "grad_norm": 8.689028425958979, "learning_rate": 6.6837425517122945e-06, "loss": 1.2771011590957642, "step": 1314 }, { "epoch": 1.3570691434468525, "grad_norm": 12.809736064633563, "learning_rate": 6.678087465148511e-06, "loss": 1.3017215728759766, "step": 1315 }, { "epoch": 1.3581011351909185, "grad_norm": 15.79317141632538, "learning_rate": 6.672429958454103e-06, "loss": 1.9034658670425415, "step": 1316 }, { "epoch": 1.3591331269349844, "grad_norm": 9.924700399484639, "learning_rate": 6.666770039788305e-06, "loss": 1.3009569644927979, "step": 1317 }, { "epoch": 1.3601651186790504, "grad_norm": 12.264158048711824, "learning_rate": 6.661107717313824e-06, "loss": 1.3613559007644653, "step": 1318 }, { "epoch": 1.3611971104231166, "grad_norm": 12.995848633185838, "learning_rate": 6.655442999196838e-06, "loss": 0.8472391366958618, "step": 1319 }, { "epoch": 1.3622291021671826, "grad_norm": 12.715673473779065, "learning_rate": 6.649775893606982e-06, "loss": 1.333915114402771, "step": 1320 }, { "epoch": 1.3632610939112486, "grad_norm": 12.237472217177828, "learning_rate": 6.64410640871733e-06, "loss": 1.5714805126190186, "step": 1321 }, { "epoch": 1.3642930856553148, "grad_norm": 14.454550700639611, "learning_rate": 6.638434552704389e-06, "loss": 2.2822306156158447, "step": 1322 }, { "epoch": 1.3653250773993808, "grad_norm": 26.144904229457918, "learning_rate": 6.632760333748086e-06, "loss": 1.3987756967544556, "step": 1323 }, { "epoch": 1.3663570691434468, "grad_norm": 11.746650435747524, "learning_rate": 6.627083760031755e-06, "loss": 1.3919203281402588, "step": 1324 }, { "epoch": 1.367389060887513, "grad_norm": 8.768967894320843, "learning_rate": 6.621404839742127e-06, "loss": 1.8376681804656982, "step": 1325 }, { "epoch": 1.368421052631579, "grad_norm": 7.280635538208758, "learning_rate": 6.615723581069318e-06, "loss": 1.408341884613037, "step": 1326 }, { "epoch": 1.369453044375645, "grad_norm": 12.407087366787891, "learning_rate": 6.610039992206814e-06, "loss": 1.5806256532669067, "step": 1327 }, { "epoch": 1.3704850361197112, "grad_norm": 11.943643963848155, "learning_rate": 6.604354081351461e-06, "loss": 2.1980557441711426, "step": 1328 }, { "epoch": 1.3715170278637772, "grad_norm": 9.940910005359118, "learning_rate": 6.5986658567034565e-06, "loss": 1.2753872871398926, "step": 1329 }, { "epoch": 1.3725490196078431, "grad_norm": 7.560652373165568, "learning_rate": 6.592975326466336e-06, "loss": 1.95408296585083, "step": 1330 }, { "epoch": 1.3735810113519091, "grad_norm": 11.601700149690876, "learning_rate": 6.587282498846956e-06, "loss": 1.5149401426315308, "step": 1331 }, { "epoch": 1.3746130030959751, "grad_norm": 12.092137663163145, "learning_rate": 6.5815873820554925e-06, "loss": 1.8140610456466675, "step": 1332 }, { "epoch": 1.3756449948400413, "grad_norm": 8.401132139383938, "learning_rate": 6.575889984305411e-06, "loss": 1.916958212852478, "step": 1333 }, { "epoch": 1.3766769865841073, "grad_norm": 9.68323274094978, "learning_rate": 6.57019031381348e-06, "loss": 1.9963104724884033, "step": 1334 }, { "epoch": 1.3777089783281733, "grad_norm": 11.431708385978807, "learning_rate": 6.564488378799738e-06, "loss": 1.2590843439102173, "step": 1335 }, { "epoch": 1.3787409700722395, "grad_norm": 8.571951982688004, "learning_rate": 6.558784187487495e-06, "loss": 1.394248604774475, "step": 1336 }, { "epoch": 1.3797729618163055, "grad_norm": 10.492254285636673, "learning_rate": 6.553077748103307e-06, "loss": 1.6234259605407715, "step": 1337 }, { "epoch": 1.3808049535603715, "grad_norm": 14.586388788385152, "learning_rate": 6.5473690688769775e-06, "loss": 1.6336512565612793, "step": 1338 }, { "epoch": 1.3818369453044377, "grad_norm": 7.877592238801531, "learning_rate": 6.5416581580415415e-06, "loss": 2.567668914794922, "step": 1339 }, { "epoch": 1.3828689370485037, "grad_norm": 11.874862181391041, "learning_rate": 6.535945023833249e-06, "loss": 1.7893708944320679, "step": 1340 }, { "epoch": 1.3839009287925697, "grad_norm": 10.04975526985597, "learning_rate": 6.530229674491559e-06, "loss": 1.4658175706863403, "step": 1341 }, { "epoch": 1.3849329205366356, "grad_norm": 33.409645572782985, "learning_rate": 6.524512118259122e-06, "loss": 3.506965398788452, "step": 1342 }, { "epoch": 1.3859649122807016, "grad_norm": 10.778179108329482, "learning_rate": 6.518792363381776e-06, "loss": 1.2108711004257202, "step": 1343 }, { "epoch": 1.3869969040247678, "grad_norm": 9.572063239204743, "learning_rate": 6.513070418108525e-06, "loss": 1.6470930576324463, "step": 1344 }, { "epoch": 1.3880288957688338, "grad_norm": 12.130274106203869, "learning_rate": 6.507346290691534e-06, "loss": 1.4623892307281494, "step": 1345 }, { "epoch": 1.3890608875128998, "grad_norm": 13.41976404770925, "learning_rate": 6.501619989386118e-06, "loss": 1.5492268800735474, "step": 1346 }, { "epoch": 1.390092879256966, "grad_norm": 16.948274862432797, "learning_rate": 6.4958915224507235e-06, "loss": 1.1457581520080566, "step": 1347 }, { "epoch": 1.391124871001032, "grad_norm": 10.519090027770496, "learning_rate": 6.490160898146919e-06, "loss": 1.7141773700714111, "step": 1348 }, { "epoch": 1.392156862745098, "grad_norm": 9.328092490719959, "learning_rate": 6.484428124739388e-06, "loss": 2.0902035236358643, "step": 1349 }, { "epoch": 1.3931888544891642, "grad_norm": 17.461370440544524, "learning_rate": 6.478693210495913e-06, "loss": 3.634981632232666, "step": 1350 }, { "epoch": 1.3942208462332302, "grad_norm": 9.592360182696881, "learning_rate": 6.472956163687363e-06, "loss": 1.582890510559082, "step": 1351 }, { "epoch": 1.3952528379772962, "grad_norm": 19.520153952418713, "learning_rate": 6.467216992587679e-06, "loss": 1.5116333961486816, "step": 1352 }, { "epoch": 1.3962848297213624, "grad_norm": 15.309150288640616, "learning_rate": 6.4614757054738744e-06, "loss": 1.3862173557281494, "step": 1353 }, { "epoch": 1.3973168214654283, "grad_norm": 12.534277109856836, "learning_rate": 6.455732310626005e-06, "loss": 1.5213537216186523, "step": 1354 }, { "epoch": 1.3983488132094943, "grad_norm": 7.623388240826514, "learning_rate": 6.449986816327173e-06, "loss": 1.5704606771469116, "step": 1355 }, { "epoch": 1.3993808049535603, "grad_norm": 13.96068897335519, "learning_rate": 6.444239230863505e-06, "loss": 1.6977448463439941, "step": 1356 }, { "epoch": 1.4004127966976263, "grad_norm": 7.7978324340101395, "learning_rate": 6.438489562524143e-06, "loss": 0.5249931812286377, "step": 1357 }, { "epoch": 1.4014447884416925, "grad_norm": 12.885842848452445, "learning_rate": 6.432737819601236e-06, "loss": 1.6976923942565918, "step": 1358 }, { "epoch": 1.4024767801857585, "grad_norm": 8.879805450767204, "learning_rate": 6.426984010389924e-06, "loss": 1.4502618312835693, "step": 1359 }, { "epoch": 1.4035087719298245, "grad_norm": 14.587093528930188, "learning_rate": 6.421228143188325e-06, "loss": 1.5353691577911377, "step": 1360 }, { "epoch": 1.4045407636738907, "grad_norm": 19.111908518818357, "learning_rate": 6.4154702262975254e-06, "loss": 1.5783036947250366, "step": 1361 }, { "epoch": 1.4055727554179567, "grad_norm": 18.35949942738296, "learning_rate": 6.40971026802157e-06, "loss": 1.5345011949539185, "step": 1362 }, { "epoch": 1.4066047471620227, "grad_norm": 13.506971539689111, "learning_rate": 6.403948276667446e-06, "loss": 1.7035109996795654, "step": 1363 }, { "epoch": 1.4076367389060889, "grad_norm": 16.84134628365059, "learning_rate": 6.398184260545072e-06, "loss": 1.957930564880371, "step": 1364 }, { "epoch": 1.4086687306501549, "grad_norm": 12.614397611258333, "learning_rate": 6.39241822796729e-06, "loss": 1.7606230974197388, "step": 1365 }, { "epoch": 1.4097007223942208, "grad_norm": 12.167810963337313, "learning_rate": 6.386650187249843e-06, "loss": 1.1950409412384033, "step": 1366 }, { "epoch": 1.4107327141382868, "grad_norm": 18.497511480405805, "learning_rate": 6.380880146711379e-06, "loss": 1.470259428024292, "step": 1367 }, { "epoch": 1.4117647058823528, "grad_norm": 12.68092565602118, "learning_rate": 6.375108114673425e-06, "loss": 2.590393304824829, "step": 1368 }, { "epoch": 1.412796697626419, "grad_norm": 14.371407712891207, "learning_rate": 6.369334099460382e-06, "loss": 1.1304482221603394, "step": 1369 }, { "epoch": 1.413828689370485, "grad_norm": 12.495927048359073, "learning_rate": 6.363558109399508e-06, "loss": 1.537990689277649, "step": 1370 }, { "epoch": 1.414860681114551, "grad_norm": 12.65546028712542, "learning_rate": 6.3577801528209125e-06, "loss": 1.5674386024475098, "step": 1371 }, { "epoch": 1.4158926728586172, "grad_norm": 10.941640579885966, "learning_rate": 6.3520002380575395e-06, "loss": 1.2706232070922852, "step": 1372 }, { "epoch": 1.4169246646026832, "grad_norm": 13.571585050567363, "learning_rate": 6.346218373445159e-06, "loss": 1.5578970909118652, "step": 1373 }, { "epoch": 1.4179566563467492, "grad_norm": 8.250524965469019, "learning_rate": 6.340434567322351e-06, "loss": 1.6340320110321045, "step": 1374 }, { "epoch": 1.4189886480908154, "grad_norm": 10.160982961416137, "learning_rate": 6.334648828030494e-06, "loss": 1.663146734237671, "step": 1375 }, { "epoch": 1.4200206398348814, "grad_norm": 13.750329742860425, "learning_rate": 6.32886116391376e-06, "loss": 1.119152545928955, "step": 1376 }, { "epoch": 1.4210526315789473, "grad_norm": 12.556590420753649, "learning_rate": 6.323071583319091e-06, "loss": 1.083686113357544, "step": 1377 }, { "epoch": 1.4220846233230136, "grad_norm": 7.811797834471477, "learning_rate": 6.317280094596197e-06, "loss": 1.0705013275146484, "step": 1378 }, { "epoch": 1.4231166150670795, "grad_norm": 13.31750115490766, "learning_rate": 6.3114867060975396e-06, "loss": 1.2521347999572754, "step": 1379 }, { "epoch": 1.4241486068111455, "grad_norm": 16.62958542447736, "learning_rate": 6.305691426178316e-06, "loss": 1.2190382480621338, "step": 1380 }, { "epoch": 1.4251805985552115, "grad_norm": 11.024512409090624, "learning_rate": 6.299894263196456e-06, "loss": 1.4755083322525024, "step": 1381 }, { "epoch": 1.4262125902992775, "grad_norm": 27.767711605272577, "learning_rate": 6.294095225512604e-06, "loss": 3.2815802097320557, "step": 1382 }, { "epoch": 1.4272445820433437, "grad_norm": 9.477799042765865, "learning_rate": 6.288294321490107e-06, "loss": 2.3116531372070312, "step": 1383 }, { "epoch": 1.4282765737874097, "grad_norm": 11.990489393588334, "learning_rate": 6.282491559495005e-06, "loss": 1.9234528541564941, "step": 1384 }, { "epoch": 1.4293085655314757, "grad_norm": 12.348925653645608, "learning_rate": 6.276686947896015e-06, "loss": 1.8830013275146484, "step": 1385 }, { "epoch": 1.4303405572755419, "grad_norm": 8.455310727922418, "learning_rate": 6.270880495064524e-06, "loss": 1.5233604907989502, "step": 1386 }, { "epoch": 1.4313725490196079, "grad_norm": 8.66136780771483, "learning_rate": 6.265072209374574e-06, "loss": 1.7544364929199219, "step": 1387 }, { "epoch": 1.4324045407636739, "grad_norm": 11.686141996301, "learning_rate": 6.259262099202849e-06, "loss": 1.745523452758789, "step": 1388 }, { "epoch": 1.43343653250774, "grad_norm": 25.909341019617443, "learning_rate": 6.253450172928668e-06, "loss": 1.608154296875, "step": 1389 }, { "epoch": 1.434468524251806, "grad_norm": 15.986325301735999, "learning_rate": 6.247636438933963e-06, "loss": 1.7948957681655884, "step": 1390 }, { "epoch": 1.435500515995872, "grad_norm": 11.105863635839464, "learning_rate": 6.241820905603277e-06, "loss": 1.2178281545639038, "step": 1391 }, { "epoch": 1.436532507739938, "grad_norm": 12.800978863737068, "learning_rate": 6.23600358132375e-06, "loss": 1.4831297397613525, "step": 1392 }, { "epoch": 1.437564499484004, "grad_norm": 30.76144400796033, "learning_rate": 6.230184474485101e-06, "loss": 2.632197141647339, "step": 1393 }, { "epoch": 1.4385964912280702, "grad_norm": 13.142736450508734, "learning_rate": 6.22436359347962e-06, "loss": 2.632613182067871, "step": 1394 }, { "epoch": 1.4396284829721362, "grad_norm": 11.286789151567506, "learning_rate": 6.218540946702158e-06, "loss": 1.2967652082443237, "step": 1395 }, { "epoch": 1.4406604747162022, "grad_norm": 11.133156606466436, "learning_rate": 6.212716542550112e-06, "loss": 1.594740629196167, "step": 1396 }, { "epoch": 1.4416924664602684, "grad_norm": 8.730555310476715, "learning_rate": 6.206890389423412e-06, "loss": 1.584374189376831, "step": 1397 }, { "epoch": 1.4427244582043344, "grad_norm": 8.534026782889603, "learning_rate": 6.201062495724513e-06, "loss": 2.5659961700439453, "step": 1398 }, { "epoch": 1.4437564499484004, "grad_norm": 7.288337030266494, "learning_rate": 6.195232869858375e-06, "loss": 1.7394036054611206, "step": 1399 }, { "epoch": 1.4447884416924666, "grad_norm": 9.841768591510458, "learning_rate": 6.189401520232464e-06, "loss": 1.254488229751587, "step": 1400 }, { "epoch": 1.4458204334365325, "grad_norm": 12.839409832556502, "learning_rate": 6.183568455256725e-06, "loss": 1.8513953685760498, "step": 1401 }, { "epoch": 1.4468524251805985, "grad_norm": 13.276032955940094, "learning_rate": 6.177733683343578e-06, "loss": 1.8852769136428833, "step": 1402 }, { "epoch": 1.4478844169246647, "grad_norm": 9.43865265441635, "learning_rate": 6.171897212907912e-06, "loss": 1.440374732017517, "step": 1403 }, { "epoch": 1.4489164086687307, "grad_norm": 11.606811882038782, "learning_rate": 6.166059052367055e-06, "loss": 0.9862947463989258, "step": 1404 }, { "epoch": 1.4499484004127967, "grad_norm": 14.206842551035457, "learning_rate": 6.16021921014078e-06, "loss": 1.9904972314834595, "step": 1405 }, { "epoch": 1.4509803921568627, "grad_norm": 8.696455410766612, "learning_rate": 6.154377694651279e-06, "loss": 1.3866620063781738, "step": 1406 }, { "epoch": 1.4520123839009287, "grad_norm": 8.596637181735094, "learning_rate": 6.148534514323165e-06, "loss": 1.2775863409042358, "step": 1407 }, { "epoch": 1.453044375644995, "grad_norm": 10.271782117514103, "learning_rate": 6.142689677583447e-06, "loss": 1.40165114402771, "step": 1408 }, { "epoch": 1.4540763673890609, "grad_norm": 8.693261567738002, "learning_rate": 6.136843192861522e-06, "loss": 1.458809494972229, "step": 1409 }, { "epoch": 1.4551083591331269, "grad_norm": 13.065942705085428, "learning_rate": 6.130995068589166e-06, "loss": 1.12819242477417, "step": 1410 }, { "epoch": 1.456140350877193, "grad_norm": 14.840424340102057, "learning_rate": 6.125145313200519e-06, "loss": 1.9877163171768188, "step": 1411 }, { "epoch": 1.457172342621259, "grad_norm": 12.797097904552402, "learning_rate": 6.119293935132076e-06, "loss": 1.3951126337051392, "step": 1412 }, { "epoch": 1.458204334365325, "grad_norm": 12.77015646004144, "learning_rate": 6.113440942822666e-06, "loss": 1.1544969081878662, "step": 1413 }, { "epoch": 1.4592363261093912, "grad_norm": 9.33713645008868, "learning_rate": 6.107586344713451e-06, "loss": 2.1354317665100098, "step": 1414 }, { "epoch": 1.4602683178534572, "grad_norm": 15.776487661672133, "learning_rate": 6.101730149247908e-06, "loss": 1.611715316772461, "step": 1415 }, { "epoch": 1.4613003095975232, "grad_norm": 12.73566319918363, "learning_rate": 6.095872364871818e-06, "loss": 2.534665107727051, "step": 1416 }, { "epoch": 1.4623323013415892, "grad_norm": 21.518820732330596, "learning_rate": 6.090013000033251e-06, "loss": 1.435193419456482, "step": 1417 }, { "epoch": 1.4633642930856552, "grad_norm": 13.034743581278482, "learning_rate": 6.084152063182559e-06, "loss": 0.7462902665138245, "step": 1418 }, { "epoch": 1.4643962848297214, "grad_norm": 12.039234728135936, "learning_rate": 6.078289562772362e-06, "loss": 1.4898221492767334, "step": 1419 }, { "epoch": 1.4654282765737874, "grad_norm": 10.031784052622553, "learning_rate": 6.072425507257528e-06, "loss": 1.8172965049743652, "step": 1420 }, { "epoch": 1.4664602683178534, "grad_norm": 12.802095758665487, "learning_rate": 6.066559905095179e-06, "loss": 2.3749866485595703, "step": 1421 }, { "epoch": 1.4674922600619196, "grad_norm": 12.745749761023887, "learning_rate": 6.060692764744657e-06, "loss": 2.0804553031921387, "step": 1422 }, { "epoch": 1.4685242518059856, "grad_norm": 12.233534810853003, "learning_rate": 6.054824094667529e-06, "loss": 2.026507616043091, "step": 1423 }, { "epoch": 1.4695562435500515, "grad_norm": 11.046016933485669, "learning_rate": 6.048953903327568e-06, "loss": 1.3507273197174072, "step": 1424 }, { "epoch": 1.4705882352941178, "grad_norm": 10.09541906536385, "learning_rate": 6.043082199190735e-06, "loss": 1.6492483615875244, "step": 1425 }, { "epoch": 1.4716202270381837, "grad_norm": 10.447296179338457, "learning_rate": 6.037208990725181e-06, "loss": 0.9386216402053833, "step": 1426 }, { "epoch": 1.4726522187822497, "grad_norm": 9.277034655962519, "learning_rate": 6.031334286401218e-06, "loss": 1.2079228162765503, "step": 1427 }, { "epoch": 1.4736842105263157, "grad_norm": 13.768299398362204, "learning_rate": 6.025458094691323e-06, "loss": 1.549511432647705, "step": 1428 }, { "epoch": 1.474716202270382, "grad_norm": 9.200360613784461, "learning_rate": 6.019580424070114e-06, "loss": 1.2142996788024902, "step": 1429 }, { "epoch": 1.475748194014448, "grad_norm": 11.175925574454759, "learning_rate": 6.0137012830143405e-06, "loss": 1.2798269987106323, "step": 1430 }, { "epoch": 1.4767801857585139, "grad_norm": 10.251935687194365, "learning_rate": 6.007820680002878e-06, "loss": 1.052504301071167, "step": 1431 }, { "epoch": 1.4778121775025799, "grad_norm": 11.33479315396473, "learning_rate": 6.0019386235167055e-06, "loss": 1.3901056051254272, "step": 1432 }, { "epoch": 1.478844169246646, "grad_norm": 11.445001768478525, "learning_rate": 5.9960551220389e-06, "loss": 1.121410846710205, "step": 1433 }, { "epoch": 1.479876160990712, "grad_norm": 14.072546458014429, "learning_rate": 5.990170184054622e-06, "loss": 1.7075709104537964, "step": 1434 }, { "epoch": 1.480908152734778, "grad_norm": 11.420431766114314, "learning_rate": 5.984283818051104e-06, "loss": 2.078220844268799, "step": 1435 }, { "epoch": 1.4819401444788443, "grad_norm": 12.833753989702446, "learning_rate": 5.978396032517641e-06, "loss": 1.833093523979187, "step": 1436 }, { "epoch": 1.4829721362229102, "grad_norm": 8.45273153102322, "learning_rate": 5.972506835945569e-06, "loss": 2.052757740020752, "step": 1437 }, { "epoch": 1.4840041279669762, "grad_norm": 15.084190288166576, "learning_rate": 5.966616236828263e-06, "loss": 1.1162149906158447, "step": 1438 }, { "epoch": 1.4850361197110424, "grad_norm": 14.058585981663885, "learning_rate": 5.960724243661119e-06, "loss": 1.4707602262496948, "step": 1439 }, { "epoch": 1.4860681114551084, "grad_norm": 17.79529872485665, "learning_rate": 5.9548308649415486e-06, "loss": 1.8684618473052979, "step": 1440 }, { "epoch": 1.4871001031991744, "grad_norm": 12.509522667361384, "learning_rate": 5.948936109168954e-06, "loss": 2.0305280685424805, "step": 1441 }, { "epoch": 1.4881320949432404, "grad_norm": 15.322431562131413, "learning_rate": 5.943039984844727e-06, "loss": 2.028458833694458, "step": 1442 }, { "epoch": 1.4891640866873064, "grad_norm": 10.558362837173703, "learning_rate": 5.937142500472235e-06, "loss": 1.3905258178710938, "step": 1443 }, { "epoch": 1.4901960784313726, "grad_norm": 10.812034444363723, "learning_rate": 5.931243664556803e-06, "loss": 1.1899843215942383, "step": 1444 }, { "epoch": 1.4912280701754386, "grad_norm": 9.376651735471622, "learning_rate": 5.925343485605709e-06, "loss": 1.3725718259811401, "step": 1445 }, { "epoch": 1.4922600619195046, "grad_norm": 10.631617419467513, "learning_rate": 5.919441972128165e-06, "loss": 2.473677158355713, "step": 1446 }, { "epoch": 1.4932920536635708, "grad_norm": 12.177567464057311, "learning_rate": 5.913539132635309e-06, "loss": 1.3525331020355225, "step": 1447 }, { "epoch": 1.4943240454076367, "grad_norm": 17.265066708620502, "learning_rate": 5.90763497564019e-06, "loss": 2.0814294815063477, "step": 1448 }, { "epoch": 1.4953560371517027, "grad_norm": 10.516814129994565, "learning_rate": 5.901729509657758e-06, "loss": 1.4664232730865479, "step": 1449 }, { "epoch": 1.496388028895769, "grad_norm": 9.305983886869592, "learning_rate": 5.895822743204855e-06, "loss": 1.3303736448287964, "step": 1450 }, { "epoch": 1.497420020639835, "grad_norm": 12.006125644076471, "learning_rate": 5.889914684800191e-06, "loss": 2.165750503540039, "step": 1451 }, { "epoch": 1.498452012383901, "grad_norm": 13.8111428322773, "learning_rate": 5.884005342964343e-06, "loss": 1.3294661045074463, "step": 1452 }, { "epoch": 1.499484004127967, "grad_norm": 11.698364347897481, "learning_rate": 5.87809472621974e-06, "loss": 1.6502536535263062, "step": 1453 }, { "epoch": 1.5005159958720329, "grad_norm": 7.817300525446381, "learning_rate": 5.872182843090644e-06, "loss": 1.302175521850586, "step": 1454 }, { "epoch": 1.501547987616099, "grad_norm": 5.571703031423158, "learning_rate": 5.8662697021031555e-06, "loss": 1.9427019357681274, "step": 1455 }, { "epoch": 1.502579979360165, "grad_norm": 9.404770291286901, "learning_rate": 5.860355311785175e-06, "loss": 1.2602835893630981, "step": 1456 }, { "epoch": 1.503611971104231, "grad_norm": 13.125735745212998, "learning_rate": 5.8544396806664135e-06, "loss": 1.3319302797317505, "step": 1457 }, { "epoch": 1.5046439628482973, "grad_norm": 10.729152197735356, "learning_rate": 5.848522817278369e-06, "loss": 1.3135343790054321, "step": 1458 }, { "epoch": 1.5056759545923633, "grad_norm": 11.124309797323237, "learning_rate": 5.8426047301543165e-06, "loss": 2.520270824432373, "step": 1459 }, { "epoch": 1.5067079463364292, "grad_norm": 8.362701679492053, "learning_rate": 5.836685427829296e-06, "loss": 1.7932217121124268, "step": 1460 }, { "epoch": 1.5077399380804954, "grad_norm": 9.13955572924332, "learning_rate": 5.830764918840102e-06, "loss": 1.0656404495239258, "step": 1461 }, { "epoch": 1.5087719298245614, "grad_norm": 13.47182208637423, "learning_rate": 5.824843211725265e-06, "loss": 2.238260507583618, "step": 1462 }, { "epoch": 1.5098039215686274, "grad_norm": 9.202878119340536, "learning_rate": 5.818920315025045e-06, "loss": 1.6161681413650513, "step": 1463 }, { "epoch": 1.5108359133126936, "grad_norm": 11.722185650063693, "learning_rate": 5.812996237281423e-06, "loss": 1.6068499088287354, "step": 1464 }, { "epoch": 1.5118679050567594, "grad_norm": 9.617934336633931, "learning_rate": 5.807070987038075e-06, "loss": 1.7375080585479736, "step": 1465 }, { "epoch": 1.5128998968008256, "grad_norm": 11.986350172942641, "learning_rate": 5.8011445728403724e-06, "loss": 1.4095920324325562, "step": 1466 }, { "epoch": 1.5139318885448918, "grad_norm": 11.439405475221042, "learning_rate": 5.7952170032353675e-06, "loss": 2.3441104888916016, "step": 1467 }, { "epoch": 1.5149638802889576, "grad_norm": 10.864248301742652, "learning_rate": 5.7892882867717705e-06, "loss": 1.1567907333374023, "step": 1468 }, { "epoch": 1.5159958720330238, "grad_norm": 8.931863762917068, "learning_rate": 5.7833584319999555e-06, "loss": 1.375398874282837, "step": 1469 }, { "epoch": 1.5170278637770898, "grad_norm": 11.194765468102132, "learning_rate": 5.777427447471933e-06, "loss": 1.997140884399414, "step": 1470 }, { "epoch": 1.5180598555211557, "grad_norm": 10.822588418086724, "learning_rate": 5.771495341741344e-06, "loss": 1.4403889179229736, "step": 1471 }, { "epoch": 1.519091847265222, "grad_norm": 12.160372351646561, "learning_rate": 5.765562123363445e-06, "loss": 1.1714125871658325, "step": 1472 }, { "epoch": 1.520123839009288, "grad_norm": 12.879802517475234, "learning_rate": 5.759627800895098e-06, "loss": 1.2552604675292969, "step": 1473 }, { "epoch": 1.521155830753354, "grad_norm": 9.307394670135041, "learning_rate": 5.75369238289476e-06, "loss": 1.5141665935516357, "step": 1474 }, { "epoch": 1.5221878224974201, "grad_norm": 10.935452019272311, "learning_rate": 5.747755877922464e-06, "loss": 1.5338034629821777, "step": 1475 }, { "epoch": 1.5232198142414861, "grad_norm": 11.9459171615159, "learning_rate": 5.7418182945398136e-06, "loss": 1.9021246433258057, "step": 1476 }, { "epoch": 1.524251805985552, "grad_norm": 14.221176762445017, "learning_rate": 5.735879641309964e-06, "loss": 1.4347755908966064, "step": 1477 }, { "epoch": 1.5252837977296183, "grad_norm": 9.735995474013986, "learning_rate": 5.729939926797617e-06, "loss": 1.5926001071929932, "step": 1478 }, { "epoch": 1.526315789473684, "grad_norm": 8.085584686886568, "learning_rate": 5.723999159569005e-06, "loss": 1.271660566329956, "step": 1479 }, { "epoch": 1.5273477812177503, "grad_norm": 11.747415248330071, "learning_rate": 5.718057348191874e-06, "loss": 1.1379930973052979, "step": 1480 }, { "epoch": 1.5283797729618163, "grad_norm": 10.759081786618275, "learning_rate": 5.712114501235485e-06, "loss": 1.7403454780578613, "step": 1481 }, { "epoch": 1.5294117647058822, "grad_norm": 10.544504378713793, "learning_rate": 5.7061706272705796e-06, "loss": 1.6052613258361816, "step": 1482 }, { "epoch": 1.5304437564499485, "grad_norm": 15.57304293320465, "learning_rate": 5.7002257348693925e-06, "loss": 1.1717628240585327, "step": 1483 }, { "epoch": 1.5314757481940144, "grad_norm": 15.929231531783989, "learning_rate": 5.6942798326056205e-06, "loss": 1.8529958724975586, "step": 1484 }, { "epoch": 1.5325077399380804, "grad_norm": 8.34964910158795, "learning_rate": 5.688332929054417e-06, "loss": 1.7307343482971191, "step": 1485 }, { "epoch": 1.5335397316821466, "grad_norm": 15.606681556690106, "learning_rate": 5.682385032792386e-06, "loss": 1.4584531784057617, "step": 1486 }, { "epoch": 1.5345717234262126, "grad_norm": 13.23496465250211, "learning_rate": 5.6764361523975535e-06, "loss": 1.3606812953948975, "step": 1487 }, { "epoch": 1.5356037151702786, "grad_norm": 11.134571166045973, "learning_rate": 5.670486296449373e-06, "loss": 1.5671963691711426, "step": 1488 }, { "epoch": 1.5366357069143448, "grad_norm": 15.493278817263745, "learning_rate": 5.664535473528698e-06, "loss": 1.2391408681869507, "step": 1489 }, { "epoch": 1.5376676986584106, "grad_norm": 10.213002097693753, "learning_rate": 5.658583692217783e-06, "loss": 1.294258713722229, "step": 1490 }, { "epoch": 1.5386996904024768, "grad_norm": 12.818515458411959, "learning_rate": 5.65263096110026e-06, "loss": 1.159803867340088, "step": 1491 }, { "epoch": 1.539731682146543, "grad_norm": 10.797451105215767, "learning_rate": 5.646677288761132e-06, "loss": 1.3275816440582275, "step": 1492 }, { "epoch": 1.5407636738906088, "grad_norm": 8.030273488551382, "learning_rate": 5.640722683786763e-06, "loss": 1.8123530149459839, "step": 1493 }, { "epoch": 1.541795665634675, "grad_norm": 9.27722564107055, "learning_rate": 5.634767154764855e-06, "loss": 1.186286449432373, "step": 1494 }, { "epoch": 1.542827657378741, "grad_norm": 9.714533218402066, "learning_rate": 5.628810710284452e-06, "loss": 1.3890738487243652, "step": 1495 }, { "epoch": 1.543859649122807, "grad_norm": 13.892419121328265, "learning_rate": 5.622853358935908e-06, "loss": 1.5548430681228638, "step": 1496 }, { "epoch": 1.5448916408668731, "grad_norm": 19.532000923493, "learning_rate": 5.616895109310891e-06, "loss": 1.4705501794815063, "step": 1497 }, { "epoch": 1.5459236326109391, "grad_norm": 12.11543562722765, "learning_rate": 5.6109359700023655e-06, "loss": 1.580586314201355, "step": 1498 }, { "epoch": 1.546955624355005, "grad_norm": 10.642818826957756, "learning_rate": 5.604975949604575e-06, "loss": 1.3070194721221924, "step": 1499 }, { "epoch": 1.5479876160990713, "grad_norm": 9.62098409065898, "learning_rate": 5.599015056713037e-06, "loss": 1.736723780632019, "step": 1500 }, { "epoch": 1.5490196078431373, "grad_norm": 13.275477934185444, "learning_rate": 5.5930532999245246e-06, "loss": 1.9193358421325684, "step": 1501 }, { "epoch": 1.5500515995872033, "grad_norm": 13.769257992693825, "learning_rate": 5.587090687837059e-06, "loss": 2.576160192489624, "step": 1502 }, { "epoch": 1.5510835913312695, "grad_norm": 12.63314417067987, "learning_rate": 5.581127229049892e-06, "loss": 2.4420785903930664, "step": 1503 }, { "epoch": 1.5521155830753353, "grad_norm": 18.02829925219223, "learning_rate": 5.575162932163501e-06, "loss": 1.7575864791870117, "step": 1504 }, { "epoch": 1.5531475748194015, "grad_norm": 18.256616100672844, "learning_rate": 5.569197805779571e-06, "loss": 1.8051812648773193, "step": 1505 }, { "epoch": 1.5541795665634675, "grad_norm": 10.34342544361706, "learning_rate": 5.563231858500978e-06, "loss": 1.7020881175994873, "step": 1506 }, { "epoch": 1.5552115583075334, "grad_norm": 9.179344832215872, "learning_rate": 5.5572650989317874e-06, "loss": 1.781341314315796, "step": 1507 }, { "epoch": 1.5562435500515996, "grad_norm": 10.177043030118556, "learning_rate": 5.551297535677236e-06, "loss": 1.5569262504577637, "step": 1508 }, { "epoch": 1.5572755417956656, "grad_norm": 9.583449662670834, "learning_rate": 5.545329177343717e-06, "loss": 2.1539697647094727, "step": 1509 }, { "epoch": 1.5583075335397316, "grad_norm": 9.400041914372162, "learning_rate": 5.539360032538771e-06, "loss": 1.4488544464111328, "step": 1510 }, { "epoch": 1.5593395252837978, "grad_norm": 7.919911135989337, "learning_rate": 5.533390109871074e-06, "loss": 0.8961690068244934, "step": 1511 }, { "epoch": 1.5603715170278638, "grad_norm": 12.372315743172784, "learning_rate": 5.527419417950424e-06, "loss": 1.3138301372528076, "step": 1512 }, { "epoch": 1.5614035087719298, "grad_norm": 12.730704327051964, "learning_rate": 5.521447965387725e-06, "loss": 1.3151988983154297, "step": 1513 }, { "epoch": 1.562435500515996, "grad_norm": 7.100293313995284, "learning_rate": 5.515475760794984e-06, "loss": 1.3968534469604492, "step": 1514 }, { "epoch": 1.5634674922600618, "grad_norm": 7.392014954526171, "learning_rate": 5.509502812785286e-06, "loss": 1.4642367362976074, "step": 1515 }, { "epoch": 1.564499484004128, "grad_norm": 7.492607467771529, "learning_rate": 5.503529129972792e-06, "loss": 1.7389013767242432, "step": 1516 }, { "epoch": 1.5655314757481942, "grad_norm": 8.944711785968888, "learning_rate": 5.497554720972723e-06, "loss": 1.4127401113510132, "step": 1517 }, { "epoch": 1.56656346749226, "grad_norm": 12.957800966751032, "learning_rate": 5.4915795944013475e-06, "loss": 1.6793855428695679, "step": 1518 }, { "epoch": 1.5675954592363261, "grad_norm": 7.326475883945584, "learning_rate": 5.485603758875965e-06, "loss": 1.7146462202072144, "step": 1519 }, { "epoch": 1.5686274509803921, "grad_norm": 11.589191219080782, "learning_rate": 5.479627223014902e-06, "loss": 1.3451881408691406, "step": 1520 }, { "epoch": 1.5696594427244581, "grad_norm": 16.447402666325793, "learning_rate": 5.4736499954374914e-06, "loss": 1.2179075479507446, "step": 1521 }, { "epoch": 1.5706914344685243, "grad_norm": 8.160593859091993, "learning_rate": 5.467672084764066e-06, "loss": 1.4500888586044312, "step": 1522 }, { "epoch": 1.5717234262125903, "grad_norm": 13.125699034555025, "learning_rate": 5.461693499615945e-06, "loss": 1.6095616817474365, "step": 1523 }, { "epoch": 1.5727554179566563, "grad_norm": 8.93740984586208, "learning_rate": 5.455714248615417e-06, "loss": 1.4185380935668945, "step": 1524 }, { "epoch": 1.5737874097007225, "grad_norm": 14.45255315472622, "learning_rate": 5.449734340385731e-06, "loss": 2.674546718597412, "step": 1525 }, { "epoch": 1.5748194014447885, "grad_norm": 7.4837493471229175, "learning_rate": 5.443753783551089e-06, "loss": 1.137263536453247, "step": 1526 }, { "epoch": 1.5758513931888545, "grad_norm": 9.570474546668043, "learning_rate": 5.4377725867366215e-06, "loss": 1.42196786403656, "step": 1527 }, { "epoch": 1.5768833849329207, "grad_norm": 9.511890006370185, "learning_rate": 5.431790758568388e-06, "loss": 1.3452329635620117, "step": 1528 }, { "epoch": 1.5779153766769864, "grad_norm": 12.043083718932166, "learning_rate": 5.425808307673353e-06, "loss": 1.150077223777771, "step": 1529 }, { "epoch": 1.5789473684210527, "grad_norm": 10.943590583717281, "learning_rate": 5.4198252426793815e-06, "loss": 1.872908592224121, "step": 1530 }, { "epoch": 1.5799793601651186, "grad_norm": 9.128250032776844, "learning_rate": 5.413841572215228e-06, "loss": 2.1760289669036865, "step": 1531 }, { "epoch": 1.5810113519091846, "grad_norm": 7.921443667774921, "learning_rate": 5.4078573049105135e-06, "loss": 1.3120591640472412, "step": 1532 }, { "epoch": 1.5820433436532508, "grad_norm": 12.057521921968885, "learning_rate": 5.401872449395724e-06, "loss": 1.4298338890075684, "step": 1533 }, { "epoch": 1.5830753353973168, "grad_norm": 18.411184603387003, "learning_rate": 5.3958870143021925e-06, "loss": 1.3725688457489014, "step": 1534 }, { "epoch": 1.5841073271413828, "grad_norm": 10.04468876006326, "learning_rate": 5.389901008262088e-06, "loss": 1.4433213472366333, "step": 1535 }, { "epoch": 1.585139318885449, "grad_norm": 9.128904820696041, "learning_rate": 5.383914439908403e-06, "loss": 1.6381319761276245, "step": 1536 }, { "epoch": 1.586171310629515, "grad_norm": 11.525643554964871, "learning_rate": 5.377927317874942e-06, "loss": 1.583193063735962, "step": 1537 }, { "epoch": 1.587203302373581, "grad_norm": 9.945250975135199, "learning_rate": 5.371939650796307e-06, "loss": 1.3325223922729492, "step": 1538 }, { "epoch": 1.5882352941176472, "grad_norm": 11.326604029853062, "learning_rate": 5.365951447307884e-06, "loss": 1.445408821105957, "step": 1539 }, { "epoch": 1.589267285861713, "grad_norm": 14.685339833591474, "learning_rate": 5.359962716045836e-06, "loss": 1.6517233848571777, "step": 1540 }, { "epoch": 1.5902992776057792, "grad_norm": 9.251883685106936, "learning_rate": 5.353973465647085e-06, "loss": 1.377445936203003, "step": 1541 }, { "epoch": 1.5913312693498454, "grad_norm": 9.41245716131414, "learning_rate": 5.347983704749307e-06, "loss": 1.2144426107406616, "step": 1542 }, { "epoch": 1.5923632610939111, "grad_norm": 28.182784474952282, "learning_rate": 5.3419934419909024e-06, "loss": 2.0331592559814453, "step": 1543 }, { "epoch": 1.5933952528379773, "grad_norm": 9.463227142084353, "learning_rate": 5.336002686011007e-06, "loss": 1.401644229888916, "step": 1544 }, { "epoch": 1.5944272445820433, "grad_norm": 13.957678408468148, "learning_rate": 5.330011445449463e-06, "loss": 1.7547483444213867, "step": 1545 }, { "epoch": 1.5954592363261093, "grad_norm": 13.965244353484048, "learning_rate": 5.324019728946813e-06, "loss": 1.2632057666778564, "step": 1546 }, { "epoch": 1.5964912280701755, "grad_norm": 8.227946520511322, "learning_rate": 5.318027545144285e-06, "loss": 1.8406842947006226, "step": 1547 }, { "epoch": 1.5975232198142415, "grad_norm": 10.461119013028867, "learning_rate": 5.312034902683779e-06, "loss": 1.6203927993774414, "step": 1548 }, { "epoch": 1.5985552115583075, "grad_norm": 12.655939294576894, "learning_rate": 5.3060418102078606e-06, "loss": 1.2169432640075684, "step": 1549 }, { "epoch": 1.5995872033023737, "grad_norm": 15.741119526890982, "learning_rate": 5.30004827635974e-06, "loss": 1.2657561302185059, "step": 1550 }, { "epoch": 1.6006191950464397, "grad_norm": 9.942059337569491, "learning_rate": 5.29405430978327e-06, "loss": 1.4021785259246826, "step": 1551 }, { "epoch": 1.6016511867905057, "grad_norm": 18.7804432153341, "learning_rate": 5.288059919122922e-06, "loss": 1.5848625898361206, "step": 1552 }, { "epoch": 1.6026831785345719, "grad_norm": 13.139933695764597, "learning_rate": 5.28206511302378e-06, "loss": 1.2499003410339355, "step": 1553 }, { "epoch": 1.6037151702786376, "grad_norm": 15.87750104151842, "learning_rate": 5.276069900131527e-06, "loss": 2.3699073791503906, "step": 1554 }, { "epoch": 1.6047471620227038, "grad_norm": 14.943036426486472, "learning_rate": 5.270074289092436e-06, "loss": 1.2444607019424438, "step": 1555 }, { "epoch": 1.6057791537667698, "grad_norm": 8.815300016946654, "learning_rate": 5.2640782885533515e-06, "loss": 1.8418155908584595, "step": 1556 }, { "epoch": 1.6068111455108358, "grad_norm": 17.329411897746205, "learning_rate": 5.258081907161679e-06, "loss": 1.5811779499053955, "step": 1557 }, { "epoch": 1.607843137254902, "grad_norm": 8.381110893104205, "learning_rate": 5.252085153565375e-06, "loss": 0.8724288940429688, "step": 1558 }, { "epoch": 1.608875128998968, "grad_norm": 11.581865689267957, "learning_rate": 5.246088036412932e-06, "loss": 1.2417452335357666, "step": 1559 }, { "epoch": 1.609907120743034, "grad_norm": 14.733501236802056, "learning_rate": 5.240090564353365e-06, "loss": 1.3736588954925537, "step": 1560 }, { "epoch": 1.6109391124871002, "grad_norm": 9.371127457010875, "learning_rate": 5.234092746036207e-06, "loss": 1.3768947124481201, "step": 1561 }, { "epoch": 1.6119711042311662, "grad_norm": 12.9031259829694, "learning_rate": 5.228094590111482e-06, "loss": 1.6988537311553955, "step": 1562 }, { "epoch": 1.6130030959752322, "grad_norm": 8.543930368744116, "learning_rate": 5.222096105229706e-06, "loss": 1.2279560565948486, "step": 1563 }, { "epoch": 1.6140350877192984, "grad_norm": 21.64334602867434, "learning_rate": 5.21609730004187e-06, "loss": 1.220855712890625, "step": 1564 }, { "epoch": 1.6150670794633641, "grad_norm": 8.654475735092758, "learning_rate": 5.210098183199425e-06, "loss": 1.4797768592834473, "step": 1565 }, { "epoch": 1.6160990712074303, "grad_norm": 9.732715925307753, "learning_rate": 5.204098763354271e-06, "loss": 1.3446614742279053, "step": 1566 }, { "epoch": 1.6171310629514963, "grad_norm": 13.726568655001055, "learning_rate": 5.198099049158747e-06, "loss": 1.5844343900680542, "step": 1567 }, { "epoch": 1.6181630546955623, "grad_norm": 13.041238597711342, "learning_rate": 5.1920990492656135e-06, "loss": 1.4087917804718018, "step": 1568 }, { "epoch": 1.6191950464396285, "grad_norm": 10.797232297896977, "learning_rate": 5.186098772328045e-06, "loss": 1.329378604888916, "step": 1569 }, { "epoch": 1.6202270381836945, "grad_norm": 19.667372833362254, "learning_rate": 5.180098226999618e-06, "loss": 1.7636022567749023, "step": 1570 }, { "epoch": 1.6212590299277605, "grad_norm": 12.2622869747031, "learning_rate": 5.174097421934292e-06, "loss": 1.1490764617919922, "step": 1571 }, { "epoch": 1.6222910216718267, "grad_norm": 21.98393618159907, "learning_rate": 5.168096365786402e-06, "loss": 2.3102211952209473, "step": 1572 }, { "epoch": 1.6233230134158927, "grad_norm": 9.735773744875136, "learning_rate": 5.162095067210649e-06, "loss": 2.117244243621826, "step": 1573 }, { "epoch": 1.6243550051599587, "grad_norm": 18.463264815416654, "learning_rate": 5.156093534862073e-06, "loss": 1.6067651510238647, "step": 1574 }, { "epoch": 1.6253869969040249, "grad_norm": 10.41108714363021, "learning_rate": 5.150091777396064e-06, "loss": 1.8751254081726074, "step": 1575 }, { "epoch": 1.6264189886480909, "grad_norm": 16.631981321449587, "learning_rate": 5.144089803468333e-06, "loss": 1.724184513092041, "step": 1576 }, { "epoch": 1.6274509803921569, "grad_norm": 16.402806927437133, "learning_rate": 5.1380876217348975e-06, "loss": 1.6436328887939453, "step": 1577 }, { "epoch": 1.628482972136223, "grad_norm": 14.603029785469376, "learning_rate": 5.132085240852081e-06, "loss": 1.4703165292739868, "step": 1578 }, { "epoch": 1.6295149638802888, "grad_norm": 10.809206838766594, "learning_rate": 5.126082669476486e-06, "loss": 1.5617787837982178, "step": 1579 }, { "epoch": 1.630546955624355, "grad_norm": 15.021756779960803, "learning_rate": 5.1200799162650035e-06, "loss": 1.7255334854125977, "step": 1580 }, { "epoch": 1.631578947368421, "grad_norm": 11.385645534836486, "learning_rate": 5.114076989874774e-06, "loss": 1.0603340864181519, "step": 1581 }, { "epoch": 1.632610939112487, "grad_norm": 10.23379994812466, "learning_rate": 5.108073898963194e-06, "loss": 1.5189906358718872, "step": 1582 }, { "epoch": 1.6336429308565532, "grad_norm": 9.144538402885932, "learning_rate": 5.102070652187896e-06, "loss": 1.7182832956314087, "step": 1583 }, { "epoch": 1.6346749226006192, "grad_norm": 9.54719126606895, "learning_rate": 5.096067258206735e-06, "loss": 1.6244089603424072, "step": 1584 }, { "epoch": 1.6357069143446852, "grad_norm": 14.084997047091763, "learning_rate": 5.090063725677783e-06, "loss": 1.9122145175933838, "step": 1585 }, { "epoch": 1.6367389060887514, "grad_norm": 10.013449437705365, "learning_rate": 5.084060063259307e-06, "loss": 1.6446342468261719, "step": 1586 }, { "epoch": 1.6377708978328174, "grad_norm": 17.089763116564722, "learning_rate": 5.078056279609765e-06, "loss": 1.7437760829925537, "step": 1587 }, { "epoch": 1.6388028895768834, "grad_norm": 11.044670734053568, "learning_rate": 5.072052383387787e-06, "loss": 1.2320588827133179, "step": 1588 }, { "epoch": 1.6398348813209496, "grad_norm": 14.08195744318699, "learning_rate": 5.066048383252167e-06, "loss": 1.3749891519546509, "step": 1589 }, { "epoch": 1.6408668730650153, "grad_norm": 16.434964061637274, "learning_rate": 5.060044287861849e-06, "loss": 1.593663215637207, "step": 1590 }, { "epoch": 1.6418988648090815, "grad_norm": 14.051200573452352, "learning_rate": 5.0540401058759146e-06, "loss": 1.4485447406768799, "step": 1591 }, { "epoch": 1.6429308565531475, "grad_norm": 7.510337244595722, "learning_rate": 5.048035845953569e-06, "loss": 1.5242805480957031, "step": 1592 }, { "epoch": 1.6439628482972135, "grad_norm": 8.52829998314459, "learning_rate": 5.0420315167541276e-06, "loss": 1.5791704654693604, "step": 1593 }, { "epoch": 1.6449948400412797, "grad_norm": 8.817881646576703, "learning_rate": 5.036027126937013e-06, "loss": 1.4871054887771606, "step": 1594 }, { "epoch": 1.6460268317853457, "grad_norm": 36.28513377894254, "learning_rate": 5.030022685161728e-06, "loss": 1.431492567062378, "step": 1595 }, { "epoch": 1.6470588235294117, "grad_norm": 7.8606543397940944, "learning_rate": 5.024018200087855e-06, "loss": 1.1094318628311157, "step": 1596 }, { "epoch": 1.648090815273478, "grad_norm": 12.578770283939217, "learning_rate": 5.018013680375035e-06, "loss": 1.6543710231781006, "step": 1597 }, { "epoch": 1.6491228070175439, "grad_norm": 9.50037107760328, "learning_rate": 5.012009134682962e-06, "loss": 1.903153896331787, "step": 1598 }, { "epoch": 1.6501547987616099, "grad_norm": 7.830729585220461, "learning_rate": 5.006004571671366e-06, "loss": 1.0540351867675781, "step": 1599 }, { "epoch": 1.651186790505676, "grad_norm": 10.062924028777218, "learning_rate": 5e-06, "loss": 2.05865216255188, "step": 1600 }, { "epoch": 1.652218782249742, "grad_norm": 15.157412191135569, "learning_rate": 4.993995428328636e-06, "loss": 1.5756983757019043, "step": 1601 }, { "epoch": 1.653250773993808, "grad_norm": 9.663025370177763, "learning_rate": 4.987990865317041e-06, "loss": 1.184818148612976, "step": 1602 }, { "epoch": 1.6542827657378743, "grad_norm": 14.143188141995502, "learning_rate": 4.981986319624967e-06, "loss": 1.0625786781311035, "step": 1603 }, { "epoch": 1.65531475748194, "grad_norm": 8.063283383306972, "learning_rate": 4.975981799912147e-06, "loss": 1.6618146896362305, "step": 1604 }, { "epoch": 1.6563467492260062, "grad_norm": 12.709312761105126, "learning_rate": 4.969977314838272e-06, "loss": 1.8030552864074707, "step": 1605 }, { "epoch": 1.6573787409700722, "grad_norm": 14.574230843269499, "learning_rate": 4.9639728730629875e-06, "loss": 1.3334550857543945, "step": 1606 }, { "epoch": 1.6584107327141382, "grad_norm": 15.387353844548636, "learning_rate": 4.957968483245872e-06, "loss": 1.209212303161621, "step": 1607 }, { "epoch": 1.6594427244582044, "grad_norm": 7.981954261851792, "learning_rate": 4.951964154046432e-06, "loss": 1.3116942644119263, "step": 1608 }, { "epoch": 1.6604747162022704, "grad_norm": 8.253089376554888, "learning_rate": 4.945959894124087e-06, "loss": 2.226292848587036, "step": 1609 }, { "epoch": 1.6615067079463364, "grad_norm": 12.043947987554164, "learning_rate": 4.939955712138152e-06, "loss": 1.098877191543579, "step": 1610 }, { "epoch": 1.6625386996904026, "grad_norm": 20.431447291322144, "learning_rate": 4.933951616747836e-06, "loss": 1.091958999633789, "step": 1611 }, { "epoch": 1.6635706914344686, "grad_norm": 15.057623937343532, "learning_rate": 4.927947616612216e-06, "loss": 1.723926067352295, "step": 1612 }, { "epoch": 1.6646026831785345, "grad_norm": 12.230671689105673, "learning_rate": 4.921943720390237e-06, "loss": 1.3400969505310059, "step": 1613 }, { "epoch": 1.6656346749226008, "grad_norm": 7.6343512879012305, "learning_rate": 4.915939936740695e-06, "loss": 1.2368969917297363, "step": 1614 }, { "epoch": 1.6666666666666665, "grad_norm": 10.268261353546139, "learning_rate": 4.909936274322218e-06, "loss": 2.005229949951172, "step": 1615 }, { "epoch": 1.6676986584107327, "grad_norm": 8.935903383990896, "learning_rate": 4.903932741793266e-06, "loss": 1.6855125427246094, "step": 1616 }, { "epoch": 1.6687306501547987, "grad_norm": 10.95300941540321, "learning_rate": 4.897929347812105e-06, "loss": 2.65848445892334, "step": 1617 }, { "epoch": 1.6697626418988647, "grad_norm": 11.881114482756562, "learning_rate": 4.891926101036807e-06, "loss": 1.18087899684906, "step": 1618 }, { "epoch": 1.670794633642931, "grad_norm": 11.21765463146983, "learning_rate": 4.8859230101252265e-06, "loss": 1.3030716180801392, "step": 1619 }, { "epoch": 1.671826625386997, "grad_norm": 9.896122171751951, "learning_rate": 4.879920083734997e-06, "loss": 1.1979525089263916, "step": 1620 }, { "epoch": 1.6728586171310629, "grad_norm": 10.086324155948205, "learning_rate": 4.873917330523515e-06, "loss": 1.686155080795288, "step": 1621 }, { "epoch": 1.673890608875129, "grad_norm": 16.56729831056172, "learning_rate": 4.867914759147923e-06, "loss": 1.1940433979034424, "step": 1622 }, { "epoch": 1.674922600619195, "grad_norm": 11.583028128884898, "learning_rate": 4.861912378265105e-06, "loss": 1.2244114875793457, "step": 1623 }, { "epoch": 1.675954592363261, "grad_norm": 10.593147180242253, "learning_rate": 4.855910196531669e-06, "loss": 1.3701207637786865, "step": 1624 }, { "epoch": 1.6769865841073273, "grad_norm": 8.676009971461912, "learning_rate": 4.849908222603935e-06, "loss": 1.537056803703308, "step": 1625 }, { "epoch": 1.678018575851393, "grad_norm": 9.6472850150048, "learning_rate": 4.843906465137928e-06, "loss": 1.1899044513702393, "step": 1626 }, { "epoch": 1.6790505675954592, "grad_norm": 11.331798705960017, "learning_rate": 4.837904932789354e-06, "loss": 1.2348029613494873, "step": 1627 }, { "epoch": 1.6800825593395254, "grad_norm": 14.519718124029408, "learning_rate": 4.8319036342135985e-06, "loss": 1.33205246925354, "step": 1628 }, { "epoch": 1.6811145510835912, "grad_norm": 9.937051983890404, "learning_rate": 4.825902578065709e-06, "loss": 1.4491955041885376, "step": 1629 }, { "epoch": 1.6821465428276574, "grad_norm": 13.640510767187013, "learning_rate": 4.8199017730003835e-06, "loss": 1.3364590406417847, "step": 1630 }, { "epoch": 1.6831785345717234, "grad_norm": 9.463468217820962, "learning_rate": 4.813901227671956e-06, "loss": 1.0079162120819092, "step": 1631 }, { "epoch": 1.6842105263157894, "grad_norm": 16.78555368860191, "learning_rate": 4.807900950734388e-06, "loss": 1.1904146671295166, "step": 1632 }, { "epoch": 1.6852425180598556, "grad_norm": 17.46837597768086, "learning_rate": 4.801900950841256e-06, "loss": 1.6875295639038086, "step": 1633 }, { "epoch": 1.6862745098039216, "grad_norm": 11.04892393388373, "learning_rate": 4.7959012366457296e-06, "loss": 1.0612598657608032, "step": 1634 }, { "epoch": 1.6873065015479876, "grad_norm": 10.951994980251744, "learning_rate": 4.789901816800576e-06, "loss": 1.220237374305725, "step": 1635 }, { "epoch": 1.6883384932920538, "grad_norm": 10.810193358891974, "learning_rate": 4.78390269995813e-06, "loss": 2.282114267349243, "step": 1636 }, { "epoch": 1.6893704850361198, "grad_norm": 10.601081487834142, "learning_rate": 4.777903894770295e-06, "loss": 1.8550426959991455, "step": 1637 }, { "epoch": 1.6904024767801857, "grad_norm": 16.937769027859023, "learning_rate": 4.771905409888519e-06, "loss": 1.6059949398040771, "step": 1638 }, { "epoch": 1.691434468524252, "grad_norm": 11.82584669633604, "learning_rate": 4.765907253963794e-06, "loss": 1.5053925514221191, "step": 1639 }, { "epoch": 1.6924664602683177, "grad_norm": 21.097151926344726, "learning_rate": 4.759909435646636e-06, "loss": 1.4629043340682983, "step": 1640 }, { "epoch": 1.693498452012384, "grad_norm": 13.235912731207039, "learning_rate": 4.75391196358707e-06, "loss": 1.3238599300384521, "step": 1641 }, { "epoch": 1.69453044375645, "grad_norm": 7.8725490128533675, "learning_rate": 4.747914846434628e-06, "loss": 1.45640230178833, "step": 1642 }, { "epoch": 1.6955624355005159, "grad_norm": 14.946160581628561, "learning_rate": 4.741918092838323e-06, "loss": 1.2770317792892456, "step": 1643 }, { "epoch": 1.696594427244582, "grad_norm": 8.510880011836228, "learning_rate": 4.735921711446649e-06, "loss": 2.1564223766326904, "step": 1644 }, { "epoch": 1.697626418988648, "grad_norm": 14.277925313390723, "learning_rate": 4.729925710907564e-06, "loss": 1.5286706686019897, "step": 1645 }, { "epoch": 1.698658410732714, "grad_norm": 19.948193785198274, "learning_rate": 4.723930099868474e-06, "loss": 1.0891897678375244, "step": 1646 }, { "epoch": 1.6996904024767803, "grad_norm": 17.95581198688877, "learning_rate": 4.717934886976222e-06, "loss": 1.64451265335083, "step": 1647 }, { "epoch": 1.7007223942208463, "grad_norm": 8.13400056672354, "learning_rate": 4.711940080877079e-06, "loss": 2.1116182804107666, "step": 1648 }, { "epoch": 1.7017543859649122, "grad_norm": 11.37852718181068, "learning_rate": 4.705945690216732e-06, "loss": 1.7777572870254517, "step": 1649 }, { "epoch": 1.7027863777089784, "grad_norm": 15.414009752148521, "learning_rate": 4.6999517236402606e-06, "loss": 1.2301387786865234, "step": 1650 }, { "epoch": 1.7038183694530442, "grad_norm": 10.564694903218198, "learning_rate": 4.693958189792141e-06, "loss": 1.45538330078125, "step": 1651 }, { "epoch": 1.7048503611971104, "grad_norm": 10.880485640783963, "learning_rate": 4.687965097316223e-06, "loss": 1.4673454761505127, "step": 1652 }, { "epoch": 1.7058823529411766, "grad_norm": 12.193018066560857, "learning_rate": 4.681972454855716e-06, "loss": 1.5198006629943848, "step": 1653 }, { "epoch": 1.7069143446852424, "grad_norm": 11.781383156813746, "learning_rate": 4.675980271053188e-06, "loss": 1.2425904273986816, "step": 1654 }, { "epoch": 1.7079463364293086, "grad_norm": 10.567582329941056, "learning_rate": 4.669988554550537e-06, "loss": 1.4114564657211304, "step": 1655 }, { "epoch": 1.7089783281733746, "grad_norm": 10.888758406783566, "learning_rate": 4.6639973139889944e-06, "loss": 1.25340735912323, "step": 1656 }, { "epoch": 1.7100103199174406, "grad_norm": 8.530234178613362, "learning_rate": 4.658006558009099e-06, "loss": 1.8368052244186401, "step": 1657 }, { "epoch": 1.7110423116615068, "grad_norm": 8.41778259091175, "learning_rate": 4.6520162952506955e-06, "loss": 1.3329459428787231, "step": 1658 }, { "epoch": 1.7120743034055728, "grad_norm": 11.408600150564817, "learning_rate": 4.646026534352915e-06, "loss": 1.4662222862243652, "step": 1659 }, { "epoch": 1.7131062951496387, "grad_norm": 10.198028210749765, "learning_rate": 4.640037283954165e-06, "loss": 1.2321686744689941, "step": 1660 }, { "epoch": 1.714138286893705, "grad_norm": 14.136421995011203, "learning_rate": 4.634048552692118e-06, "loss": 1.333176851272583, "step": 1661 }, { "epoch": 1.715170278637771, "grad_norm": 22.44277745548592, "learning_rate": 4.628060349203696e-06, "loss": 1.7321614027023315, "step": 1662 }, { "epoch": 1.716202270381837, "grad_norm": 9.246555897894348, "learning_rate": 4.6220726821250585e-06, "loss": 1.0161447525024414, "step": 1663 }, { "epoch": 1.7172342621259031, "grad_norm": 14.785133991387573, "learning_rate": 4.616085560091596e-06, "loss": 2.0218515396118164, "step": 1664 }, { "epoch": 1.718266253869969, "grad_norm": 10.769115891694414, "learning_rate": 4.6100989917379135e-06, "loss": 1.2232139110565186, "step": 1665 }, { "epoch": 1.719298245614035, "grad_norm": 11.807273474877455, "learning_rate": 4.604112985697809e-06, "loss": 1.8226964473724365, "step": 1666 }, { "epoch": 1.720330237358101, "grad_norm": 12.289979140449626, "learning_rate": 4.598127550604277e-06, "loss": 1.0019363164901733, "step": 1667 }, { "epoch": 1.721362229102167, "grad_norm": 8.738085743756857, "learning_rate": 4.592142695089489e-06, "loss": 1.4394011497497559, "step": 1668 }, { "epoch": 1.7223942208462333, "grad_norm": 13.565046659820133, "learning_rate": 4.586158427784774e-06, "loss": 1.683599829673767, "step": 1669 }, { "epoch": 1.7234262125902993, "grad_norm": 11.027567358705781, "learning_rate": 4.580174757320619e-06, "loss": 1.4985852241516113, "step": 1670 }, { "epoch": 1.7244582043343653, "grad_norm": 7.447607771930486, "learning_rate": 4.57419169232665e-06, "loss": 1.4004662036895752, "step": 1671 }, { "epoch": 1.7254901960784315, "grad_norm": 15.051229317074, "learning_rate": 4.568209241431615e-06, "loss": 1.4091589450836182, "step": 1672 }, { "epoch": 1.7265221878224974, "grad_norm": 16.506203657234344, "learning_rate": 4.5622274132633785e-06, "loss": 1.4664890766143799, "step": 1673 }, { "epoch": 1.7275541795665634, "grad_norm": 10.232611043119553, "learning_rate": 4.556246216448911e-06, "loss": 1.2841026782989502, "step": 1674 }, { "epoch": 1.7285861713106296, "grad_norm": 7.835802175897451, "learning_rate": 4.5502656596142695e-06, "loss": 1.7837278842926025, "step": 1675 }, { "epoch": 1.7296181630546954, "grad_norm": 12.624683401827452, "learning_rate": 4.544285751384585e-06, "loss": 2.0063414573669434, "step": 1676 }, { "epoch": 1.7306501547987616, "grad_norm": 16.539887599121375, "learning_rate": 4.538306500384056e-06, "loss": 0.8558920621871948, "step": 1677 }, { "epoch": 1.7316821465428278, "grad_norm": 16.751030182188703, "learning_rate": 4.5323279152359355e-06, "loss": 1.3210715055465698, "step": 1678 }, { "epoch": 1.7327141382868936, "grad_norm": 7.039841235166431, "learning_rate": 4.526350004562511e-06, "loss": 2.069108009338379, "step": 1679 }, { "epoch": 1.7337461300309598, "grad_norm": 11.392043707468872, "learning_rate": 4.520372776985101e-06, "loss": 1.338263750076294, "step": 1680 }, { "epoch": 1.7347781217750258, "grad_norm": 10.516192126098069, "learning_rate": 4.5143962411240375e-06, "loss": 1.6379947662353516, "step": 1681 }, { "epoch": 1.7358101135190918, "grad_norm": 9.35772522779427, "learning_rate": 4.508420405598653e-06, "loss": 2.372286081314087, "step": 1682 }, { "epoch": 1.736842105263158, "grad_norm": 9.517241992831798, "learning_rate": 4.502445279027277e-06, "loss": 0.9688540697097778, "step": 1683 }, { "epoch": 1.737874097007224, "grad_norm": 13.372923199452366, "learning_rate": 4.496470870027209e-06, "loss": 1.966747760772705, "step": 1684 }, { "epoch": 1.73890608875129, "grad_norm": 10.363511311415849, "learning_rate": 4.490497187214716e-06, "loss": 1.199428915977478, "step": 1685 }, { "epoch": 1.7399380804953561, "grad_norm": 18.07202517668516, "learning_rate": 4.484524239205018e-06, "loss": 1.5780396461486816, "step": 1686 }, { "epoch": 1.7409700722394221, "grad_norm": 9.799818692093627, "learning_rate": 4.478552034612277e-06, "loss": 1.6711639165878296, "step": 1687 }, { "epoch": 1.7420020639834881, "grad_norm": 11.68127685838972, "learning_rate": 4.472580582049578e-06, "loss": 1.4168503284454346, "step": 1688 }, { "epoch": 1.7430340557275543, "grad_norm": 8.526896445904088, "learning_rate": 4.4666098901289275e-06, "loss": 1.3873357772827148, "step": 1689 }, { "epoch": 1.74406604747162, "grad_norm": 9.47407309758453, "learning_rate": 4.460639967461231e-06, "loss": 0.9989665746688843, "step": 1690 }, { "epoch": 1.7450980392156863, "grad_norm": 12.40847033937051, "learning_rate": 4.4546708226562855e-06, "loss": 1.3871872425079346, "step": 1691 }, { "epoch": 1.7461300309597523, "grad_norm": 13.755098765725549, "learning_rate": 4.448702464322764e-06, "loss": 1.8586505651474, "step": 1692 }, { "epoch": 1.7471620227038183, "grad_norm": 15.66048747585057, "learning_rate": 4.4427349010682125e-06, "loss": 0.824250340461731, "step": 1693 }, { "epoch": 1.7481940144478845, "grad_norm": 12.753131385497102, "learning_rate": 4.4367681414990235e-06, "loss": 1.7118589878082275, "step": 1694 }, { "epoch": 1.7492260061919505, "grad_norm": 10.39540501438844, "learning_rate": 4.43080219422043e-06, "loss": 1.2179622650146484, "step": 1695 }, { "epoch": 1.7502579979360164, "grad_norm": 13.089449428313449, "learning_rate": 4.4248370678364995e-06, "loss": 2.330209732055664, "step": 1696 }, { "epoch": 1.7512899896800826, "grad_norm": 8.196659926334611, "learning_rate": 4.418872770950109e-06, "loss": 1.450493335723877, "step": 1697 }, { "epoch": 1.7523219814241486, "grad_norm": 11.185288051737096, "learning_rate": 4.412909312162943e-06, "loss": 1.42171311378479, "step": 1698 }, { "epoch": 1.7533539731682146, "grad_norm": 8.29516600428506, "learning_rate": 4.406946700075478e-06, "loss": 1.521761178970337, "step": 1699 }, { "epoch": 1.7543859649122808, "grad_norm": 11.018372635918848, "learning_rate": 4.400984943286965e-06, "loss": 1.8015716075897217, "step": 1700 }, { "epoch": 1.7554179566563466, "grad_norm": 15.133587195079995, "learning_rate": 4.395024050395425e-06, "loss": 0.9233524203300476, "step": 1701 }, { "epoch": 1.7564499484004128, "grad_norm": 11.500988193878046, "learning_rate": 4.3890640299976345e-06, "loss": 1.43326997756958, "step": 1702 }, { "epoch": 1.757481940144479, "grad_norm": 12.310396485005848, "learning_rate": 4.38310489068911e-06, "loss": 1.3258637189865112, "step": 1703 }, { "epoch": 1.7585139318885448, "grad_norm": 8.523204733246939, "learning_rate": 4.377146641064093e-06, "loss": 1.4248607158660889, "step": 1704 }, { "epoch": 1.759545923632611, "grad_norm": 19.205926331715546, "learning_rate": 4.371189289715549e-06, "loss": 1.5387122631072998, "step": 1705 }, { "epoch": 1.760577915376677, "grad_norm": 15.113038777535353, "learning_rate": 4.365232845235146e-06, "loss": 0.923610508441925, "step": 1706 }, { "epoch": 1.761609907120743, "grad_norm": 12.698087301403156, "learning_rate": 4.35927731621324e-06, "loss": 0.9821719527244568, "step": 1707 }, { "epoch": 1.7626418988648092, "grad_norm": 9.948047333565306, "learning_rate": 4.3533227112388694e-06, "loss": 1.2262959480285645, "step": 1708 }, { "epoch": 1.7636738906088751, "grad_norm": 14.826874614805199, "learning_rate": 4.347369038899744e-06, "loss": 1.7229928970336914, "step": 1709 }, { "epoch": 1.7647058823529411, "grad_norm": 8.110995812952876, "learning_rate": 4.34141630778222e-06, "loss": 1.8245748281478882, "step": 1710 }, { "epoch": 1.7657378740970073, "grad_norm": 9.695390031194178, "learning_rate": 4.335464526471303e-06, "loss": 1.5449144840240479, "step": 1711 }, { "epoch": 1.7667698658410733, "grad_norm": 13.059824028606869, "learning_rate": 4.329513703550628e-06, "loss": 1.0499621629714966, "step": 1712 }, { "epoch": 1.7678018575851393, "grad_norm": 18.788511042618204, "learning_rate": 4.323563847602447e-06, "loss": 1.1592087745666504, "step": 1713 }, { "epoch": 1.7688338493292055, "grad_norm": 12.573399404834106, "learning_rate": 4.317614967207615e-06, "loss": 1.5952107906341553, "step": 1714 }, { "epoch": 1.7698658410732713, "grad_norm": 21.209842652730753, "learning_rate": 4.3116670709455835e-06, "loss": 1.9653873443603516, "step": 1715 }, { "epoch": 1.7708978328173375, "grad_norm": 14.706837026604616, "learning_rate": 4.305720167394381e-06, "loss": 1.9494094848632812, "step": 1716 }, { "epoch": 1.7719298245614035, "grad_norm": 19.022816684709333, "learning_rate": 4.299774265130609e-06, "loss": 1.2872564792633057, "step": 1717 }, { "epoch": 1.7729618163054695, "grad_norm": 13.219640085670449, "learning_rate": 4.293829372729422e-06, "loss": 1.2262442111968994, "step": 1718 }, { "epoch": 1.7739938080495357, "grad_norm": 14.616500111422022, "learning_rate": 4.287885498764518e-06, "loss": 1.4967855215072632, "step": 1719 }, { "epoch": 1.7750257997936016, "grad_norm": 9.026475934495569, "learning_rate": 4.2819426518081265e-06, "loss": 1.3772231340408325, "step": 1720 }, { "epoch": 1.7760577915376676, "grad_norm": 10.921117337301181, "learning_rate": 4.276000840430996e-06, "loss": 1.3878000974655151, "step": 1721 }, { "epoch": 1.7770897832817338, "grad_norm": 10.632278457360254, "learning_rate": 4.270060073202384e-06, "loss": 1.5760631561279297, "step": 1722 }, { "epoch": 1.7781217750257998, "grad_norm": 12.506170854043054, "learning_rate": 4.264120358690037e-06, "loss": 1.424022912979126, "step": 1723 }, { "epoch": 1.7791537667698658, "grad_norm": 10.890925941428618, "learning_rate": 4.258181705460188e-06, "loss": 0.8698490858078003, "step": 1724 }, { "epoch": 1.780185758513932, "grad_norm": 12.789536760043523, "learning_rate": 4.252244122077538e-06, "loss": 1.3168162107467651, "step": 1725 }, { "epoch": 1.7812177502579978, "grad_norm": 12.835238696082937, "learning_rate": 4.246307617105242e-06, "loss": 1.3633615970611572, "step": 1726 }, { "epoch": 1.782249742002064, "grad_norm": 11.61784719244646, "learning_rate": 4.240372199104904e-06, "loss": 1.4262614250183105, "step": 1727 }, { "epoch": 1.7832817337461302, "grad_norm": 9.6906194820901, "learning_rate": 4.234437876636557e-06, "loss": 2.1347155570983887, "step": 1728 }, { "epoch": 1.784313725490196, "grad_norm": 10.324544526775666, "learning_rate": 4.2285046582586585e-06, "loss": 2.0410780906677246, "step": 1729 }, { "epoch": 1.7853457172342622, "grad_norm": 12.604320361206465, "learning_rate": 4.222572552528067e-06, "loss": 1.3613595962524414, "step": 1730 }, { "epoch": 1.7863777089783281, "grad_norm": 18.674271236695578, "learning_rate": 4.216641568000044e-06, "loss": 0.9914162755012512, "step": 1731 }, { "epoch": 1.7874097007223941, "grad_norm": 10.550522092811725, "learning_rate": 4.21071171322823e-06, "loss": 1.4601118564605713, "step": 1732 }, { "epoch": 1.7884416924664603, "grad_norm": 13.143024178808387, "learning_rate": 4.204782996764634e-06, "loss": 1.7215886116027832, "step": 1733 }, { "epoch": 1.7894736842105263, "grad_norm": 12.946786676017096, "learning_rate": 4.198855427159628e-06, "loss": 1.5440949201583862, "step": 1734 }, { "epoch": 1.7905056759545923, "grad_norm": 15.205947894883542, "learning_rate": 4.192929012961927e-06, "loss": 1.1962049007415771, "step": 1735 }, { "epoch": 1.7915376676986585, "grad_norm": 11.249024041201825, "learning_rate": 4.1870037627185785e-06, "loss": 1.5020911693572998, "step": 1736 }, { "epoch": 1.7925696594427245, "grad_norm": 7.122886080105791, "learning_rate": 4.181079684974956e-06, "loss": 1.357811450958252, "step": 1737 }, { "epoch": 1.7936016511867905, "grad_norm": 8.219069585584625, "learning_rate": 4.175156788274738e-06, "loss": 0.9888667464256287, "step": 1738 }, { "epoch": 1.7946336429308567, "grad_norm": 10.65778312145511, "learning_rate": 4.169235081159901e-06, "loss": 1.6089493036270142, "step": 1739 }, { "epoch": 1.7956656346749225, "grad_norm": 14.420633707013382, "learning_rate": 4.163314572170704e-06, "loss": 1.5799247026443481, "step": 1740 }, { "epoch": 1.7966976264189887, "grad_norm": 10.779161976759232, "learning_rate": 4.157395269845684e-06, "loss": 1.1055116653442383, "step": 1741 }, { "epoch": 1.7977296181630547, "grad_norm": 8.526212194371725, "learning_rate": 4.151477182721632e-06, "loss": 1.9454221725463867, "step": 1742 }, { "epoch": 1.7987616099071206, "grad_norm": 12.654906488380304, "learning_rate": 4.145560319333587e-06, "loss": 0.8242064714431763, "step": 1743 }, { "epoch": 1.7997936016511868, "grad_norm": 12.0745341858681, "learning_rate": 4.139644688214827e-06, "loss": 1.7421832084655762, "step": 1744 }, { "epoch": 1.8008255933952528, "grad_norm": 11.404849956167968, "learning_rate": 4.133730297896846e-06, "loss": 0.9724714756011963, "step": 1745 }, { "epoch": 1.8018575851393188, "grad_norm": 9.798225085263166, "learning_rate": 4.1278171569093564e-06, "loss": 1.2271108627319336, "step": 1746 }, { "epoch": 1.802889576883385, "grad_norm": 7.826160390229573, "learning_rate": 4.1219052737802624e-06, "loss": 1.3896276950836182, "step": 1747 }, { "epoch": 1.803921568627451, "grad_norm": 8.696271555135679, "learning_rate": 4.115994657035659e-06, "loss": 1.5526032447814941, "step": 1748 }, { "epoch": 1.804953560371517, "grad_norm": 12.860082171763079, "learning_rate": 4.110085315199811e-06, "loss": 2.7864584922790527, "step": 1749 }, { "epoch": 1.8059855521155832, "grad_norm": 12.092757095998914, "learning_rate": 4.104177256795145e-06, "loss": 1.6778318881988525, "step": 1750 }, { "epoch": 1.807017543859649, "grad_norm": 9.577992702190265, "learning_rate": 4.098270490342241e-06, "loss": 1.6193572282791138, "step": 1751 }, { "epoch": 1.8080495356037152, "grad_norm": 12.270427643491995, "learning_rate": 4.0923650243598104e-06, "loss": 2.024756908416748, "step": 1752 }, { "epoch": 1.8090815273477814, "grad_norm": 15.143367732619192, "learning_rate": 4.086460867364694e-06, "loss": 1.6637275218963623, "step": 1753 }, { "epoch": 1.8101135190918471, "grad_norm": 12.545136666185163, "learning_rate": 4.0805580278718364e-06, "loss": 1.3071680068969727, "step": 1754 }, { "epoch": 1.8111455108359134, "grad_norm": 6.4743236934340915, "learning_rate": 4.074656514394292e-06, "loss": 1.4666328430175781, "step": 1755 }, { "epoch": 1.8121775025799793, "grad_norm": 9.6880750212984, "learning_rate": 4.0687563354431986e-06, "loss": 1.2108535766601562, "step": 1756 }, { "epoch": 1.8132094943240453, "grad_norm": 9.240024657830892, "learning_rate": 4.062857499527767e-06, "loss": 1.6777081489562988, "step": 1757 }, { "epoch": 1.8142414860681115, "grad_norm": 8.029446154665262, "learning_rate": 4.0569600151552745e-06, "loss": 1.3032546043395996, "step": 1758 }, { "epoch": 1.8152734778121775, "grad_norm": 13.2134299208589, "learning_rate": 4.051063890831047e-06, "loss": 1.6355738639831543, "step": 1759 }, { "epoch": 1.8163054695562435, "grad_norm": 18.25673877600439, "learning_rate": 4.045169135058452e-06, "loss": 1.8013114929199219, "step": 1760 }, { "epoch": 1.8173374613003097, "grad_norm": 17.253970737512073, "learning_rate": 4.039275756338881e-06, "loss": 1.6528455018997192, "step": 1761 }, { "epoch": 1.8183694530443757, "grad_norm": 15.441558754292478, "learning_rate": 4.033383763171738e-06, "loss": 2.3111886978149414, "step": 1762 }, { "epoch": 1.8194014447884417, "grad_norm": 31.216500987812733, "learning_rate": 4.027493164054433e-06, "loss": 1.7374169826507568, "step": 1763 }, { "epoch": 1.8204334365325079, "grad_norm": 10.023239892905801, "learning_rate": 4.021603967482361e-06, "loss": 1.4004958868026733, "step": 1764 }, { "epoch": 1.8214654282765737, "grad_norm": 11.113603285626901, "learning_rate": 4.015716181948897e-06, "loss": 1.4151482582092285, "step": 1765 }, { "epoch": 1.8224974200206399, "grad_norm": 10.344500401937307, "learning_rate": 4.009829815945381e-06, "loss": 1.4603420495986938, "step": 1766 }, { "epoch": 1.8235294117647058, "grad_norm": 11.901402794224847, "learning_rate": 4.003944877961102e-06, "loss": 1.086212158203125, "step": 1767 }, { "epoch": 1.8245614035087718, "grad_norm": 9.500123542012892, "learning_rate": 3.998061376483298e-06, "loss": 1.8286771774291992, "step": 1768 }, { "epoch": 1.825593395252838, "grad_norm": 13.81642804897701, "learning_rate": 3.9921793199971235e-06, "loss": 1.088291883468628, "step": 1769 }, { "epoch": 1.826625386996904, "grad_norm": 10.213108360136134, "learning_rate": 3.98629871698566e-06, "loss": 1.7142021656036377, "step": 1770 }, { "epoch": 1.82765737874097, "grad_norm": 12.73036098533881, "learning_rate": 3.980419575929888e-06, "loss": 1.4606733322143555, "step": 1771 }, { "epoch": 1.8286893704850362, "grad_norm": 9.835511553146963, "learning_rate": 3.974541905308679e-06, "loss": 1.0927921533584595, "step": 1772 }, { "epoch": 1.8297213622291022, "grad_norm": 16.83908725373327, "learning_rate": 3.968665713598783e-06, "loss": 1.8018839359283447, "step": 1773 }, { "epoch": 1.8307533539731682, "grad_norm": 15.209294467658857, "learning_rate": 3.962791009274821e-06, "loss": 1.1861164569854736, "step": 1774 }, { "epoch": 1.8317853457172344, "grad_norm": 7.937082353648308, "learning_rate": 3.956917800809266e-06, "loss": 1.1646783351898193, "step": 1775 }, { "epoch": 1.8328173374613002, "grad_norm": 12.31141961301524, "learning_rate": 3.951046096672434e-06, "loss": 1.2939174175262451, "step": 1776 }, { "epoch": 1.8338493292053664, "grad_norm": 9.585711399460749, "learning_rate": 3.945175905332473e-06, "loss": 1.9322181940078735, "step": 1777 }, { "epoch": 1.8348813209494326, "grad_norm": 8.268824748296955, "learning_rate": 3.939307235255343e-06, "loss": 0.9638408422470093, "step": 1778 }, { "epoch": 1.8359133126934983, "grad_norm": 12.594960878000762, "learning_rate": 3.933440094904824e-06, "loss": 2.2295453548431396, "step": 1779 }, { "epoch": 1.8369453044375645, "grad_norm": 15.830275247088338, "learning_rate": 3.927574492742473e-06, "loss": 2.0274429321289062, "step": 1780 }, { "epoch": 1.8379772961816305, "grad_norm": 13.33237828685689, "learning_rate": 3.921710437227641e-06, "loss": 1.5238763093948364, "step": 1781 }, { "epoch": 1.8390092879256965, "grad_norm": 12.66844229019403, "learning_rate": 3.915847936817442e-06, "loss": 1.3702871799468994, "step": 1782 }, { "epoch": 1.8400412796697627, "grad_norm": 17.03081179911215, "learning_rate": 3.909986999966751e-06, "loss": 1.4059205055236816, "step": 1783 }, { "epoch": 1.8410732714138287, "grad_norm": 9.304730452471944, "learning_rate": 3.904127635128184e-06, "loss": 1.6512281894683838, "step": 1784 }, { "epoch": 1.8421052631578947, "grad_norm": 10.942023695799591, "learning_rate": 3.898269850752093e-06, "loss": 1.360194206237793, "step": 1785 }, { "epoch": 1.843137254901961, "grad_norm": 9.76664016025721, "learning_rate": 3.892413655286551e-06, "loss": 1.101757287979126, "step": 1786 }, { "epoch": 1.8441692466460269, "grad_norm": 31.684418851573756, "learning_rate": 3.886559057177337e-06, "loss": 1.9761056900024414, "step": 1787 }, { "epoch": 1.8452012383900929, "grad_norm": 9.70788789701846, "learning_rate": 3.880706064867927e-06, "loss": 1.732954502105713, "step": 1788 }, { "epoch": 1.846233230134159, "grad_norm": 12.994694218945037, "learning_rate": 3.8748546867994815e-06, "loss": 1.4196879863739014, "step": 1789 }, { "epoch": 1.8472652218782248, "grad_norm": 14.131533482147614, "learning_rate": 3.8690049314108355e-06, "loss": 0.7784943580627441, "step": 1790 }, { "epoch": 1.848297213622291, "grad_norm": 7.894747060154967, "learning_rate": 3.863156807138481e-06, "loss": 1.5873610973358154, "step": 1791 }, { "epoch": 1.849329205366357, "grad_norm": 8.08327356417994, "learning_rate": 3.857310322416555e-06, "loss": 1.2376326322555542, "step": 1792 }, { "epoch": 1.850361197110423, "grad_norm": 9.79680906436854, "learning_rate": 3.851465485676836e-06, "loss": 1.8484421968460083, "step": 1793 }, { "epoch": 1.8513931888544892, "grad_norm": 13.421851682551248, "learning_rate": 3.845622305348723e-06, "loss": 1.769080400466919, "step": 1794 }, { "epoch": 1.8524251805985552, "grad_norm": 11.35164598325514, "learning_rate": 3.839780789859222e-06, "loss": 1.3666892051696777, "step": 1795 }, { "epoch": 1.8534571723426212, "grad_norm": 7.278640163373893, "learning_rate": 3.833940947632947e-06, "loss": 1.421882152557373, "step": 1796 }, { "epoch": 1.8544891640866874, "grad_norm": 15.123748431974601, "learning_rate": 3.82810278709209e-06, "loss": 0.8415699005126953, "step": 1797 }, { "epoch": 1.8555211558307534, "grad_norm": 10.864648717292008, "learning_rate": 3.822266316656421e-06, "loss": 2.7428770065307617, "step": 1798 }, { "epoch": 1.8565531475748194, "grad_norm": 11.717969553411004, "learning_rate": 3.816431544743276e-06, "loss": 1.419779658317566, "step": 1799 }, { "epoch": 1.8575851393188856, "grad_norm": 17.129393236970014, "learning_rate": 3.8105984797675364e-06, "loss": 2.135234832763672, "step": 1800 }, { "epoch": 1.8586171310629513, "grad_norm": 13.799126093066207, "learning_rate": 3.8047671301416256e-06, "loss": 1.2894712686538696, "step": 1801 }, { "epoch": 1.8596491228070176, "grad_norm": 11.695133936167258, "learning_rate": 3.798937504275489e-06, "loss": 2.12467622756958, "step": 1802 }, { "epoch": 1.8606811145510835, "grad_norm": 7.861185070945205, "learning_rate": 3.79310961057659e-06, "loss": 1.6944842338562012, "step": 1803 }, { "epoch": 1.8617131062951495, "grad_norm": 19.604668930120155, "learning_rate": 3.7872834574498894e-06, "loss": 1.5712006092071533, "step": 1804 }, { "epoch": 1.8627450980392157, "grad_norm": 18.118263316570626, "learning_rate": 3.7814590532978428e-06, "loss": 1.1979610919952393, "step": 1805 }, { "epoch": 1.8637770897832817, "grad_norm": 11.588503990001286, "learning_rate": 3.775636406520382e-06, "loss": 1.2051764726638794, "step": 1806 }, { "epoch": 1.8648090815273477, "grad_norm": 15.802393803467309, "learning_rate": 3.7698155255149005e-06, "loss": 1.317204236984253, "step": 1807 }, { "epoch": 1.865841073271414, "grad_norm": 7.636660886174154, "learning_rate": 3.7639964186762506e-06, "loss": 1.3314225673675537, "step": 1808 }, { "epoch": 1.86687306501548, "grad_norm": 9.568118739152196, "learning_rate": 3.7581790943967228e-06, "loss": 1.755152940750122, "step": 1809 }, { "epoch": 1.8679050567595459, "grad_norm": 21.68620704466686, "learning_rate": 3.752363561066039e-06, "loss": 2.36037278175354, "step": 1810 }, { "epoch": 1.868937048503612, "grad_norm": 10.836943489605135, "learning_rate": 3.746549827071334e-06, "loss": 1.3274281024932861, "step": 1811 }, { "epoch": 1.869969040247678, "grad_norm": 18.231812775806993, "learning_rate": 3.740737900797151e-06, "loss": 2.0181620121002197, "step": 1812 }, { "epoch": 1.871001031991744, "grad_norm": 10.002199945579473, "learning_rate": 3.7349277906254278e-06, "loss": 1.0888769626617432, "step": 1813 }, { "epoch": 1.8720330237358103, "grad_norm": 13.642398873064936, "learning_rate": 3.7291195049354776e-06, "loss": 3.219508409500122, "step": 1814 }, { "epoch": 1.873065015479876, "grad_norm": 14.86774936708945, "learning_rate": 3.723313052103987e-06, "loss": 1.6249029636383057, "step": 1815 }, { "epoch": 1.8740970072239422, "grad_norm": 8.626190109352544, "learning_rate": 3.7175084405049978e-06, "loss": 1.7096291780471802, "step": 1816 }, { "epoch": 1.8751289989680082, "grad_norm": 12.20701095710016, "learning_rate": 3.7117056785098935e-06, "loss": 1.2819960117340088, "step": 1817 }, { "epoch": 1.8761609907120742, "grad_norm": 10.901552275933442, "learning_rate": 3.705904774487396e-06, "loss": 1.361170768737793, "step": 1818 }, { "epoch": 1.8771929824561404, "grad_norm": 11.950482935105002, "learning_rate": 3.7001057368035446e-06, "loss": 0.9725128412246704, "step": 1819 }, { "epoch": 1.8782249742002064, "grad_norm": 11.1783419506552, "learning_rate": 3.6943085738216855e-06, "loss": 1.6708776950836182, "step": 1820 }, { "epoch": 1.8792569659442724, "grad_norm": 13.04360676985624, "learning_rate": 3.688513293902462e-06, "loss": 1.1926006078720093, "step": 1821 }, { "epoch": 1.8802889576883386, "grad_norm": 14.916308684644543, "learning_rate": 3.6827199054038043e-06, "loss": 1.7682125568389893, "step": 1822 }, { "epoch": 1.8813209494324046, "grad_norm": 8.274121694167667, "learning_rate": 3.6769284166809104e-06, "loss": 1.6041537523269653, "step": 1823 }, { "epoch": 1.8823529411764706, "grad_norm": 9.507652963938067, "learning_rate": 3.6711388360862417e-06, "loss": 1.4414433240890503, "step": 1824 }, { "epoch": 1.8833849329205368, "grad_norm": 8.94598775314897, "learning_rate": 3.6653511719695077e-06, "loss": 1.6177234649658203, "step": 1825 }, { "epoch": 1.8844169246646025, "grad_norm": 26.366290475468524, "learning_rate": 3.659565432677652e-06, "loss": 1.6701890230178833, "step": 1826 }, { "epoch": 1.8854489164086687, "grad_norm": 10.399901839605608, "learning_rate": 3.653781626554842e-06, "loss": 1.2970281839370728, "step": 1827 }, { "epoch": 1.8864809081527347, "grad_norm": 26.30425533380288, "learning_rate": 3.6479997619424605e-06, "loss": 1.2655129432678223, "step": 1828 }, { "epoch": 1.8875128998968007, "grad_norm": 8.452072950248295, "learning_rate": 3.642219847179089e-06, "loss": 1.334805965423584, "step": 1829 }, { "epoch": 1.888544891640867, "grad_norm": 11.066535865814831, "learning_rate": 3.636441890600493e-06, "loss": 1.7193948030471802, "step": 1830 }, { "epoch": 1.889576883384933, "grad_norm": 9.779287406809443, "learning_rate": 3.6306659005396195e-06, "loss": 1.087109088897705, "step": 1831 }, { "epoch": 1.890608875128999, "grad_norm": 10.233826024672242, "learning_rate": 3.6248918853265756e-06, "loss": 1.526931881904602, "step": 1832 }, { "epoch": 1.891640866873065, "grad_norm": 14.236830265198021, "learning_rate": 3.619119853288622e-06, "loss": 1.742538332939148, "step": 1833 }, { "epoch": 1.892672858617131, "grad_norm": 16.3542365001747, "learning_rate": 3.6133498127501587e-06, "loss": 0.9473057985305786, "step": 1834 }, { "epoch": 1.893704850361197, "grad_norm": 10.48141733965336, "learning_rate": 3.607581772032713e-06, "loss": 1.312403678894043, "step": 1835 }, { "epoch": 1.8947368421052633, "grad_norm": 13.417134210608252, "learning_rate": 3.6018157394549287e-06, "loss": 1.765456199645996, "step": 1836 }, { "epoch": 1.8957688338493293, "grad_norm": 18.97243868977882, "learning_rate": 3.596051723332554e-06, "loss": 1.2540202140808105, "step": 1837 }, { "epoch": 1.8968008255933952, "grad_norm": 26.599063236979354, "learning_rate": 3.5902897319784313e-06, "loss": 1.6883485317230225, "step": 1838 }, { "epoch": 1.8978328173374615, "grad_norm": 20.47631536226638, "learning_rate": 3.5845297737024754e-06, "loss": 1.232295274734497, "step": 1839 }, { "epoch": 1.8988648090815272, "grad_norm": 11.457787863331676, "learning_rate": 3.5787718568116764e-06, "loss": 1.3976306915283203, "step": 1840 }, { "epoch": 1.8998968008255934, "grad_norm": 13.605991884385617, "learning_rate": 3.573015989610078e-06, "loss": 1.4287066459655762, "step": 1841 }, { "epoch": 1.9009287925696594, "grad_norm": 15.821839277913396, "learning_rate": 3.567262180398765e-06, "loss": 1.6137340068817139, "step": 1842 }, { "epoch": 1.9019607843137254, "grad_norm": 14.704775588225525, "learning_rate": 3.561510437475858e-06, "loss": 1.3677417039871216, "step": 1843 }, { "epoch": 1.9029927760577916, "grad_norm": 18.49671119582087, "learning_rate": 3.5557607691364983e-06, "loss": 1.4877206087112427, "step": 1844 }, { "epoch": 1.9040247678018576, "grad_norm": 23.286182662515156, "learning_rate": 3.550013183672829e-06, "loss": 1.5937504768371582, "step": 1845 }, { "epoch": 1.9050567595459236, "grad_norm": 12.987288949321805, "learning_rate": 3.5442676893739956e-06, "loss": 1.318833589553833, "step": 1846 }, { "epoch": 1.9060887512899898, "grad_norm": 10.799259403771813, "learning_rate": 3.538524294526127e-06, "loss": 1.4114301204681396, "step": 1847 }, { "epoch": 1.9071207430340558, "grad_norm": 20.78194593074858, "learning_rate": 3.5327830074123214e-06, "loss": 1.7304108142852783, "step": 1848 }, { "epoch": 1.9081527347781218, "grad_norm": 10.79589310794868, "learning_rate": 3.527043836312639e-06, "loss": 1.6173585653305054, "step": 1849 }, { "epoch": 1.909184726522188, "grad_norm": 11.865600573746853, "learning_rate": 3.521306789504089e-06, "loss": 1.1029250621795654, "step": 1850 }, { "epoch": 1.9102167182662537, "grad_norm": 16.51790031229028, "learning_rate": 3.5155718752606126e-06, "loss": 1.731012225151062, "step": 1851 }, { "epoch": 1.91124871001032, "grad_norm": 15.067985774385786, "learning_rate": 3.509839101853082e-06, "loss": 3.3693652153015137, "step": 1852 }, { "epoch": 1.912280701754386, "grad_norm": 13.012022325347537, "learning_rate": 3.504108477549279e-06, "loss": 1.9551976919174194, "step": 1853 }, { "epoch": 1.913312693498452, "grad_norm": 7.879854916635597, "learning_rate": 3.4983800106138833e-06, "loss": 1.4166200160980225, "step": 1854 }, { "epoch": 1.914344685242518, "grad_norm": 8.199603624658192, "learning_rate": 3.4926537093084654e-06, "loss": 1.4520305395126343, "step": 1855 }, { "epoch": 1.915376676986584, "grad_norm": 8.35561969860675, "learning_rate": 3.486929581891476e-06, "loss": 1.4539759159088135, "step": 1856 }, { "epoch": 1.91640866873065, "grad_norm": 24.847987306954906, "learning_rate": 3.4812076366182256e-06, "loss": 1.5426585674285889, "step": 1857 }, { "epoch": 1.9174406604747163, "grad_norm": 26.640742297972697, "learning_rate": 3.475487881740879e-06, "loss": 1.4774678945541382, "step": 1858 }, { "epoch": 1.9184726522187823, "grad_norm": 21.636408870276405, "learning_rate": 3.4697703255084426e-06, "loss": 2.1614882946014404, "step": 1859 }, { "epoch": 1.9195046439628483, "grad_norm": 6.965501959643614, "learning_rate": 3.464054976166753e-06, "loss": 1.8535370826721191, "step": 1860 }, { "epoch": 1.9205366357069145, "grad_norm": 9.259509440773725, "learning_rate": 3.45834184195846e-06, "loss": 1.6998491287231445, "step": 1861 }, { "epoch": 1.9215686274509802, "grad_norm": 16.29518792543394, "learning_rate": 3.4526309311230238e-06, "loss": 2.3080320358276367, "step": 1862 }, { "epoch": 1.9226006191950464, "grad_norm": 10.880967440643758, "learning_rate": 3.446922251896696e-06, "loss": 1.5008809566497803, "step": 1863 }, { "epoch": 1.9236326109391126, "grad_norm": 14.202936012317007, "learning_rate": 3.441215812512508e-06, "loss": 1.4904303550720215, "step": 1864 }, { "epoch": 1.9246646026831784, "grad_norm": 16.546369603640375, "learning_rate": 3.4355116212002616e-06, "loss": 2.3721206188201904, "step": 1865 }, { "epoch": 1.9256965944272446, "grad_norm": 9.13217757918041, "learning_rate": 3.4298096861865204e-06, "loss": 1.7444360256195068, "step": 1866 }, { "epoch": 1.9267285861713106, "grad_norm": 14.92466668241575, "learning_rate": 3.4241100156945907e-06, "loss": 1.4486478567123413, "step": 1867 }, { "epoch": 1.9277605779153766, "grad_norm": 16.97368297096261, "learning_rate": 3.41841261794451e-06, "loss": 0.9204004406929016, "step": 1868 }, { "epoch": 1.9287925696594428, "grad_norm": 14.013135225488206, "learning_rate": 3.4127175011530443e-06, "loss": 1.501824975013733, "step": 1869 }, { "epoch": 1.9298245614035088, "grad_norm": 15.653228452019427, "learning_rate": 3.407024673533665e-06, "loss": 1.8300246000289917, "step": 1870 }, { "epoch": 1.9308565531475748, "grad_norm": 9.199922021024074, "learning_rate": 3.401334143296544e-06, "loss": 2.072077989578247, "step": 1871 }, { "epoch": 1.931888544891641, "grad_norm": 9.150098006383843, "learning_rate": 3.3956459186485414e-06, "loss": 1.2317850589752197, "step": 1872 }, { "epoch": 1.932920536635707, "grad_norm": 10.395256249908842, "learning_rate": 3.389960007793189e-06, "loss": 1.273494839668274, "step": 1873 }, { "epoch": 1.933952528379773, "grad_norm": 10.38634924924544, "learning_rate": 3.3842764189306844e-06, "loss": 1.8254051208496094, "step": 1874 }, { "epoch": 1.9349845201238391, "grad_norm": 9.880787086639312, "learning_rate": 3.3785951602578728e-06, "loss": 1.6783366203308105, "step": 1875 }, { "epoch": 1.936016511867905, "grad_norm": 8.844458764291927, "learning_rate": 3.372916239968246e-06, "loss": 1.068297266960144, "step": 1876 }, { "epoch": 1.9370485036119711, "grad_norm": 18.409259489228877, "learning_rate": 3.367239666251915e-06, "loss": 1.3494412899017334, "step": 1877 }, { "epoch": 1.938080495356037, "grad_norm": 8.125723974719769, "learning_rate": 3.361565447295612e-06, "loss": 1.8301430940628052, "step": 1878 }, { "epoch": 1.939112487100103, "grad_norm": 15.844934465283734, "learning_rate": 3.355893591282672e-06, "loss": 1.3259280920028687, "step": 1879 }, { "epoch": 1.9401444788441693, "grad_norm": 8.309531976854908, "learning_rate": 3.3502241063930196e-06, "loss": 1.5008363723754883, "step": 1880 }, { "epoch": 1.9411764705882353, "grad_norm": 12.989009848670683, "learning_rate": 3.344557000803163e-06, "loss": 1.2330214977264404, "step": 1881 }, { "epoch": 1.9422084623323013, "grad_norm": 15.10676301137227, "learning_rate": 3.3388922826861794e-06, "loss": 1.171676516532898, "step": 1882 }, { "epoch": 1.9432404540763675, "grad_norm": 15.57918362721749, "learning_rate": 3.333229960211698e-06, "loss": 1.2732231616973877, "step": 1883 }, { "epoch": 1.9442724458204335, "grad_norm": 12.219239159218336, "learning_rate": 3.327570041545897e-06, "loss": 2.0378801822662354, "step": 1884 }, { "epoch": 1.9453044375644994, "grad_norm": 21.248851380839813, "learning_rate": 3.321912534851489e-06, "loss": 1.3067893981933594, "step": 1885 }, { "epoch": 1.9463364293085657, "grad_norm": 22.363387549472883, "learning_rate": 3.3162574482877063e-06, "loss": 1.843634009361267, "step": 1886 }, { "epoch": 1.9473684210526314, "grad_norm": 8.637840897549372, "learning_rate": 3.310604790010289e-06, "loss": 1.3801031112670898, "step": 1887 }, { "epoch": 1.9484004127966976, "grad_norm": 15.57742329719762, "learning_rate": 3.304954568171478e-06, "loss": 1.2379931211471558, "step": 1888 }, { "epoch": 1.9494324045407638, "grad_norm": 11.929156205044768, "learning_rate": 3.2993067909199982e-06, "loss": 1.2321984767913818, "step": 1889 }, { "epoch": 1.9504643962848296, "grad_norm": 11.506361878048958, "learning_rate": 3.2936614664010503e-06, "loss": 1.3178244829177856, "step": 1890 }, { "epoch": 1.9514963880288958, "grad_norm": 9.574280751560662, "learning_rate": 3.288018602756299e-06, "loss": 1.5133464336395264, "step": 1891 }, { "epoch": 1.9525283797729618, "grad_norm": 8.761103303332469, "learning_rate": 3.282378208123856e-06, "loss": 0.8351538181304932, "step": 1892 }, { "epoch": 1.9535603715170278, "grad_norm": 15.119584543608218, "learning_rate": 3.276740290638275e-06, "loss": 1.6170310974121094, "step": 1893 }, { "epoch": 1.954592363261094, "grad_norm": 9.257877103523935, "learning_rate": 3.271104858430537e-06, "loss": 1.283090353012085, "step": 1894 }, { "epoch": 1.95562435500516, "grad_norm": 8.949450609209322, "learning_rate": 3.26547191962804e-06, "loss": 1.5036699771881104, "step": 1895 }, { "epoch": 1.956656346749226, "grad_norm": 16.853847438903518, "learning_rate": 3.259841482354582e-06, "loss": 1.4999172687530518, "step": 1896 }, { "epoch": 1.9576883384932922, "grad_norm": 12.450045679558006, "learning_rate": 3.254213554730358e-06, "loss": 1.9045746326446533, "step": 1897 }, { "epoch": 1.9587203302373581, "grad_norm": 8.13458772630108, "learning_rate": 3.24858814487194e-06, "loss": 2.0080020427703857, "step": 1898 }, { "epoch": 1.9597523219814241, "grad_norm": 18.88602798086758, "learning_rate": 3.242965260892269e-06, "loss": 1.2330591678619385, "step": 1899 }, { "epoch": 1.9607843137254903, "grad_norm": 9.646930886353221, "learning_rate": 3.2373449109006476e-06, "loss": 1.6764395236968994, "step": 1900 }, { "epoch": 1.961816305469556, "grad_norm": 8.797859795063196, "learning_rate": 3.231727103002718e-06, "loss": 1.0952730178833008, "step": 1901 }, { "epoch": 1.9628482972136223, "grad_norm": 12.331075461924263, "learning_rate": 3.2261118453004595e-06, "loss": 1.2641353607177734, "step": 1902 }, { "epoch": 1.9638802889576883, "grad_norm": 6.027711307261203, "learning_rate": 3.220499145892173e-06, "loss": 0.8769614696502686, "step": 1903 }, { "epoch": 1.9649122807017543, "grad_norm": 27.1225755613239, "learning_rate": 3.21488901287247e-06, "loss": 1.1951849460601807, "step": 1904 }, { "epoch": 1.9659442724458205, "grad_norm": 30.675159473591993, "learning_rate": 3.2092814543322624e-06, "loss": 1.5789157152175903, "step": 1905 }, { "epoch": 1.9669762641898865, "grad_norm": 9.410282310517966, "learning_rate": 3.2036764783587446e-06, "loss": 1.4281562566757202, "step": 1906 }, { "epoch": 1.9680082559339525, "grad_norm": 10.964885396257436, "learning_rate": 3.198074093035391e-06, "loss": 1.7447469234466553, "step": 1907 }, { "epoch": 1.9690402476780187, "grad_norm": 13.532327740709574, "learning_rate": 3.192474306441936e-06, "loss": 1.466497540473938, "step": 1908 }, { "epoch": 1.9700722394220846, "grad_norm": 9.531111577263932, "learning_rate": 3.186877126654369e-06, "loss": 1.7062510251998901, "step": 1909 }, { "epoch": 1.9711042311661506, "grad_norm": 13.433433122569705, "learning_rate": 3.1812825617449207e-06, "loss": 1.2298617362976074, "step": 1910 }, { "epoch": 1.9721362229102168, "grad_norm": 9.117118600131604, "learning_rate": 3.175690619782046e-06, "loss": 1.32313072681427, "step": 1911 }, { "epoch": 1.9731682146542826, "grad_norm": 13.632547041799603, "learning_rate": 3.170101308830421e-06, "loss": 1.7262647151947021, "step": 1912 }, { "epoch": 1.9742002063983488, "grad_norm": 8.962755655228897, "learning_rate": 3.164514636950925e-06, "loss": 2.2817416191101074, "step": 1913 }, { "epoch": 1.975232198142415, "grad_norm": 16.111500659178432, "learning_rate": 3.1589306122006367e-06, "loss": 1.1965981721878052, "step": 1914 }, { "epoch": 1.9762641898864808, "grad_norm": 9.702276870170294, "learning_rate": 3.1533492426328086e-06, "loss": 1.3094780445098877, "step": 1915 }, { "epoch": 1.977296181630547, "grad_norm": 13.158818670104788, "learning_rate": 3.1477705362968702e-06, "loss": 1.5770702362060547, "step": 1916 }, { "epoch": 1.978328173374613, "grad_norm": 15.374685127791743, "learning_rate": 3.1421945012384085e-06, "loss": 1.5390429496765137, "step": 1917 }, { "epoch": 1.979360165118679, "grad_norm": 5.198005672311248, "learning_rate": 3.1366211454991558e-06, "loss": 0.8496372699737549, "step": 1918 }, { "epoch": 1.9803921568627452, "grad_norm": 10.909086321860805, "learning_rate": 3.1310504771169835e-06, "loss": 1.5664114952087402, "step": 1919 }, { "epoch": 1.9814241486068112, "grad_norm": 11.822442026920228, "learning_rate": 3.1254825041258852e-06, "loss": 1.2260844707489014, "step": 1920 }, { "epoch": 1.9824561403508771, "grad_norm": 11.125967110797111, "learning_rate": 3.119917234555968e-06, "loss": 1.2665646076202393, "step": 1921 }, { "epoch": 1.9834881320949433, "grad_norm": 11.399017901383028, "learning_rate": 3.114354676433441e-06, "loss": 2.8943164348602295, "step": 1922 }, { "epoch": 1.9845201238390093, "grad_norm": 13.818827066188698, "learning_rate": 3.108794837780599e-06, "loss": 1.7140778303146362, "step": 1923 }, { "epoch": 1.9855521155830753, "grad_norm": 11.136556319523761, "learning_rate": 3.103237726615822e-06, "loss": 1.912706732749939, "step": 1924 }, { "epoch": 1.9865841073271415, "grad_norm": 12.291829033063722, "learning_rate": 3.0976833509535494e-06, "loss": 1.3148201704025269, "step": 1925 }, { "epoch": 1.9876160990712073, "grad_norm": 12.445042354781304, "learning_rate": 3.09213171880428e-06, "loss": 1.282441258430481, "step": 1926 }, { "epoch": 1.9886480908152735, "grad_norm": 14.61958855274096, "learning_rate": 3.0865828381745515e-06, "loss": 1.6544055938720703, "step": 1927 }, { "epoch": 1.9896800825593395, "grad_norm": 25.232339435869783, "learning_rate": 3.081036717066938e-06, "loss": 2.1010539531707764, "step": 1928 }, { "epoch": 1.9907120743034055, "grad_norm": 16.073894486944297, "learning_rate": 3.075493363480032e-06, "loss": 1.3920762538909912, "step": 1929 }, { "epoch": 1.9917440660474717, "grad_norm": 10.095724298366525, "learning_rate": 3.0699527854084338e-06, "loss": 1.5464189052581787, "step": 1930 }, { "epoch": 1.9927760577915377, "grad_norm": 9.52645488696497, "learning_rate": 3.064414990842742e-06, "loss": 1.5836119651794434, "step": 1931 }, { "epoch": 1.9938080495356036, "grad_norm": 15.194079441704625, "learning_rate": 3.0588799877695375e-06, "loss": 1.3523647785186768, "step": 1932 }, { "epoch": 1.9948400412796699, "grad_norm": 17.3143929301106, "learning_rate": 3.0533477841713833e-06, "loss": 1.4108376502990723, "step": 1933 }, { "epoch": 1.9958720330237358, "grad_norm": 13.667781813150206, "learning_rate": 3.047818388026797e-06, "loss": 1.1967389583587646, "step": 1934 }, { "epoch": 1.9969040247678018, "grad_norm": 8.305388590205636, "learning_rate": 3.0422918073102505e-06, "loss": 0.9703651070594788, "step": 1935 }, { "epoch": 1.997936016511868, "grad_norm": 11.508512870175016, "learning_rate": 3.036768049992157e-06, "loss": 1.4309837818145752, "step": 1936 }, { "epoch": 1.9989680082559338, "grad_norm": 19.643208795588926, "learning_rate": 3.0312471240388523e-06, "loss": 1.8425827026367188, "step": 1937 }, { "epoch": 2.0, "grad_norm": 11.548933037588704, "learning_rate": 3.025729037412596e-06, "loss": 1.0835847854614258, "step": 1938 }, { "epoch": 2.001031991744066, "grad_norm": 7.610702897217233, "learning_rate": 3.020213798071546e-06, "loss": 0.5837053656578064, "step": 1939 }, { "epoch": 2.002063983488132, "grad_norm": 11.266433976578904, "learning_rate": 3.0147014139697596e-06, "loss": 0.7347603440284729, "step": 1940 }, { "epoch": 2.003095975232198, "grad_norm": 9.880288129649191, "learning_rate": 3.0091918930571735e-06, "loss": 0.4960901737213135, "step": 1941 }, { "epoch": 2.0041279669762644, "grad_norm": 9.582387875341349, "learning_rate": 3.0036852432795925e-06, "loss": 0.721099853515625, "step": 1942 }, { "epoch": 2.00515995872033, "grad_norm": 7.546639419003511, "learning_rate": 2.998181472578686e-06, "loss": 0.45264309644699097, "step": 1943 }, { "epoch": 2.0061919504643964, "grad_norm": 11.8161746256189, "learning_rate": 2.99268058889197e-06, "loss": 0.527152419090271, "step": 1944 }, { "epoch": 2.007223942208462, "grad_norm": 11.263796030627102, "learning_rate": 2.987182600152794e-06, "loss": 0.8129073977470398, "step": 1945 }, { "epoch": 2.0082559339525283, "grad_norm": 9.663767619188024, "learning_rate": 2.981687514290334e-06, "loss": 0.5236397981643677, "step": 1946 }, { "epoch": 2.0092879256965945, "grad_norm": 15.543270477389466, "learning_rate": 2.9761953392295795e-06, "loss": 0.4407860040664673, "step": 1947 }, { "epoch": 2.0103199174406603, "grad_norm": 8.862278960252826, "learning_rate": 2.9707060828913226e-06, "loss": 0.9539024829864502, "step": 1948 }, { "epoch": 2.0113519091847265, "grad_norm": 17.028877179665482, "learning_rate": 2.9652197531921443e-06, "loss": 0.3500378131866455, "step": 1949 }, { "epoch": 2.0123839009287927, "grad_norm": 10.583949836368962, "learning_rate": 2.959736358044409e-06, "loss": 0.6622694730758667, "step": 1950 }, { "epoch": 2.0134158926728585, "grad_norm": 11.500190396469042, "learning_rate": 2.954255905356242e-06, "loss": 0.7597059607505798, "step": 1951 }, { "epoch": 2.0144478844169247, "grad_norm": 12.346486958403212, "learning_rate": 2.9487784030315297e-06, "loss": 0.7360637187957764, "step": 1952 }, { "epoch": 2.015479876160991, "grad_norm": 13.827330375019702, "learning_rate": 2.943303858969905e-06, "loss": 0.5327243804931641, "step": 1953 }, { "epoch": 2.0165118679050567, "grad_norm": 11.735084359293525, "learning_rate": 2.937832281066731e-06, "loss": 0.8617746829986572, "step": 1954 }, { "epoch": 2.017543859649123, "grad_norm": 12.041591232803045, "learning_rate": 2.9323636772130948e-06, "loss": 0.2598215639591217, "step": 1955 }, { "epoch": 2.018575851393189, "grad_norm": 9.62893293691884, "learning_rate": 2.9268980552957917e-06, "loss": 0.42265596985816956, "step": 1956 }, { "epoch": 2.019607843137255, "grad_norm": 15.03567444823228, "learning_rate": 2.921435423197321e-06, "loss": 0.8368219137191772, "step": 1957 }, { "epoch": 2.020639834881321, "grad_norm": 13.460049532005664, "learning_rate": 2.915975788795864e-06, "loss": 0.48208919167518616, "step": 1958 }, { "epoch": 2.021671826625387, "grad_norm": 15.191495719958873, "learning_rate": 2.910519159965288e-06, "loss": 0.7324795722961426, "step": 1959 }, { "epoch": 2.022703818369453, "grad_norm": 16.961021228805286, "learning_rate": 2.905065544575114e-06, "loss": 0.2796049118041992, "step": 1960 }, { "epoch": 2.023735810113519, "grad_norm": 10.869212843674253, "learning_rate": 2.8996149504905235e-06, "loss": 0.573403000831604, "step": 1961 }, { "epoch": 2.024767801857585, "grad_norm": 9.624391087664794, "learning_rate": 2.894167385572344e-06, "loss": 0.32885146141052246, "step": 1962 }, { "epoch": 2.025799793601651, "grad_norm": 11.72752318773145, "learning_rate": 2.8887228576770302e-06, "loss": 0.6362261772155762, "step": 1963 }, { "epoch": 2.0268317853457174, "grad_norm": 10.330774917455367, "learning_rate": 2.8832813746566546e-06, "loss": 0.5546295642852783, "step": 1964 }, { "epoch": 2.027863777089783, "grad_norm": 16.016020234822413, "learning_rate": 2.8778429443589007e-06, "loss": 1.2917468547821045, "step": 1965 }, { "epoch": 2.0288957688338494, "grad_norm": 16.38153593326629, "learning_rate": 2.8724075746270513e-06, "loss": 0.5093211531639099, "step": 1966 }, { "epoch": 2.0299277605779156, "grad_norm": 10.512995345727335, "learning_rate": 2.8669752732999736e-06, "loss": 0.6507851481437683, "step": 1967 }, { "epoch": 2.0309597523219813, "grad_norm": 9.787277460258979, "learning_rate": 2.86154604821211e-06, "loss": 0.5437256097793579, "step": 1968 }, { "epoch": 2.0319917440660475, "grad_norm": 10.197545673598418, "learning_rate": 2.856119907193463e-06, "loss": 0.4393252432346344, "step": 1969 }, { "epoch": 2.0330237358101133, "grad_norm": 11.060147206827484, "learning_rate": 2.8506968580695926e-06, "loss": 0.5344643592834473, "step": 1970 }, { "epoch": 2.0340557275541795, "grad_norm": 10.273833742280798, "learning_rate": 2.8452769086615943e-06, "loss": 0.47633272409439087, "step": 1971 }, { "epoch": 2.0350877192982457, "grad_norm": 11.053008212226397, "learning_rate": 2.8398600667861032e-06, "loss": 0.9062159657478333, "step": 1972 }, { "epoch": 2.0361197110423115, "grad_norm": 10.355456409344027, "learning_rate": 2.834446340255258e-06, "loss": 0.45710867643356323, "step": 1973 }, { "epoch": 2.0371517027863777, "grad_norm": 8.151931898870096, "learning_rate": 2.829035736876715e-06, "loss": 0.40623173117637634, "step": 1974 }, { "epoch": 2.038183694530444, "grad_norm": 7.952996765570959, "learning_rate": 2.823628264453625e-06, "loss": 0.21306441724300385, "step": 1975 }, { "epoch": 2.0392156862745097, "grad_norm": 9.880121410681433, "learning_rate": 2.8182239307846195e-06, "loss": 0.6863006353378296, "step": 1976 }, { "epoch": 2.040247678018576, "grad_norm": 8.176021094360394, "learning_rate": 2.812822743663808e-06, "loss": 0.3915032148361206, "step": 1977 }, { "epoch": 2.041279669762642, "grad_norm": 13.471316196705478, "learning_rate": 2.8074247108807568e-06, "loss": 0.9592699408531189, "step": 1978 }, { "epoch": 2.042311661506708, "grad_norm": 7.656361104421015, "learning_rate": 2.802029840220487e-06, "loss": 0.2343856692314148, "step": 1979 }, { "epoch": 2.043343653250774, "grad_norm": 8.6987171546621, "learning_rate": 2.796638139463456e-06, "loss": 0.35938137769699097, "step": 1980 }, { "epoch": 2.0443756449948403, "grad_norm": 7.906012051058154, "learning_rate": 2.7912496163855563e-06, "loss": 0.6258907914161682, "step": 1981 }, { "epoch": 2.045407636738906, "grad_norm": 11.36063073025996, "learning_rate": 2.7858642787580937e-06, "loss": 1.2554874420166016, "step": 1982 }, { "epoch": 2.0464396284829722, "grad_norm": 6.584040487023417, "learning_rate": 2.780482134347774e-06, "loss": 0.3977857828140259, "step": 1983 }, { "epoch": 2.047471620227038, "grad_norm": 9.79108752002825, "learning_rate": 2.7751031909167046e-06, "loss": 0.3324020206928253, "step": 1984 }, { "epoch": 2.048503611971104, "grad_norm": 11.132342107747727, "learning_rate": 2.7697274562223762e-06, "loss": 0.48161542415618896, "step": 1985 }, { "epoch": 2.0495356037151704, "grad_norm": 7.706079328113716, "learning_rate": 2.764354938017651e-06, "loss": 0.445020854473114, "step": 1986 }, { "epoch": 2.050567595459236, "grad_norm": 13.318361101046666, "learning_rate": 2.7589856440507523e-06, "loss": 0.667015790939331, "step": 1987 }, { "epoch": 2.0515995872033024, "grad_norm": 9.204842314118537, "learning_rate": 2.7536195820652506e-06, "loss": 0.46111762523651123, "step": 1988 }, { "epoch": 2.0526315789473686, "grad_norm": 11.196672429674683, "learning_rate": 2.7482567598000586e-06, "loss": 0.25720328092575073, "step": 1989 }, { "epoch": 2.0536635706914343, "grad_norm": 7.289064766870941, "learning_rate": 2.742897184989414e-06, "loss": 0.33461254835128784, "step": 1990 }, { "epoch": 2.0546955624355006, "grad_norm": 7.993905358399321, "learning_rate": 2.73754086536288e-06, "loss": 0.35615432262420654, "step": 1991 }, { "epoch": 2.0557275541795668, "grad_norm": 7.165879335402671, "learning_rate": 2.73218780864531e-06, "loss": 0.32950523495674133, "step": 1992 }, { "epoch": 2.0567595459236325, "grad_norm": 7.746768635857653, "learning_rate": 2.7268380225568635e-06, "loss": 0.28539013862609863, "step": 1993 }, { "epoch": 2.0577915376676987, "grad_norm": 10.27642457032376, "learning_rate": 2.721491514812979e-06, "loss": 0.31158798933029175, "step": 1994 }, { "epoch": 2.0588235294117645, "grad_norm": 12.72944670746384, "learning_rate": 2.7161482931243668e-06, "loss": 0.4267995357513428, "step": 1995 }, { "epoch": 2.0598555211558307, "grad_norm": 8.525124260723349, "learning_rate": 2.710808365197e-06, "loss": 0.20493009686470032, "step": 1996 }, { "epoch": 2.060887512899897, "grad_norm": 12.474481030332862, "learning_rate": 2.7054717387320973e-06, "loss": 0.4201287031173706, "step": 1997 }, { "epoch": 2.0619195046439627, "grad_norm": 8.33891289384112, "learning_rate": 2.7001384214261204e-06, "loss": 0.4269750714302063, "step": 1998 }, { "epoch": 2.062951496388029, "grad_norm": 9.30474416961196, "learning_rate": 2.6948084209707566e-06, "loss": 0.22076722979545593, "step": 1999 }, { "epoch": 2.063983488132095, "grad_norm": 10.100526322224276, "learning_rate": 2.689481745052908e-06, "loss": 0.42582884430885315, "step": 2000 }, { "epoch": 2.065015479876161, "grad_norm": 16.06469621263434, "learning_rate": 2.6841584013546894e-06, "loss": 0.6067658066749573, "step": 2001 }, { "epoch": 2.066047471620227, "grad_norm": 14.33291742560144, "learning_rate": 2.678838397553399e-06, "loss": 0.751406729221344, "step": 2002 }, { "epoch": 2.0670794633642933, "grad_norm": 8.332923727656144, "learning_rate": 2.673521741321527e-06, "loss": 0.383941650390625, "step": 2003 }, { "epoch": 2.068111455108359, "grad_norm": 14.255250305964788, "learning_rate": 2.6682084403267305e-06, "loss": 0.3727511167526245, "step": 2004 }, { "epoch": 2.0691434468524252, "grad_norm": 12.660218738337791, "learning_rate": 2.662898502231831e-06, "loss": 0.3918401002883911, "step": 2005 }, { "epoch": 2.0701754385964914, "grad_norm": 10.43156888417619, "learning_rate": 2.6575919346948e-06, "loss": 0.2653726637363434, "step": 2006 }, { "epoch": 2.071207430340557, "grad_norm": 11.433483427690277, "learning_rate": 2.652288745368743e-06, "loss": 0.899912416934967, "step": 2007 }, { "epoch": 2.0722394220846234, "grad_norm": 11.360721893826966, "learning_rate": 2.6469889419018985e-06, "loss": 0.11928503215312958, "step": 2008 }, { "epoch": 2.073271413828689, "grad_norm": 11.605801479680045, "learning_rate": 2.6416925319376195e-06, "loss": 0.3207067847251892, "step": 2009 }, { "epoch": 2.0743034055727554, "grad_norm": 14.412850460880513, "learning_rate": 2.6363995231143714e-06, "loss": 0.6017872095108032, "step": 2010 }, { "epoch": 2.0753353973168216, "grad_norm": 11.631141232534024, "learning_rate": 2.6311099230657033e-06, "loss": 0.5595146417617798, "step": 2011 }, { "epoch": 2.0763673890608874, "grad_norm": 9.660589581892381, "learning_rate": 2.6258237394202556e-06, "loss": 0.7578150033950806, "step": 2012 }, { "epoch": 2.0773993808049536, "grad_norm": 10.56875579356723, "learning_rate": 2.6205409798017407e-06, "loss": 0.2807178795337677, "step": 2013 }, { "epoch": 2.0784313725490198, "grad_norm": 11.182376725264405, "learning_rate": 2.6152616518289307e-06, "loss": 0.36187225580215454, "step": 2014 }, { "epoch": 2.0794633642930855, "grad_norm": 10.450314187269546, "learning_rate": 2.6099857631156544e-06, "loss": 1.092901349067688, "step": 2015 }, { "epoch": 2.0804953560371517, "grad_norm": 21.10493659862159, "learning_rate": 2.60471332127077e-06, "loss": 0.5096402764320374, "step": 2016 }, { "epoch": 2.081527347781218, "grad_norm": 8.046419730983052, "learning_rate": 2.5994443338981732e-06, "loss": 0.27640849351882935, "step": 2017 }, { "epoch": 2.0825593395252837, "grad_norm": 16.38030602904602, "learning_rate": 2.5941788085967757e-06, "loss": 0.5682850480079651, "step": 2018 }, { "epoch": 2.08359133126935, "grad_norm": 8.53582038116658, "learning_rate": 2.5889167529604952e-06, "loss": 0.27553099393844604, "step": 2019 }, { "epoch": 2.0846233230134157, "grad_norm": 11.37774393999609, "learning_rate": 2.5836581745782474e-06, "loss": 0.3816848695278168, "step": 2020 }, { "epoch": 2.085655314757482, "grad_norm": 7.72441522627485, "learning_rate": 2.57840308103393e-06, "loss": 0.4121520519256592, "step": 2021 }, { "epoch": 2.086687306501548, "grad_norm": 13.081335997853959, "learning_rate": 2.573151479906417e-06, "loss": 0.2650455832481384, "step": 2022 }, { "epoch": 2.087719298245614, "grad_norm": 12.924865840532487, "learning_rate": 2.5679033787695457e-06, "loss": 0.5901508331298828, "step": 2023 }, { "epoch": 2.08875128998968, "grad_norm": 11.10572686553205, "learning_rate": 2.5626587851921053e-06, "loss": 0.23889164626598358, "step": 2024 }, { "epoch": 2.0897832817337463, "grad_norm": 14.331827592304306, "learning_rate": 2.557417706737828e-06, "loss": 0.540855884552002, "step": 2025 }, { "epoch": 2.090815273477812, "grad_norm": 10.452283403325024, "learning_rate": 2.552180150965372e-06, "loss": 0.495063841342926, "step": 2026 }, { "epoch": 2.0918472652218782, "grad_norm": 11.627502238168367, "learning_rate": 2.5469461254283188e-06, "loss": 0.36154189705848694, "step": 2027 }, { "epoch": 2.0928792569659445, "grad_norm": 9.678570676226414, "learning_rate": 2.541715637675156e-06, "loss": 0.23651531338691711, "step": 2028 }, { "epoch": 2.09391124871001, "grad_norm": 13.55207712153128, "learning_rate": 2.5364886952492775e-06, "loss": 0.40609103441238403, "step": 2029 }, { "epoch": 2.0949432404540764, "grad_norm": 9.821258605592359, "learning_rate": 2.531265305688951e-06, "loss": 0.2444392591714859, "step": 2030 }, { "epoch": 2.0959752321981426, "grad_norm": 16.229633508796756, "learning_rate": 2.526045476527329e-06, "loss": 0.4760296046733856, "step": 2031 }, { "epoch": 2.0970072239422084, "grad_norm": 10.244670958345468, "learning_rate": 2.520829215292426e-06, "loss": 0.3381337523460388, "step": 2032 }, { "epoch": 2.0980392156862746, "grad_norm": 19.68762391684102, "learning_rate": 2.5156165295071134e-06, "loss": 0.48230743408203125, "step": 2033 }, { "epoch": 2.0990712074303404, "grad_norm": 8.34117887968612, "learning_rate": 2.5104074266891055e-06, "loss": 0.4035602807998657, "step": 2034 }, { "epoch": 2.1001031991744066, "grad_norm": 8.272915623942248, "learning_rate": 2.5052019143509454e-06, "loss": 0.3126871883869171, "step": 2035 }, { "epoch": 2.101135190918473, "grad_norm": 9.047040893287747, "learning_rate": 2.5000000000000015e-06, "loss": 0.5362358093261719, "step": 2036 }, { "epoch": 2.1021671826625385, "grad_norm": 11.128227003602051, "learning_rate": 2.494801691138453e-06, "loss": 0.2896101176738739, "step": 2037 }, { "epoch": 2.1031991744066048, "grad_norm": 9.20607292832876, "learning_rate": 2.489606995263279e-06, "loss": 0.2587195336818695, "step": 2038 }, { "epoch": 2.104231166150671, "grad_norm": 10.749347258860894, "learning_rate": 2.484415919866248e-06, "loss": 0.477316290140152, "step": 2039 }, { "epoch": 2.1052631578947367, "grad_norm": 9.253270074434505, "learning_rate": 2.4792284724339077e-06, "loss": 0.30313533544540405, "step": 2040 }, { "epoch": 2.106295149638803, "grad_norm": 10.419790311969663, "learning_rate": 2.4740446604475715e-06, "loss": 0.3734855055809021, "step": 2041 }, { "epoch": 2.107327141382869, "grad_norm": 9.075436281088471, "learning_rate": 2.468864491383311e-06, "loss": 0.3065953850746155, "step": 2042 }, { "epoch": 2.108359133126935, "grad_norm": 9.09576839076326, "learning_rate": 2.463687972711945e-06, "loss": 0.7746214270591736, "step": 2043 }, { "epoch": 2.109391124871001, "grad_norm": 13.341558733150194, "learning_rate": 2.4585151118990286e-06, "loss": 0.41721075773239136, "step": 2044 }, { "epoch": 2.110423116615067, "grad_norm": 10.442466435447885, "learning_rate": 2.453345916404835e-06, "loss": 0.4096275269985199, "step": 2045 }, { "epoch": 2.111455108359133, "grad_norm": 8.206956814870292, "learning_rate": 2.4481803936843586e-06, "loss": 0.1956387758255005, "step": 2046 }, { "epoch": 2.1124871001031993, "grad_norm": 9.703779186684427, "learning_rate": 2.4430185511872944e-06, "loss": 0.7298110723495483, "step": 2047 }, { "epoch": 2.113519091847265, "grad_norm": 8.37066996055875, "learning_rate": 2.4378603963580293e-06, "loss": 0.2986023724079132, "step": 2048 }, { "epoch": 2.1145510835913313, "grad_norm": 9.125753074947662, "learning_rate": 2.4327059366356325e-06, "loss": 0.253151535987854, "step": 2049 }, { "epoch": 2.1155830753353975, "grad_norm": 9.022273371401743, "learning_rate": 2.4275551794538445e-06, "loss": 0.3628351092338562, "step": 2050 }, { "epoch": 2.1166150670794632, "grad_norm": 13.284143093222118, "learning_rate": 2.422408132241065e-06, "loss": 0.48799097537994385, "step": 2051 }, { "epoch": 2.1176470588235294, "grad_norm": 47.35592241039155, "learning_rate": 2.417264802420343e-06, "loss": 1.0655995607376099, "step": 2052 }, { "epoch": 2.1186790505675956, "grad_norm": 9.004160541592759, "learning_rate": 2.4121251974093706e-06, "loss": 0.26724356412887573, "step": 2053 }, { "epoch": 2.1197110423116614, "grad_norm": 10.765443314103234, "learning_rate": 2.406989324620459e-06, "loss": 0.23049749433994293, "step": 2054 }, { "epoch": 2.1207430340557276, "grad_norm": 8.958513741945511, "learning_rate": 2.401857191460544e-06, "loss": 0.3267396092414856, "step": 2055 }, { "epoch": 2.121775025799794, "grad_norm": 11.180547607555656, "learning_rate": 2.396728805331167e-06, "loss": 0.5201942920684814, "step": 2056 }, { "epoch": 2.1228070175438596, "grad_norm": 9.6878631954348, "learning_rate": 2.391604173628465e-06, "loss": 0.38582032918930054, "step": 2057 }, { "epoch": 2.123839009287926, "grad_norm": 9.627683093068656, "learning_rate": 2.3864833037431596e-06, "loss": 0.3799446225166321, "step": 2058 }, { "epoch": 2.1248710010319916, "grad_norm": 14.077616949596758, "learning_rate": 2.3813662030605473e-06, "loss": 0.3688233494758606, "step": 2059 }, { "epoch": 2.1259029927760578, "grad_norm": 8.238448213968562, "learning_rate": 2.3762528789604887e-06, "loss": 0.2844564616680145, "step": 2060 }, { "epoch": 2.126934984520124, "grad_norm": 14.778310625808261, "learning_rate": 2.3711433388174e-06, "loss": 0.9820846915245056, "step": 2061 }, { "epoch": 2.1279669762641897, "grad_norm": 7.038877981529248, "learning_rate": 2.3660375900002364e-06, "loss": 0.23352807760238647, "step": 2062 }, { "epoch": 2.128998968008256, "grad_norm": 8.801565337485316, "learning_rate": 2.3609356398724896e-06, "loss": 0.47995999455451965, "step": 2063 }, { "epoch": 2.130030959752322, "grad_norm": 6.5514911977888906, "learning_rate": 2.3558374957921678e-06, "loss": 0.1579141765832901, "step": 2064 }, { "epoch": 2.131062951496388, "grad_norm": 13.345001457721189, "learning_rate": 2.350743165111793e-06, "loss": 0.7573691606521606, "step": 2065 }, { "epoch": 2.132094943240454, "grad_norm": 8.851065098558594, "learning_rate": 2.3456526551783874e-06, "loss": 0.32301467657089233, "step": 2066 }, { "epoch": 2.1331269349845203, "grad_norm": 8.579398770461948, "learning_rate": 2.340565973333464e-06, "loss": 0.38894182443618774, "step": 2067 }, { "epoch": 2.134158926728586, "grad_norm": 10.964225902857754, "learning_rate": 2.3354831269130133e-06, "loss": 0.4255586564540863, "step": 2068 }, { "epoch": 2.1351909184726523, "grad_norm": 7.552562151337086, "learning_rate": 2.330404123247495e-06, "loss": 0.24562402069568634, "step": 2069 }, { "epoch": 2.136222910216718, "grad_norm": 17.04563208493501, "learning_rate": 2.3253289696618257e-06, "loss": 0.3281315565109253, "step": 2070 }, { "epoch": 2.1372549019607843, "grad_norm": 10.631771131445332, "learning_rate": 2.320257673475371e-06, "loss": 1.191027283668518, "step": 2071 }, { "epoch": 2.1382868937048505, "grad_norm": 10.709973973674364, "learning_rate": 2.3151902420019357e-06, "loss": 0.6106898784637451, "step": 2072 }, { "epoch": 2.1393188854489162, "grad_norm": 11.71516331609504, "learning_rate": 2.310126682549742e-06, "loss": 0.7854446172714233, "step": 2073 }, { "epoch": 2.1403508771929824, "grad_norm": 10.26037239508922, "learning_rate": 2.305067002421438e-06, "loss": 0.441925972700119, "step": 2074 }, { "epoch": 2.1413828689370487, "grad_norm": 11.886166662674606, "learning_rate": 2.300011208914071e-06, "loss": 0.4592568278312683, "step": 2075 }, { "epoch": 2.1424148606811144, "grad_norm": 10.504523851406987, "learning_rate": 2.2949593093190863e-06, "loss": 0.3115933835506439, "step": 2076 }, { "epoch": 2.1434468524251806, "grad_norm": 13.024770233030189, "learning_rate": 2.2899113109223113e-06, "loss": 0.40064454078674316, "step": 2077 }, { "epoch": 2.144478844169247, "grad_norm": 17.22807437979318, "learning_rate": 2.2848672210039484e-06, "loss": 0.6171914935112, "step": 2078 }, { "epoch": 2.1455108359133126, "grad_norm": 6.25175917101113, "learning_rate": 2.279827046838562e-06, "loss": 0.3618736267089844, "step": 2079 }, { "epoch": 2.146542827657379, "grad_norm": 7.078420574869955, "learning_rate": 2.274790795695071e-06, "loss": 0.43610116839408875, "step": 2080 }, { "epoch": 2.147574819401445, "grad_norm": 11.264346934071357, "learning_rate": 2.269758474836734e-06, "loss": 0.8911107778549194, "step": 2081 }, { "epoch": 2.1486068111455108, "grad_norm": 15.930742320989614, "learning_rate": 2.264730091521146e-06, "loss": 0.48314476013183594, "step": 2082 }, { "epoch": 2.149638802889577, "grad_norm": 8.171698428610899, "learning_rate": 2.259705653000216e-06, "loss": 0.25453081727027893, "step": 2083 }, { "epoch": 2.1506707946336427, "grad_norm": 7.6375827588902965, "learning_rate": 2.2546851665201692e-06, "loss": 0.1955549716949463, "step": 2084 }, { "epoch": 2.151702786377709, "grad_norm": 7.383691170416679, "learning_rate": 2.24966863932153e-06, "loss": 0.1405426263809204, "step": 2085 }, { "epoch": 2.152734778121775, "grad_norm": 8.801702748150783, "learning_rate": 2.2446560786391135e-06, "loss": 0.8943809270858765, "step": 2086 }, { "epoch": 2.153766769865841, "grad_norm": 7.145573332227521, "learning_rate": 2.239647491702013e-06, "loss": 0.35383838415145874, "step": 2087 }, { "epoch": 2.154798761609907, "grad_norm": 12.765906885545176, "learning_rate": 2.2346428857335904e-06, "loss": 0.7786715030670166, "step": 2088 }, { "epoch": 2.1558307533539733, "grad_norm": 9.688973922110497, "learning_rate": 2.2296422679514686e-06, "loss": 0.39823275804519653, "step": 2089 }, { "epoch": 2.156862745098039, "grad_norm": 12.736295425673598, "learning_rate": 2.224645645567517e-06, "loss": 0.9067201018333435, "step": 2090 }, { "epoch": 2.1578947368421053, "grad_norm": 11.046850636454053, "learning_rate": 2.219653025787844e-06, "loss": 0.38563841581344604, "step": 2091 }, { "epoch": 2.1589267285861715, "grad_norm": 12.670959912418832, "learning_rate": 2.2146644158127827e-06, "loss": 0.44574642181396484, "step": 2092 }, { "epoch": 2.1599587203302373, "grad_norm": 11.52877149882546, "learning_rate": 2.209679822836886e-06, "loss": 0.7529317140579224, "step": 2093 }, { "epoch": 2.1609907120743035, "grad_norm": 13.481079102148628, "learning_rate": 2.2046992540489126e-06, "loss": 0.28910043835639954, "step": 2094 }, { "epoch": 2.1620227038183693, "grad_norm": 10.545525658517882, "learning_rate": 2.199722716631818e-06, "loss": 0.5858748555183411, "step": 2095 }, { "epoch": 2.1630546955624355, "grad_norm": 13.286208111212561, "learning_rate": 2.1947502177627437e-06, "loss": 0.37049344182014465, "step": 2096 }, { "epoch": 2.1640866873065017, "grad_norm": 13.777848367998493, "learning_rate": 2.189781764613006e-06, "loss": 0.4377231299877167, "step": 2097 }, { "epoch": 2.1651186790505674, "grad_norm": 9.496773578931737, "learning_rate": 2.1848173643480875e-06, "loss": 0.5167649388313293, "step": 2098 }, { "epoch": 2.1661506707946336, "grad_norm": 15.849745603382729, "learning_rate": 2.179857024127624e-06, "loss": 0.5104716420173645, "step": 2099 }, { "epoch": 2.1671826625387, "grad_norm": 12.76752296827376, "learning_rate": 2.1749007511054005e-06, "loss": 0.39890056848526, "step": 2100 }, { "epoch": 2.1682146542827656, "grad_norm": 8.773963974580964, "learning_rate": 2.1699485524293285e-06, "loss": 0.4501742124557495, "step": 2101 }, { "epoch": 2.169246646026832, "grad_norm": 13.173250814161523, "learning_rate": 2.1650004352414493e-06, "loss": 0.555133044719696, "step": 2102 }, { "epoch": 2.170278637770898, "grad_norm": 11.020899187251302, "learning_rate": 2.1600564066779172e-06, "loss": 0.38730090856552124, "step": 2103 }, { "epoch": 2.171310629514964, "grad_norm": 11.130112411783447, "learning_rate": 2.1551164738689896e-06, "loss": 0.5620251893997192, "step": 2104 }, { "epoch": 2.17234262125903, "grad_norm": 11.614057941800839, "learning_rate": 2.1501806439390156e-06, "loss": 0.41809794306755066, "step": 2105 }, { "epoch": 2.173374613003096, "grad_norm": 12.650393566481371, "learning_rate": 2.1452489240064284e-06, "loss": 0.6263318061828613, "step": 2106 }, { "epoch": 2.174406604747162, "grad_norm": 12.3807562566126, "learning_rate": 2.1403213211837344e-06, "loss": 0.5498677492141724, "step": 2107 }, { "epoch": 2.175438596491228, "grad_norm": 8.76125024687879, "learning_rate": 2.1353978425775006e-06, "loss": 0.3538694977760315, "step": 2108 }, { "epoch": 2.176470588235294, "grad_norm": 15.819502963317236, "learning_rate": 2.130478495288347e-06, "loss": 0.7942072153091431, "step": 2109 }, { "epoch": 2.17750257997936, "grad_norm": 9.908160370261319, "learning_rate": 2.125563286410938e-06, "loss": 0.47650521993637085, "step": 2110 }, { "epoch": 2.1785345717234263, "grad_norm": 14.55609857912769, "learning_rate": 2.1206522230339634e-06, "loss": 0.42236289381980896, "step": 2111 }, { "epoch": 2.179566563467492, "grad_norm": 17.012814649988037, "learning_rate": 2.1157453122401385e-06, "loss": 1.215108036994934, "step": 2112 }, { "epoch": 2.1805985552115583, "grad_norm": 15.556015184879993, "learning_rate": 2.11084256110619e-06, "loss": 0.7476073503494263, "step": 2113 }, { "epoch": 2.1816305469556245, "grad_norm": 12.208046332164217, "learning_rate": 2.105943976702845e-06, "loss": 0.30959609150886536, "step": 2114 }, { "epoch": 2.1826625386996903, "grad_norm": 11.309562486851062, "learning_rate": 2.1010495660948206e-06, "loss": 0.6432572603225708, "step": 2115 }, { "epoch": 2.1836945304437565, "grad_norm": 11.565800824630927, "learning_rate": 2.0961593363408154e-06, "loss": 1.2213436365127563, "step": 2116 }, { "epoch": 2.1847265221878223, "grad_norm": 10.806743127598923, "learning_rate": 2.0912732944934984e-06, "loss": 0.2402806282043457, "step": 2117 }, { "epoch": 2.1857585139318885, "grad_norm": 15.266326681062148, "learning_rate": 2.0863914475994974e-06, "loss": 0.7473478317260742, "step": 2118 }, { "epoch": 2.1867905056759547, "grad_norm": 11.85831662157018, "learning_rate": 2.081513802699394e-06, "loss": 1.5603134632110596, "step": 2119 }, { "epoch": 2.1878224974200204, "grad_norm": 12.610129105672378, "learning_rate": 2.076640366827703e-06, "loss": 0.5242027044296265, "step": 2120 }, { "epoch": 2.1888544891640866, "grad_norm": 7.985787003390586, "learning_rate": 2.0717711470128747e-06, "loss": 0.22941672801971436, "step": 2121 }, { "epoch": 2.189886480908153, "grad_norm": 18.4640777229605, "learning_rate": 2.0669061502772776e-06, "loss": 0.5810708999633789, "step": 2122 }, { "epoch": 2.1909184726522186, "grad_norm": 14.977477850492251, "learning_rate": 2.0620453836371885e-06, "loss": 0.4758716821670532, "step": 2123 }, { "epoch": 2.191950464396285, "grad_norm": 8.226507927253131, "learning_rate": 2.0571888541027857e-06, "loss": 0.3519587218761444, "step": 2124 }, { "epoch": 2.192982456140351, "grad_norm": 8.9390473817918, "learning_rate": 2.0523365686781345e-06, "loss": 0.4949192702770233, "step": 2125 }, { "epoch": 2.194014447884417, "grad_norm": 11.399187864137117, "learning_rate": 2.0474885343611804e-06, "loss": 0.33248478174209595, "step": 2126 }, { "epoch": 2.195046439628483, "grad_norm": 9.581866135820988, "learning_rate": 2.0426447581437386e-06, "loss": 0.3073532283306122, "step": 2127 }, { "epoch": 2.196078431372549, "grad_norm": 13.688929287126498, "learning_rate": 2.0378052470114822e-06, "loss": 0.5941171646118164, "step": 2128 }, { "epoch": 2.197110423116615, "grad_norm": 10.344955312103714, "learning_rate": 2.032970007943935e-06, "loss": 0.42333728075027466, "step": 2129 }, { "epoch": 2.198142414860681, "grad_norm": 11.410027161328824, "learning_rate": 2.028139047914456e-06, "loss": 0.45369571447372437, "step": 2130 }, { "epoch": 2.1991744066047474, "grad_norm": 9.717065128794871, "learning_rate": 2.0233123738902355e-06, "loss": 0.2441929429769516, "step": 2131 }, { "epoch": 2.200206398348813, "grad_norm": 10.775810333693101, "learning_rate": 2.018489992832283e-06, "loss": 0.47238659858703613, "step": 2132 }, { "epoch": 2.2012383900928794, "grad_norm": 10.077291541682422, "learning_rate": 2.0136719116954168e-06, "loss": 0.2882574200630188, "step": 2133 }, { "epoch": 2.202270381836945, "grad_norm": 8.125021055807146, "learning_rate": 2.0088581374282514e-06, "loss": 0.5156424045562744, "step": 2134 }, { "epoch": 2.2033023735810113, "grad_norm": 8.818725252024239, "learning_rate": 2.0040486769731928e-06, "loss": 0.23826254904270172, "step": 2135 }, { "epoch": 2.2043343653250775, "grad_norm": 8.018001950456375, "learning_rate": 1.999243537266424e-06, "loss": 0.4738239049911499, "step": 2136 }, { "epoch": 2.2053663570691433, "grad_norm": 10.2654264334262, "learning_rate": 1.994442725237898e-06, "loss": 0.34067076444625854, "step": 2137 }, { "epoch": 2.2063983488132095, "grad_norm": 10.726847895363598, "learning_rate": 1.989646247811326e-06, "loss": 0.4780583381652832, "step": 2138 }, { "epoch": 2.2074303405572757, "grad_norm": 11.005879210645626, "learning_rate": 1.9848541119041644e-06, "loss": 0.4559696912765503, "step": 2139 }, { "epoch": 2.2084623323013415, "grad_norm": 11.221008336553952, "learning_rate": 1.980066324427613e-06, "loss": 0.5784523487091064, "step": 2140 }, { "epoch": 2.2094943240454077, "grad_norm": 11.789084325482024, "learning_rate": 1.9752828922865993e-06, "loss": 0.6254563331604004, "step": 2141 }, { "epoch": 2.2105263157894735, "grad_norm": 9.819567138617387, "learning_rate": 1.9705038223797673e-06, "loss": 0.5376569628715515, "step": 2142 }, { "epoch": 2.2115583075335397, "grad_norm": 9.042544730500095, "learning_rate": 1.965729121599473e-06, "loss": 0.32408738136291504, "step": 2143 }, { "epoch": 2.212590299277606, "grad_norm": 10.077898140702857, "learning_rate": 1.960958796831769e-06, "loss": 0.2823505401611328, "step": 2144 }, { "epoch": 2.2136222910216716, "grad_norm": 7.705419094182372, "learning_rate": 1.956192854956397e-06, "loss": 0.2653104066848755, "step": 2145 }, { "epoch": 2.214654282765738, "grad_norm": 9.482777149614346, "learning_rate": 1.9514313028467783e-06, "loss": 0.4442105293273926, "step": 2146 }, { "epoch": 2.215686274509804, "grad_norm": 18.816500800240203, "learning_rate": 1.9466741473700033e-06, "loss": 0.7567444443702698, "step": 2147 }, { "epoch": 2.21671826625387, "grad_norm": 10.523644645810517, "learning_rate": 1.9419213953868236e-06, "loss": 0.6099028587341309, "step": 2148 }, { "epoch": 2.217750257997936, "grad_norm": 13.450744551287302, "learning_rate": 1.9371730537516344e-06, "loss": 0.6623318195343018, "step": 2149 }, { "epoch": 2.218782249742002, "grad_norm": 9.569934224545488, "learning_rate": 1.9324291293124747e-06, "loss": 0.47942644357681274, "step": 2150 }, { "epoch": 2.219814241486068, "grad_norm": 10.18178417543629, "learning_rate": 1.927689628911013e-06, "loss": 1.0906792879104614, "step": 2151 }, { "epoch": 2.220846233230134, "grad_norm": 13.166259873833667, "learning_rate": 1.9229545593825367e-06, "loss": 0.30344152450561523, "step": 2152 }, { "epoch": 2.2218782249742004, "grad_norm": 9.508415212815473, "learning_rate": 1.9182239275559443e-06, "loss": 0.4512457549571991, "step": 2153 }, { "epoch": 2.222910216718266, "grad_norm": 13.74278373206301, "learning_rate": 1.913497740253728e-06, "loss": 0.3366636633872986, "step": 2154 }, { "epoch": 2.2239422084623324, "grad_norm": 11.872675446972762, "learning_rate": 1.9087760042919808e-06, "loss": 0.2936578392982483, "step": 2155 }, { "epoch": 2.2249742002063986, "grad_norm": 9.914652472935117, "learning_rate": 1.9040587264803673e-06, "loss": 0.6060217618942261, "step": 2156 }, { "epoch": 2.2260061919504643, "grad_norm": 10.656783486055465, "learning_rate": 1.899345913622128e-06, "loss": 0.763049840927124, "step": 2157 }, { "epoch": 2.2270381836945305, "grad_norm": 12.604264061427578, "learning_rate": 1.8946375725140581e-06, "loss": 0.6956782937049866, "step": 2158 }, { "epoch": 2.2280701754385963, "grad_norm": 9.332473056549954, "learning_rate": 1.8899337099465092e-06, "loss": 0.690141499042511, "step": 2159 }, { "epoch": 2.2291021671826625, "grad_norm": 7.301853545257415, "learning_rate": 1.8852343327033717e-06, "loss": 0.23047930002212524, "step": 2160 }, { "epoch": 2.2301341589267287, "grad_norm": 9.85687211577048, "learning_rate": 1.8805394475620674e-06, "loss": 0.4893918037414551, "step": 2161 }, { "epoch": 2.2311661506707945, "grad_norm": 8.689634125858236, "learning_rate": 1.8758490612935398e-06, "loss": 0.447439968585968, "step": 2162 }, { "epoch": 2.2321981424148607, "grad_norm": 13.30896756432028, "learning_rate": 1.8711631806622443e-06, "loss": 0.48308444023132324, "step": 2163 }, { "epoch": 2.233230134158927, "grad_norm": 12.456869689142426, "learning_rate": 1.8664818124261375e-06, "loss": 0.3598782420158386, "step": 2164 }, { "epoch": 2.2342621259029927, "grad_norm": 9.21047384707439, "learning_rate": 1.8618049633366698e-06, "loss": 0.5054575800895691, "step": 2165 }, { "epoch": 2.235294117647059, "grad_norm": 7.834072364800285, "learning_rate": 1.8571326401387717e-06, "loss": 0.14533498883247375, "step": 2166 }, { "epoch": 2.2363261093911246, "grad_norm": 8.6530833971032, "learning_rate": 1.8524648495708514e-06, "loss": 0.661357045173645, "step": 2167 }, { "epoch": 2.237358101135191, "grad_norm": 11.19943394122764, "learning_rate": 1.8478015983647718e-06, "loss": 0.6174920797348022, "step": 2168 }, { "epoch": 2.238390092879257, "grad_norm": 10.366083307918593, "learning_rate": 1.8431428932458556e-06, "loss": 0.6961145997047424, "step": 2169 }, { "epoch": 2.239422084623323, "grad_norm": 16.282273584205967, "learning_rate": 1.8384887409328688e-06, "loss": 0.42053890228271484, "step": 2170 }, { "epoch": 2.240454076367389, "grad_norm": 11.270734561202865, "learning_rate": 1.8338391481380097e-06, "loss": 0.6044822335243225, "step": 2171 }, { "epoch": 2.2414860681114552, "grad_norm": 11.13863087178757, "learning_rate": 1.8291941215669024e-06, "loss": 0.17979833483695984, "step": 2172 }, { "epoch": 2.242518059855521, "grad_norm": 11.87594192768117, "learning_rate": 1.8245536679185793e-06, "loss": 0.6116322875022888, "step": 2173 }, { "epoch": 2.243550051599587, "grad_norm": 9.237256375764636, "learning_rate": 1.8199177938854895e-06, "loss": 0.44815587997436523, "step": 2174 }, { "epoch": 2.2445820433436534, "grad_norm": 6.584204256501777, "learning_rate": 1.8152865061534675e-06, "loss": 0.4116666615009308, "step": 2175 }, { "epoch": 2.245614035087719, "grad_norm": 13.36188671219953, "learning_rate": 1.8106598114017398e-06, "loss": 0.5253646969795227, "step": 2176 }, { "epoch": 2.2466460268317854, "grad_norm": 11.92523191299553, "learning_rate": 1.806037716302902e-06, "loss": 0.48259201645851135, "step": 2177 }, { "epoch": 2.2476780185758516, "grad_norm": 13.561237646204148, "learning_rate": 1.801420227522921e-06, "loss": 1.8981776237487793, "step": 2178 }, { "epoch": 2.2487100103199174, "grad_norm": 9.074617736581224, "learning_rate": 1.796807351721121e-06, "loss": 0.30176305770874023, "step": 2179 }, { "epoch": 2.2497420020639836, "grad_norm": 19.161891287407926, "learning_rate": 1.7921990955501705e-06, "loss": 0.301688551902771, "step": 2180 }, { "epoch": 2.2507739938080498, "grad_norm": 15.535421035921493, "learning_rate": 1.7875954656560802e-06, "loss": 1.0767804384231567, "step": 2181 }, { "epoch": 2.2518059855521155, "grad_norm": 17.0125475831456, "learning_rate": 1.7829964686781793e-06, "loss": 0.8623077869415283, "step": 2182 }, { "epoch": 2.2528379772961817, "grad_norm": 8.124126424867834, "learning_rate": 1.7784021112491273e-06, "loss": 0.5567214488983154, "step": 2183 }, { "epoch": 2.2538699690402475, "grad_norm": 10.875316602560629, "learning_rate": 1.7738123999948853e-06, "loss": 0.3203074336051941, "step": 2184 }, { "epoch": 2.2549019607843137, "grad_norm": 10.703077649740377, "learning_rate": 1.769227341534715e-06, "loss": 0.8443643450737, "step": 2185 }, { "epoch": 2.25593395252838, "grad_norm": 10.909463202886121, "learning_rate": 1.7646469424811707e-06, "loss": 0.6140201687812805, "step": 2186 }, { "epoch": 2.2569659442724457, "grad_norm": 8.473032286683102, "learning_rate": 1.7600712094400802e-06, "loss": 0.3071803152561188, "step": 2187 }, { "epoch": 2.257997936016512, "grad_norm": 11.657311094006454, "learning_rate": 1.755500149010549e-06, "loss": 0.8473285436630249, "step": 2188 }, { "epoch": 2.259029927760578, "grad_norm": 9.516720867180952, "learning_rate": 1.7509337677849404e-06, "loss": 0.1772429496049881, "step": 2189 }, { "epoch": 2.260061919504644, "grad_norm": 9.494675596695124, "learning_rate": 1.7463720723488698e-06, "loss": 0.48868194222450256, "step": 2190 }, { "epoch": 2.26109391124871, "grad_norm": 12.060059218685938, "learning_rate": 1.7418150692811969e-06, "loss": 0.2509000897407532, "step": 2191 }, { "epoch": 2.262125902992776, "grad_norm": 7.065907524965014, "learning_rate": 1.737262765154008e-06, "loss": 0.16848008334636688, "step": 2192 }, { "epoch": 2.263157894736842, "grad_norm": 6.858130398118923, "learning_rate": 1.7327151665326208e-06, "loss": 0.25383830070495605, "step": 2193 }, { "epoch": 2.2641898864809082, "grad_norm": 25.3718609084507, "learning_rate": 1.7281722799755613e-06, "loss": 0.4060966968536377, "step": 2194 }, { "epoch": 2.265221878224974, "grad_norm": 12.947905219908122, "learning_rate": 1.723634112034563e-06, "loss": 0.5273417234420776, "step": 2195 }, { "epoch": 2.26625386996904, "grad_norm": 13.566005766002926, "learning_rate": 1.7191006692545493e-06, "loss": 0.26978084444999695, "step": 2196 }, { "epoch": 2.2672858617131064, "grad_norm": 15.518192045444478, "learning_rate": 1.7145719581736337e-06, "loss": 0.8343046307563782, "step": 2197 }, { "epoch": 2.268317853457172, "grad_norm": 11.141763148539908, "learning_rate": 1.710047985323104e-06, "loss": 0.21038788557052612, "step": 2198 }, { "epoch": 2.2693498452012384, "grad_norm": 10.9815731017179, "learning_rate": 1.7055287572274142e-06, "loss": 0.40513765811920166, "step": 2199 }, { "epoch": 2.2703818369453046, "grad_norm": 13.750227883056459, "learning_rate": 1.7010142804041785e-06, "loss": 0.532772421836853, "step": 2200 }, { "epoch": 2.2714138286893704, "grad_norm": 8.871418169099249, "learning_rate": 1.6965045613641523e-06, "loss": 0.4089523255825043, "step": 2201 }, { "epoch": 2.2724458204334366, "grad_norm": 9.437343178262012, "learning_rate": 1.6919996066112337e-06, "loss": 0.2525821030139923, "step": 2202 }, { "epoch": 2.2734778121775028, "grad_norm": 18.80428833428539, "learning_rate": 1.6874994226424518e-06, "loss": 0.3479749262332916, "step": 2203 }, { "epoch": 2.2745098039215685, "grad_norm": 11.304407306056925, "learning_rate": 1.6830040159479521e-06, "loss": 0.5370907783508301, "step": 2204 }, { "epoch": 2.2755417956656347, "grad_norm": 17.206317290083987, "learning_rate": 1.6785133930109927e-06, "loss": 1.4786996841430664, "step": 2205 }, { "epoch": 2.276573787409701, "grad_norm": 11.036270573698204, "learning_rate": 1.674027560307927e-06, "loss": 0.34293660521507263, "step": 2206 }, { "epoch": 2.2776057791537667, "grad_norm": 10.455555407974552, "learning_rate": 1.6695465243082055e-06, "loss": 0.27484482526779175, "step": 2207 }, { "epoch": 2.278637770897833, "grad_norm": 6.027728038520487, "learning_rate": 1.66507029147436e-06, "loss": 0.16362959146499634, "step": 2208 }, { "epoch": 2.2796697626418987, "grad_norm": 11.285134553373428, "learning_rate": 1.6605988682619944e-06, "loss": 0.23489339649677277, "step": 2209 }, { "epoch": 2.280701754385965, "grad_norm": 13.041895042789484, "learning_rate": 1.6561322611197772e-06, "loss": 1.8350913524627686, "step": 2210 }, { "epoch": 2.281733746130031, "grad_norm": 6.148647089292928, "learning_rate": 1.6516704764894265e-06, "loss": 0.1827671229839325, "step": 2211 }, { "epoch": 2.282765737874097, "grad_norm": 9.946301771192338, "learning_rate": 1.6472135208057128e-06, "loss": 0.3029034733772278, "step": 2212 }, { "epoch": 2.283797729618163, "grad_norm": 10.743644308129369, "learning_rate": 1.642761400496438e-06, "loss": 0.25189274549484253, "step": 2213 }, { "epoch": 2.2848297213622293, "grad_norm": 8.923316366999579, "learning_rate": 1.6383141219824328e-06, "loss": 0.22978025674819946, "step": 2214 }, { "epoch": 2.285861713106295, "grad_norm": 10.195349858709296, "learning_rate": 1.6338716916775394e-06, "loss": 0.16604535281658173, "step": 2215 }, { "epoch": 2.2868937048503613, "grad_norm": 6.643488404122427, "learning_rate": 1.629434115988614e-06, "loss": 0.35414767265319824, "step": 2216 }, { "epoch": 2.287925696594427, "grad_norm": 8.525699870440226, "learning_rate": 1.6250014013155092e-06, "loss": 0.3973991274833679, "step": 2217 }, { "epoch": 2.2889576883384932, "grad_norm": 8.372392423539754, "learning_rate": 1.6205735540510676e-06, "loss": 0.4818735718727112, "step": 2218 }, { "epoch": 2.2899896800825594, "grad_norm": 8.99426902317675, "learning_rate": 1.6161505805811135e-06, "loss": 0.23810598254203796, "step": 2219 }, { "epoch": 2.291021671826625, "grad_norm": 10.67750050459878, "learning_rate": 1.611732487284437e-06, "loss": 0.4141588807106018, "step": 2220 }, { "epoch": 2.2920536635706914, "grad_norm": 9.985959799982476, "learning_rate": 1.6073192805327936e-06, "loss": 0.5675234198570251, "step": 2221 }, { "epoch": 2.2930856553147576, "grad_norm": 8.269013039840578, "learning_rate": 1.6029109666908944e-06, "loss": 0.3020309805870056, "step": 2222 }, { "epoch": 2.2941176470588234, "grad_norm": 10.309152116306388, "learning_rate": 1.5985075521163907e-06, "loss": 0.5373930931091309, "step": 2223 }, { "epoch": 2.2951496388028896, "grad_norm": 14.136819996226265, "learning_rate": 1.5941090431598654e-06, "loss": 0.6357072591781616, "step": 2224 }, { "epoch": 2.296181630546956, "grad_norm": 9.207755036750997, "learning_rate": 1.5897154461648317e-06, "loss": 0.562431812286377, "step": 2225 }, { "epoch": 2.2972136222910216, "grad_norm": 7.501876404025285, "learning_rate": 1.5853267674677154e-06, "loss": 0.3901820480823517, "step": 2226 }, { "epoch": 2.2982456140350878, "grad_norm": 10.882395654407299, "learning_rate": 1.580943013397851e-06, "loss": 0.2067510038614273, "step": 2227 }, { "epoch": 2.299277605779154, "grad_norm": 8.980900675238086, "learning_rate": 1.5765641902774704e-06, "loss": 0.2763798236846924, "step": 2228 }, { "epoch": 2.3003095975232197, "grad_norm": 13.686128722812816, "learning_rate": 1.572190304421694e-06, "loss": 0.3706619143486023, "step": 2229 }, { "epoch": 2.301341589267286, "grad_norm": 10.631613008658723, "learning_rate": 1.567821362138518e-06, "loss": 0.8434795141220093, "step": 2230 }, { "epoch": 2.302373581011352, "grad_norm": 7.8863603274727225, "learning_rate": 1.5634573697288164e-06, "loss": 0.3212193548679352, "step": 2231 }, { "epoch": 2.303405572755418, "grad_norm": 10.281296985834022, "learning_rate": 1.5590983334863191e-06, "loss": 0.4050968587398529, "step": 2232 }, { "epoch": 2.304437564499484, "grad_norm": 10.029639794509817, "learning_rate": 1.5547442596976115e-06, "loss": 0.5072900652885437, "step": 2233 }, { "epoch": 2.30546955624355, "grad_norm": 8.773520153143581, "learning_rate": 1.550395154642117e-06, "loss": 0.23122380673885345, "step": 2234 }, { "epoch": 2.306501547987616, "grad_norm": 14.580360787576096, "learning_rate": 1.5460510245920984e-06, "loss": 0.37928903102874756, "step": 2235 }, { "epoch": 2.3075335397316823, "grad_norm": 22.26283919312677, "learning_rate": 1.5417118758126408e-06, "loss": 0.6587377786636353, "step": 2236 }, { "epoch": 2.308565531475748, "grad_norm": 8.557340391624383, "learning_rate": 1.537377714561647e-06, "loss": 0.3196367025375366, "step": 2237 }, { "epoch": 2.3095975232198143, "grad_norm": 15.63210352214777, "learning_rate": 1.533048547089827e-06, "loss": 0.5556677579879761, "step": 2238 }, { "epoch": 2.3106295149638805, "grad_norm": 10.211616555441974, "learning_rate": 1.5287243796406852e-06, "loss": 0.2998398542404175, "step": 2239 }, { "epoch": 2.3116615067079462, "grad_norm": 9.720441767418976, "learning_rate": 1.524405218450517e-06, "loss": 0.3288514018058777, "step": 2240 }, { "epoch": 2.3126934984520124, "grad_norm": 10.325944493786375, "learning_rate": 1.5200910697484016e-06, "loss": 0.5593407154083252, "step": 2241 }, { "epoch": 2.313725490196078, "grad_norm": 10.501982039385146, "learning_rate": 1.5157819397561863e-06, "loss": 0.3179510831832886, "step": 2242 }, { "epoch": 2.3147574819401444, "grad_norm": 9.417056273079762, "learning_rate": 1.5114778346884768e-06, "loss": 0.5840306878089905, "step": 2243 }, { "epoch": 2.3157894736842106, "grad_norm": 37.14468874568553, "learning_rate": 1.5071787607526366e-06, "loss": 0.5103886723518372, "step": 2244 }, { "epoch": 2.3168214654282764, "grad_norm": 9.950016255402419, "learning_rate": 1.5028847241487715e-06, "loss": 0.5624645948410034, "step": 2245 }, { "epoch": 2.3178534571723426, "grad_norm": 9.161989586045065, "learning_rate": 1.4985957310697242e-06, "loss": 0.37405914068222046, "step": 2246 }, { "epoch": 2.318885448916409, "grad_norm": 7.827841822954748, "learning_rate": 1.4943117877010605e-06, "loss": 0.32541224360466003, "step": 2247 }, { "epoch": 2.3199174406604746, "grad_norm": 10.214626163709012, "learning_rate": 1.4900329002210684e-06, "loss": 0.3640068471431732, "step": 2248 }, { "epoch": 2.3209494324045408, "grad_norm": 16.64776495496287, "learning_rate": 1.4857590748007373e-06, "loss": 0.36090803146362305, "step": 2249 }, { "epoch": 2.321981424148607, "grad_norm": 8.39665025975816, "learning_rate": 1.4814903176037605e-06, "loss": 0.23087266087532043, "step": 2250 }, { "epoch": 2.3230134158926727, "grad_norm": 12.400366359823169, "learning_rate": 1.477226634786525e-06, "loss": 0.5182145833969116, "step": 2251 }, { "epoch": 2.324045407636739, "grad_norm": 23.01924582709264, "learning_rate": 1.472968032498095e-06, "loss": 1.7229740619659424, "step": 2252 }, { "epoch": 2.325077399380805, "grad_norm": 8.801602741757135, "learning_rate": 1.4687145168802063e-06, "loss": 0.24391722679138184, "step": 2253 }, { "epoch": 2.326109391124871, "grad_norm": 9.167374353837271, "learning_rate": 1.4644660940672628e-06, "loss": 0.4265590310096741, "step": 2254 }, { "epoch": 2.327141382868937, "grad_norm": 8.87708478323919, "learning_rate": 1.4602227701863214e-06, "loss": 0.30143553018569946, "step": 2255 }, { "epoch": 2.3281733746130033, "grad_norm": 14.076434184627097, "learning_rate": 1.4559845513570859e-06, "loss": 0.345781147480011, "step": 2256 }, { "epoch": 2.329205366357069, "grad_norm": 9.343761101164164, "learning_rate": 1.4517514436918995e-06, "loss": 0.27536025643348694, "step": 2257 }, { "epoch": 2.3302373581011353, "grad_norm": 8.772146897151353, "learning_rate": 1.4475234532957284e-06, "loss": 0.38081449270248413, "step": 2258 }, { "epoch": 2.331269349845201, "grad_norm": 8.377025171999012, "learning_rate": 1.4433005862661625e-06, "loss": 0.30673426389694214, "step": 2259 }, { "epoch": 2.3323013415892673, "grad_norm": 13.785014247606318, "learning_rate": 1.439082848693406e-06, "loss": 0.7989313006401062, "step": 2260 }, { "epoch": 2.3333333333333335, "grad_norm": 12.196511340922179, "learning_rate": 1.434870246660262e-06, "loss": 0.4146811366081238, "step": 2261 }, { "epoch": 2.3343653250773992, "grad_norm": 11.294083924432297, "learning_rate": 1.4306627862421246e-06, "loss": 0.5966222882270813, "step": 2262 }, { "epoch": 2.3353973168214655, "grad_norm": 10.66807634497044, "learning_rate": 1.4264604735069764e-06, "loss": 0.5763157606124878, "step": 2263 }, { "epoch": 2.3364293085655317, "grad_norm": 11.371466604330612, "learning_rate": 1.4222633145153758e-06, "loss": 0.7577279806137085, "step": 2264 }, { "epoch": 2.3374613003095974, "grad_norm": 9.889572174171429, "learning_rate": 1.4180713153204468e-06, "loss": 0.4307854175567627, "step": 2265 }, { "epoch": 2.3384932920536636, "grad_norm": 7.072193984041829, "learning_rate": 1.4138844819678726e-06, "loss": 0.20140615105628967, "step": 2266 }, { "epoch": 2.3395252837977294, "grad_norm": 10.632779713688336, "learning_rate": 1.4097028204958896e-06, "loss": 0.22186976671218872, "step": 2267 }, { "epoch": 2.3405572755417956, "grad_norm": 8.929669950167531, "learning_rate": 1.4055263369352673e-06, "loss": 0.8361318707466125, "step": 2268 }, { "epoch": 2.341589267285862, "grad_norm": 18.286877202260154, "learning_rate": 1.4013550373093139e-06, "loss": 0.8228157162666321, "step": 2269 }, { "epoch": 2.3426212590299276, "grad_norm": 8.923437567911852, "learning_rate": 1.397188927633863e-06, "loss": 0.6820056438446045, "step": 2270 }, { "epoch": 2.343653250773994, "grad_norm": 13.026055261230391, "learning_rate": 1.393028013917259e-06, "loss": 0.7587333917617798, "step": 2271 }, { "epoch": 2.34468524251806, "grad_norm": 11.29064677317461, "learning_rate": 1.388872302160353e-06, "loss": 0.9072903394699097, "step": 2272 }, { "epoch": 2.3457172342621257, "grad_norm": 12.179380361158485, "learning_rate": 1.3847217983564943e-06, "loss": 0.4823061227798462, "step": 2273 }, { "epoch": 2.346749226006192, "grad_norm": 6.953779587008528, "learning_rate": 1.3805765084915236e-06, "loss": 0.2917562425136566, "step": 2274 }, { "epoch": 2.347781217750258, "grad_norm": 11.39337241380322, "learning_rate": 1.3764364385437595e-06, "loss": 0.42079344391822815, "step": 2275 }, { "epoch": 2.348813209494324, "grad_norm": 8.464953413026237, "learning_rate": 1.3723015944839947e-06, "loss": 0.6202406287193298, "step": 2276 }, { "epoch": 2.34984520123839, "grad_norm": 17.585128569200954, "learning_rate": 1.3681719822754813e-06, "loss": 0.6479356288909912, "step": 2277 }, { "epoch": 2.3508771929824563, "grad_norm": 12.926284507484658, "learning_rate": 1.3640476078739296e-06, "loss": 0.44959646463394165, "step": 2278 }, { "epoch": 2.351909184726522, "grad_norm": 10.32503660981902, "learning_rate": 1.3599284772274935e-06, "loss": 0.3125981092453003, "step": 2279 }, { "epoch": 2.3529411764705883, "grad_norm": 11.281541729676572, "learning_rate": 1.35581459627677e-06, "loss": 0.7096989154815674, "step": 2280 }, { "epoch": 2.3539731682146545, "grad_norm": 8.101741112538424, "learning_rate": 1.351705970954777e-06, "loss": 0.6925749182701111, "step": 2281 }, { "epoch": 2.3550051599587203, "grad_norm": 11.7984677504724, "learning_rate": 1.347602607186957e-06, "loss": 0.7987287640571594, "step": 2282 }, { "epoch": 2.3560371517027865, "grad_norm": 15.291266015010487, "learning_rate": 1.3435045108911648e-06, "loss": 0.9378319978713989, "step": 2283 }, { "epoch": 2.3570691434468523, "grad_norm": 8.389071900028682, "learning_rate": 1.339411687977657e-06, "loss": 0.2552429139614105, "step": 2284 }, { "epoch": 2.3581011351909185, "grad_norm": 13.02561353962284, "learning_rate": 1.335324144349085e-06, "loss": 0.23329763114452362, "step": 2285 }, { "epoch": 2.3591331269349847, "grad_norm": 10.87277502286887, "learning_rate": 1.3312418859004895e-06, "loss": 0.2614462375640869, "step": 2286 }, { "epoch": 2.3601651186790504, "grad_norm": 13.95195753887422, "learning_rate": 1.327164918519282e-06, "loss": 0.2634202539920807, "step": 2287 }, { "epoch": 2.3611971104231166, "grad_norm": 11.846596561960679, "learning_rate": 1.3230932480852487e-06, "loss": 0.4762287437915802, "step": 2288 }, { "epoch": 2.362229102167183, "grad_norm": 11.405128940430034, "learning_rate": 1.3190268804705381e-06, "loss": 0.5081691741943359, "step": 2289 }, { "epoch": 2.3632610939112486, "grad_norm": 14.103546259617673, "learning_rate": 1.3149658215396478e-06, "loss": 0.35150325298309326, "step": 2290 }, { "epoch": 2.364293085655315, "grad_norm": 14.532903837666758, "learning_rate": 1.310910077149417e-06, "loss": 0.7659963965415955, "step": 2291 }, { "epoch": 2.3653250773993806, "grad_norm": 13.34599463001999, "learning_rate": 1.3068596531490253e-06, "loss": 0.5429165959358215, "step": 2292 }, { "epoch": 2.366357069143447, "grad_norm": 7.885947716842902, "learning_rate": 1.3028145553799764e-06, "loss": 0.31663644313812256, "step": 2293 }, { "epoch": 2.367389060887513, "grad_norm": 8.921384888300858, "learning_rate": 1.2987747896760927e-06, "loss": 1.0047956705093384, "step": 2294 }, { "epoch": 2.3684210526315788, "grad_norm": 18.076640304312118, "learning_rate": 1.2947403618635097e-06, "loss": 0.6527321338653564, "step": 2295 }, { "epoch": 2.369453044375645, "grad_norm": 16.577720379130852, "learning_rate": 1.290711277760658e-06, "loss": 0.49626123905181885, "step": 2296 }, { "epoch": 2.370485036119711, "grad_norm": 10.918242651297279, "learning_rate": 1.2866875431782677e-06, "loss": 0.5808942914009094, "step": 2297 }, { "epoch": 2.371517027863777, "grad_norm": 17.45010139274395, "learning_rate": 1.2826691639193506e-06, "loss": 0.8238117098808289, "step": 2298 }, { "epoch": 2.372549019607843, "grad_norm": 11.926863010648972, "learning_rate": 1.2786561457791996e-06, "loss": 0.6759803891181946, "step": 2299 }, { "epoch": 2.3735810113519094, "grad_norm": 13.867351303340826, "learning_rate": 1.2746484945453691e-06, "loss": 0.3109396994113922, "step": 2300 }, { "epoch": 2.374613003095975, "grad_norm": 11.264931022174274, "learning_rate": 1.270646215997678e-06, "loss": 0.2699579894542694, "step": 2301 }, { "epoch": 2.3756449948400413, "grad_norm": 11.601205893014052, "learning_rate": 1.2666493159081944e-06, "loss": 0.32813602685928345, "step": 2302 }, { "epoch": 2.3766769865841075, "grad_norm": 9.489393114719974, "learning_rate": 1.262657800041232e-06, "loss": 0.29331862926483154, "step": 2303 }, { "epoch": 2.3777089783281733, "grad_norm": 10.455110525170294, "learning_rate": 1.2586716741533389e-06, "loss": 0.35240501165390015, "step": 2304 }, { "epoch": 2.3787409700722395, "grad_norm": 12.1430793084311, "learning_rate": 1.2546909439932858e-06, "loss": 0.4376395344734192, "step": 2305 }, { "epoch": 2.3797729618163057, "grad_norm": 20.666356804549835, "learning_rate": 1.2507156153020667e-06, "loss": 0.6594339609146118, "step": 2306 }, { "epoch": 2.3808049535603715, "grad_norm": 7.966081233097103, "learning_rate": 1.2467456938128824e-06, "loss": 0.4372865557670593, "step": 2307 }, { "epoch": 2.3818369453044377, "grad_norm": 8.177508156131061, "learning_rate": 1.2427811852511396e-06, "loss": 0.1819734275341034, "step": 2308 }, { "epoch": 2.3828689370485034, "grad_norm": 7.302440475460186, "learning_rate": 1.2388220953344354e-06, "loss": 0.24572576582431793, "step": 2309 }, { "epoch": 2.3839009287925697, "grad_norm": 10.31916401694975, "learning_rate": 1.23486842977255e-06, "loss": 0.5179727077484131, "step": 2310 }, { "epoch": 2.384932920536636, "grad_norm": 8.252763135789019, "learning_rate": 1.2309201942674442e-06, "loss": 0.21115389466285706, "step": 2311 }, { "epoch": 2.3859649122807016, "grad_norm": 10.930992870936334, "learning_rate": 1.226977394513247e-06, "loss": 0.5590957999229431, "step": 2312 }, { "epoch": 2.386996904024768, "grad_norm": 13.945253351389052, "learning_rate": 1.2230400361962469e-06, "loss": 0.8369548916816711, "step": 2313 }, { "epoch": 2.388028895768834, "grad_norm": 12.529360499358702, "learning_rate": 1.2191081249948871e-06, "loss": 0.4372384548187256, "step": 2314 }, { "epoch": 2.3890608875129, "grad_norm": 11.004924824102241, "learning_rate": 1.2151816665797507e-06, "loss": 0.3626002073287964, "step": 2315 }, { "epoch": 2.390092879256966, "grad_norm": 13.728593391482939, "learning_rate": 1.2112606666135602e-06, "loss": 0.259071946144104, "step": 2316 }, { "epoch": 2.3911248710010318, "grad_norm": 10.280716365371559, "learning_rate": 1.2073451307511642e-06, "loss": 0.45200228691101074, "step": 2317 }, { "epoch": 2.392156862745098, "grad_norm": 10.155973965955244, "learning_rate": 1.203435064639536e-06, "loss": 0.3288681209087372, "step": 2318 }, { "epoch": 2.393188854489164, "grad_norm": 8.58553962060409, "learning_rate": 1.1995304739177515e-06, "loss": 0.43590980768203735, "step": 2319 }, { "epoch": 2.39422084623323, "grad_norm": 27.483561695551096, "learning_rate": 1.1956313642169974e-06, "loss": 0.8669304251670837, "step": 2320 }, { "epoch": 2.395252837977296, "grad_norm": 9.371469568007717, "learning_rate": 1.1917377411605523e-06, "loss": 0.3421843945980072, "step": 2321 }, { "epoch": 2.3962848297213624, "grad_norm": 13.730655837456155, "learning_rate": 1.1878496103637838e-06, "loss": 0.8081228137016296, "step": 2322 }, { "epoch": 2.397316821465428, "grad_norm": 15.974869879662167, "learning_rate": 1.1839669774341378e-06, "loss": 1.2036049365997314, "step": 2323 }, { "epoch": 2.3983488132094943, "grad_norm": 13.471015946681664, "learning_rate": 1.1800898479711293e-06, "loss": 0.18934336304664612, "step": 2324 }, { "epoch": 2.3993808049535605, "grad_norm": 11.525094925651421, "learning_rate": 1.1762182275663387e-06, "loss": 0.4728744626045227, "step": 2325 }, { "epoch": 2.4004127966976263, "grad_norm": 10.72246303484596, "learning_rate": 1.1723521218034006e-06, "loss": 0.3724917769432068, "step": 2326 }, { "epoch": 2.4014447884416925, "grad_norm": 12.89596258783316, "learning_rate": 1.1684915362579951e-06, "loss": 0.3012378215789795, "step": 2327 }, { "epoch": 2.4024767801857587, "grad_norm": 7.97123740423598, "learning_rate": 1.1646364764978468e-06, "loss": 0.426517516374588, "step": 2328 }, { "epoch": 2.4035087719298245, "grad_norm": 10.567269384597846, "learning_rate": 1.1607869480827016e-06, "loss": 0.8786559104919434, "step": 2329 }, { "epoch": 2.4045407636738907, "grad_norm": 9.465822104953064, "learning_rate": 1.1569429565643353e-06, "loss": 0.27923858165740967, "step": 2330 }, { "epoch": 2.405572755417957, "grad_norm": 8.941211304728357, "learning_rate": 1.1531045074865355e-06, "loss": 0.35923856496810913, "step": 2331 }, { "epoch": 2.4066047471620227, "grad_norm": 8.354879524912997, "learning_rate": 1.1492716063850973e-06, "loss": 0.22897693514823914, "step": 2332 }, { "epoch": 2.407636738906089, "grad_norm": 10.360519545976057, "learning_rate": 1.1454442587878161e-06, "loss": 1.8416224718093872, "step": 2333 }, { "epoch": 2.4086687306501546, "grad_norm": 10.263453437778633, "learning_rate": 1.1416224702144734e-06, "loss": 0.19484372437000275, "step": 2334 }, { "epoch": 2.409700722394221, "grad_norm": 12.678350979764224, "learning_rate": 1.137806246176838e-06, "loss": 0.3071631193161011, "step": 2335 }, { "epoch": 2.410732714138287, "grad_norm": 8.604280482284869, "learning_rate": 1.1339955921786504e-06, "loss": 0.2432839423418045, "step": 2336 }, { "epoch": 2.411764705882353, "grad_norm": 14.531747010640835, "learning_rate": 1.1301905137156238e-06, "loss": 0.3913341760635376, "step": 2337 }, { "epoch": 2.412796697626419, "grad_norm": 15.11076558143236, "learning_rate": 1.1263910162754222e-06, "loss": 0.23445601761341095, "step": 2338 }, { "epoch": 2.4138286893704852, "grad_norm": 8.677501081099685, "learning_rate": 1.1225971053376661e-06, "loss": 0.273960679769516, "step": 2339 }, { "epoch": 2.414860681114551, "grad_norm": 8.861797371537126, "learning_rate": 1.1188087863739173e-06, "loss": 0.5194929242134094, "step": 2340 }, { "epoch": 2.415892672858617, "grad_norm": 9.66066843155121, "learning_rate": 1.1150260648476742e-06, "loss": 0.6325628757476807, "step": 2341 }, { "epoch": 2.416924664602683, "grad_norm": 9.558262645529188, "learning_rate": 1.1112489462143622e-06, "loss": 0.6525687575340271, "step": 2342 }, { "epoch": 2.417956656346749, "grad_norm": 26.355092347093272, "learning_rate": 1.1074774359213236e-06, "loss": 1.1010922193527222, "step": 2343 }, { "epoch": 2.4189886480908154, "grad_norm": 22.52338795222645, "learning_rate": 1.1037115394078162e-06, "loss": 0.6247187852859497, "step": 2344 }, { "epoch": 2.420020639834881, "grad_norm": 28.624581804368976, "learning_rate": 1.0999512621049991e-06, "loss": 1.2945120334625244, "step": 2345 }, { "epoch": 2.4210526315789473, "grad_norm": 13.87565959898355, "learning_rate": 1.096196609435929e-06, "loss": 0.8663886785507202, "step": 2346 }, { "epoch": 2.4220846233230136, "grad_norm": 18.65055265213526, "learning_rate": 1.0924475868155493e-06, "loss": 0.36518436670303345, "step": 2347 }, { "epoch": 2.4231166150670793, "grad_norm": 7.921531494445021, "learning_rate": 1.0887041996506858e-06, "loss": 0.24292084574699402, "step": 2348 }, { "epoch": 2.4241486068111455, "grad_norm": 17.719547304099176, "learning_rate": 1.084966453340034e-06, "loss": 1.2447023391723633, "step": 2349 }, { "epoch": 2.4251805985552117, "grad_norm": 12.712028230215658, "learning_rate": 1.081234353274157e-06, "loss": 0.37312689423561096, "step": 2350 }, { "epoch": 2.4262125902992775, "grad_norm": 13.940826103948076, "learning_rate": 1.0775079048354736e-06, "loss": 0.4350719153881073, "step": 2351 }, { "epoch": 2.4272445820433437, "grad_norm": 10.100638431884445, "learning_rate": 1.0737871133982524e-06, "loss": 0.9279487729072571, "step": 2352 }, { "epoch": 2.42827657378741, "grad_norm": 6.804514506619492, "learning_rate": 1.070071984328601e-06, "loss": 0.40028977394104004, "step": 2353 }, { "epoch": 2.4293085655314757, "grad_norm": 17.521722269336678, "learning_rate": 1.0663625229844643e-06, "loss": 0.5591195225715637, "step": 2354 }, { "epoch": 2.430340557275542, "grad_norm": 11.965660390374483, "learning_rate": 1.0626587347156097e-06, "loss": 0.7699227333068848, "step": 2355 }, { "epoch": 2.431372549019608, "grad_norm": 9.677677366026419, "learning_rate": 1.0589606248636291e-06, "loss": 0.796596109867096, "step": 2356 }, { "epoch": 2.432404540763674, "grad_norm": 15.7077268294847, "learning_rate": 1.055268198761918e-06, "loss": 0.35285720229148865, "step": 2357 }, { "epoch": 2.43343653250774, "grad_norm": 11.019236024085936, "learning_rate": 1.0515814617356773e-06, "loss": 0.3330291509628296, "step": 2358 }, { "epoch": 2.434468524251806, "grad_norm": 9.845431324155186, "learning_rate": 1.0479004191019043e-06, "loss": 0.35188984870910645, "step": 2359 }, { "epoch": 2.435500515995872, "grad_norm": 10.27888078280417, "learning_rate": 1.0442250761693829e-06, "loss": 0.42126739025115967, "step": 2360 }, { "epoch": 2.4365325077399382, "grad_norm": 9.77599850425449, "learning_rate": 1.040555438238679e-06, "loss": 0.300399512052536, "step": 2361 }, { "epoch": 2.437564499484004, "grad_norm": 7.193000367836019, "learning_rate": 1.0368915106021255e-06, "loss": 0.6267426013946533, "step": 2362 }, { "epoch": 2.43859649122807, "grad_norm": 8.350912664417, "learning_rate": 1.0332332985438248e-06, "loss": 0.43233785033226013, "step": 2363 }, { "epoch": 2.4396284829721364, "grad_norm": 7.908070153243322, "learning_rate": 1.0295808073396352e-06, "loss": 0.28449347615242004, "step": 2364 }, { "epoch": 2.440660474716202, "grad_norm": 9.445311334028778, "learning_rate": 1.0259340422571635e-06, "loss": 0.6865044832229614, "step": 2365 }, { "epoch": 2.4416924664602684, "grad_norm": 9.637391373408922, "learning_rate": 1.0222930085557593e-06, "loss": 0.27788588404655457, "step": 2366 }, { "epoch": 2.442724458204334, "grad_norm": 11.592538564975873, "learning_rate": 1.0186577114865053e-06, "loss": 0.5116929411888123, "step": 2367 }, { "epoch": 2.4437564499484004, "grad_norm": 13.014236028703879, "learning_rate": 1.015028156292212e-06, "loss": 0.7098373770713806, "step": 2368 }, { "epoch": 2.4447884416924666, "grad_norm": 9.186002802022488, "learning_rate": 1.0114043482074088e-06, "loss": 0.5690082311630249, "step": 2369 }, { "epoch": 2.4458204334365323, "grad_norm": 9.488909720476801, "learning_rate": 1.0077862924583354e-06, "loss": 0.3127894401550293, "step": 2370 }, { "epoch": 2.4468524251805985, "grad_norm": 8.92075291296032, "learning_rate": 1.0041739942629387e-06, "loss": 0.2941093444824219, "step": 2371 }, { "epoch": 2.4478844169246647, "grad_norm": 21.400702288335943, "learning_rate": 1.0005674588308566e-06, "loss": 0.615723192691803, "step": 2372 }, { "epoch": 2.4489164086687305, "grad_norm": 16.998484336603784, "learning_rate": 9.969666913634207e-07, "loss": 0.33745965361595154, "step": 2373 }, { "epoch": 2.4499484004127967, "grad_norm": 9.728842337723819, "learning_rate": 9.933716970536428e-07, "loss": 0.36395740509033203, "step": 2374 }, { "epoch": 2.450980392156863, "grad_norm": 18.40886473242549, "learning_rate": 9.897824810862084e-07, "loss": 0.3619149923324585, "step": 2375 }, { "epoch": 2.4520123839009287, "grad_norm": 39.54015632713799, "learning_rate": 9.861990486374695e-07, "loss": 0.6555899381637573, "step": 2376 }, { "epoch": 2.453044375644995, "grad_norm": 11.682157076951567, "learning_rate": 9.826214048754368e-07, "loss": 0.40092623233795166, "step": 2377 }, { "epoch": 2.454076367389061, "grad_norm": 9.362559088256313, "learning_rate": 9.790495549597733e-07, "loss": 0.18525682389736176, "step": 2378 }, { "epoch": 2.455108359133127, "grad_norm": 7.775023798181715, "learning_rate": 9.75483504041786e-07, "loss": 0.3614833950996399, "step": 2379 }, { "epoch": 2.456140350877193, "grad_norm": 14.837476186767415, "learning_rate": 9.719232572644189e-07, "loss": 0.9362202286720276, "step": 2380 }, { "epoch": 2.4571723426212593, "grad_norm": 11.002975691995825, "learning_rate": 9.683688197622432e-07, "loss": 0.47554850578308105, "step": 2381 }, { "epoch": 2.458204334365325, "grad_norm": 13.068756446775595, "learning_rate": 9.648201966614546e-07, "loss": 0.4872112572193146, "step": 2382 }, { "epoch": 2.4592363261093912, "grad_norm": 11.014793773785462, "learning_rate": 9.61277393079862e-07, "loss": 0.2566531002521515, "step": 2383 }, { "epoch": 2.460268317853457, "grad_norm": 12.206664490362016, "learning_rate": 9.577404141268815e-07, "loss": 0.4083847403526306, "step": 2384 }, { "epoch": 2.461300309597523, "grad_norm": 8.639693634651767, "learning_rate": 9.542092649035295e-07, "loss": 0.26377323269844055, "step": 2385 }, { "epoch": 2.4623323013415894, "grad_norm": 11.586254114186088, "learning_rate": 9.506839505024146e-07, "loss": 0.6863972544670105, "step": 2386 }, { "epoch": 2.463364293085655, "grad_norm": 8.17869956434473, "learning_rate": 9.471644760077297e-07, "loss": 0.5492748618125916, "step": 2387 }, { "epoch": 2.4643962848297214, "grad_norm": 7.728135075325033, "learning_rate": 9.436508464952471e-07, "loss": 0.257376492023468, "step": 2388 }, { "epoch": 2.4654282765737876, "grad_norm": 13.235499955162616, "learning_rate": 9.40143067032307e-07, "loss": 2.1276726722717285, "step": 2389 }, { "epoch": 2.4664602683178534, "grad_norm": 11.469789882583859, "learning_rate": 9.366411426778165e-07, "loss": 0.5650811791419983, "step": 2390 }, { "epoch": 2.4674922600619196, "grad_norm": 14.094658908361136, "learning_rate": 9.331450784822326e-07, "loss": 0.35821136832237244, "step": 2391 }, { "epoch": 2.4685242518059853, "grad_norm": 11.027558569826938, "learning_rate": 9.296548794875659e-07, "loss": 0.49398985505104065, "step": 2392 }, { "epoch": 2.4695562435500515, "grad_norm": 13.783391565891415, "learning_rate": 9.261705507273666e-07, "loss": 0.7277883291244507, "step": 2393 }, { "epoch": 2.4705882352941178, "grad_norm": 8.55942333562643, "learning_rate": 9.22692097226719e-07, "loss": 0.5172953605651855, "step": 2394 }, { "epoch": 2.4716202270381835, "grad_norm": 11.393872910806078, "learning_rate": 9.19219524002234e-07, "loss": 0.3203275203704834, "step": 2395 }, { "epoch": 2.4726522187822497, "grad_norm": 9.980558628262713, "learning_rate": 9.157528360620416e-07, "loss": 0.26650774478912354, "step": 2396 }, { "epoch": 2.473684210526316, "grad_norm": 29.038587672697723, "learning_rate": 9.122920384057849e-07, "loss": 0.9730219841003418, "step": 2397 }, { "epoch": 2.4747162022703817, "grad_norm": 7.0552253170083015, "learning_rate": 9.088371360246107e-07, "loss": 0.24205099046230316, "step": 2398 }, { "epoch": 2.475748194014448, "grad_norm": 9.612073593825405, "learning_rate": 9.053881339011672e-07, "loss": 0.5207908153533936, "step": 2399 }, { "epoch": 2.476780185758514, "grad_norm": 8.051099464493184, "learning_rate": 9.019450370095867e-07, "loss": 0.27800866961479187, "step": 2400 }, { "epoch": 2.47781217750258, "grad_norm": 9.826849297611608, "learning_rate": 8.985078503154914e-07, "loss": 0.2225874662399292, "step": 2401 }, { "epoch": 2.478844169246646, "grad_norm": 12.338649401779252, "learning_rate": 8.950765787759769e-07, "loss": 0.18932898342609406, "step": 2402 }, { "epoch": 2.4798761609907123, "grad_norm": 8.506983689882658, "learning_rate": 8.916512273396078e-07, "loss": 0.4070678949356079, "step": 2403 }, { "epoch": 2.480908152734778, "grad_norm": 12.940466747256666, "learning_rate": 8.882318009464124e-07, "loss": 0.37709781527519226, "step": 2404 }, { "epoch": 2.4819401444788443, "grad_norm": 7.538269987278947, "learning_rate": 8.848183045278729e-07, "loss": 0.1588832139968872, "step": 2405 }, { "epoch": 2.4829721362229105, "grad_norm": 7.4930689254547005, "learning_rate": 8.814107430069185e-07, "loss": 0.5599884986877441, "step": 2406 }, { "epoch": 2.4840041279669762, "grad_norm": 13.417182779620711, "learning_rate": 8.780091212979208e-07, "loss": 0.7557123899459839, "step": 2407 }, { "epoch": 2.4850361197110424, "grad_norm": 5.534363337722231, "learning_rate": 8.74613444306684e-07, "loss": 0.07814184576272964, "step": 2408 }, { "epoch": 2.486068111455108, "grad_norm": 9.35144847838186, "learning_rate": 8.712237169304394e-07, "loss": 0.20516231656074524, "step": 2409 }, { "epoch": 2.4871001031991744, "grad_norm": 8.075249750271306, "learning_rate": 8.678399440578367e-07, "loss": 0.2661157250404358, "step": 2410 }, { "epoch": 2.4881320949432406, "grad_norm": 13.388476095299996, "learning_rate": 8.644621305689383e-07, "loss": 0.29081588983535767, "step": 2411 }, { "epoch": 2.4891640866873064, "grad_norm": 9.58389049030021, "learning_rate": 8.61090281335214e-07, "loss": 0.3577726483345032, "step": 2412 }, { "epoch": 2.4901960784313726, "grad_norm": 8.567604008779373, "learning_rate": 8.577244012195291e-07, "loss": 0.6649857759475708, "step": 2413 }, { "epoch": 2.4912280701754383, "grad_norm": 8.7965865174537, "learning_rate": 8.543644950761426e-07, "loss": 0.3712612986564636, "step": 2414 }, { "epoch": 2.4922600619195046, "grad_norm": 7.005903539625946, "learning_rate": 8.510105677506964e-07, "loss": 0.10786724090576172, "step": 2415 }, { "epoch": 2.4932920536635708, "grad_norm": 9.08726282307587, "learning_rate": 8.476626240802099e-07, "loss": 0.5389184355735779, "step": 2416 }, { "epoch": 2.4943240454076365, "grad_norm": 12.889574549268769, "learning_rate": 8.443206688930744e-07, "loss": 0.4571652114391327, "step": 2417 }, { "epoch": 2.4953560371517027, "grad_norm": 9.210834471883407, "learning_rate": 8.409847070090437e-07, "loss": 0.5685957074165344, "step": 2418 }, { "epoch": 2.496388028895769, "grad_norm": 10.263316886694389, "learning_rate": 8.376547432392262e-07, "loss": 0.6476658582687378, "step": 2419 }, { "epoch": 2.4974200206398347, "grad_norm": 17.78802954389812, "learning_rate": 8.343307823860819e-07, "loss": 0.5561540126800537, "step": 2420 }, { "epoch": 2.498452012383901, "grad_norm": 8.664026801032575, "learning_rate": 8.310128292434139e-07, "loss": 0.2646542191505432, "step": 2421 }, { "epoch": 2.499484004127967, "grad_norm": 11.559484893196826, "learning_rate": 8.277008885963594e-07, "loss": 0.44151124358177185, "step": 2422 }, { "epoch": 2.500515995872033, "grad_norm": 9.83190452333222, "learning_rate": 8.243949652213862e-07, "loss": 0.41169655323028564, "step": 2423 }, { "epoch": 2.501547987616099, "grad_norm": 10.979346072111683, "learning_rate": 8.210950638862813e-07, "loss": 0.31727972626686096, "step": 2424 }, { "epoch": 2.5025799793601653, "grad_norm": 11.557106442230344, "learning_rate": 8.178011893501498e-07, "loss": 0.29078972339630127, "step": 2425 }, { "epoch": 2.503611971104231, "grad_norm": 9.674782624549838, "learning_rate": 8.145133463634031e-07, "loss": 0.9324804544448853, "step": 2426 }, { "epoch": 2.5046439628482973, "grad_norm": 14.034812449654558, "learning_rate": 8.112315396677561e-07, "loss": 0.7548234462738037, "step": 2427 }, { "epoch": 2.5056759545923635, "grad_norm": 13.9406219318329, "learning_rate": 8.079557739962129e-07, "loss": 0.3924209475517273, "step": 2428 }, { "epoch": 2.5067079463364292, "grad_norm": 14.264471133053592, "learning_rate": 8.046860540730711e-07, "loss": 0.23829391598701477, "step": 2429 }, { "epoch": 2.5077399380804954, "grad_norm": 9.555475674044061, "learning_rate": 8.014223846139069e-07, "loss": 0.27250006794929504, "step": 2430 }, { "epoch": 2.5087719298245617, "grad_norm": 11.176391033621893, "learning_rate": 7.981647703255702e-07, "loss": 0.41484761238098145, "step": 2431 }, { "epoch": 2.5098039215686274, "grad_norm": 6.698502697538434, "learning_rate": 7.949132159061784e-07, "loss": 0.2051486372947693, "step": 2432 }, { "epoch": 2.5108359133126936, "grad_norm": 13.280473592235404, "learning_rate": 7.916677260451095e-07, "loss": 0.417132169008255, "step": 2433 }, { "epoch": 2.5118679050567594, "grad_norm": 10.079119541318564, "learning_rate": 7.884283054229958e-07, "loss": 0.27953770756721497, "step": 2434 }, { "epoch": 2.5128998968008256, "grad_norm": 9.000723058812289, "learning_rate": 7.851949587117152e-07, "loss": 0.5249171853065491, "step": 2435 }, { "epoch": 2.513931888544892, "grad_norm": 10.290660728328614, "learning_rate": 7.819676905743872e-07, "loss": 0.31173601746559143, "step": 2436 }, { "epoch": 2.5149638802889576, "grad_norm": 11.538047770457174, "learning_rate": 7.787465056653653e-07, "loss": 0.6719921827316284, "step": 2437 }, { "epoch": 2.5159958720330238, "grad_norm": 9.22266151911703, "learning_rate": 7.755314086302257e-07, "loss": 0.44996383786201477, "step": 2438 }, { "epoch": 2.5170278637770895, "grad_norm": 7.624242369239375, "learning_rate": 7.723224041057697e-07, "loss": 0.26270487904548645, "step": 2439 }, { "epoch": 2.5180598555211557, "grad_norm": 16.048347022232104, "learning_rate": 7.691194967200099e-07, "loss": 0.19687271118164062, "step": 2440 }, { "epoch": 2.519091847265222, "grad_norm": 13.4497672853125, "learning_rate": 7.659226910921652e-07, "loss": 0.553430438041687, "step": 2441 }, { "epoch": 2.5201238390092877, "grad_norm": 11.394367224771276, "learning_rate": 7.627319918326559e-07, "loss": 0.7229731678962708, "step": 2442 }, { "epoch": 2.521155830753354, "grad_norm": 7.754596846111269, "learning_rate": 7.595474035430944e-07, "loss": 0.2186160832643509, "step": 2443 }, { "epoch": 2.52218782249742, "grad_norm": 12.784166446936656, "learning_rate": 7.563689308162803e-07, "loss": 0.5006595849990845, "step": 2444 }, { "epoch": 2.523219814241486, "grad_norm": 20.0431695330293, "learning_rate": 7.531965782361939e-07, "loss": 1.1238250732421875, "step": 2445 }, { "epoch": 2.524251805985552, "grad_norm": 15.706339089105576, "learning_rate": 7.500303503779898e-07, "loss": 0.33517026901245117, "step": 2446 }, { "epoch": 2.5252837977296183, "grad_norm": 11.617057309870031, "learning_rate": 7.468702518079857e-07, "loss": 0.3896215856075287, "step": 2447 }, { "epoch": 2.526315789473684, "grad_norm": 19.340965941158494, "learning_rate": 7.43716287083664e-07, "loss": 1.8497036695480347, "step": 2448 }, { "epoch": 2.5273477812177503, "grad_norm": 15.18705838277447, "learning_rate": 7.405684607536584e-07, "loss": 1.9955216646194458, "step": 2449 }, { "epoch": 2.5283797729618165, "grad_norm": 10.877736708597599, "learning_rate": 7.374267773577515e-07, "loss": 1.1957073211669922, "step": 2450 }, { "epoch": 2.5294117647058822, "grad_norm": 30.178788325484387, "learning_rate": 7.342912414268654e-07, "loss": 1.469434142112732, "step": 2451 }, { "epoch": 2.5304437564499485, "grad_norm": 9.400424513052604, "learning_rate": 7.31161857483057e-07, "loss": 0.32218417525291443, "step": 2452 }, { "epoch": 2.5314757481940147, "grad_norm": 9.012375664833659, "learning_rate": 7.280386300395104e-07, "loss": 0.22649557888507843, "step": 2453 }, { "epoch": 2.5325077399380804, "grad_norm": 11.655272587248323, "learning_rate": 7.249215636005308e-07, "loss": 0.49087798595428467, "step": 2454 }, { "epoch": 2.5335397316821466, "grad_norm": 10.896749070097966, "learning_rate": 7.218106626615384e-07, "loss": 0.472625732421875, "step": 2455 }, { "epoch": 2.534571723426213, "grad_norm": 10.457972855615788, "learning_rate": 7.187059317090622e-07, "loss": 0.6984173059463501, "step": 2456 }, { "epoch": 2.5356037151702786, "grad_norm": 14.40471006951635, "learning_rate": 7.156073752207304e-07, "loss": 0.8018549680709839, "step": 2457 }, { "epoch": 2.536635706914345, "grad_norm": 14.039618022353128, "learning_rate": 7.125149976652684e-07, "loss": 0.38079196214675903, "step": 2458 }, { "epoch": 2.5376676986584106, "grad_norm": 13.56849882237949, "learning_rate": 7.094288035024905e-07, "loss": 0.3471815586090088, "step": 2459 }, { "epoch": 2.538699690402477, "grad_norm": 6.644592311169667, "learning_rate": 7.063487971832922e-07, "loss": 0.26021164655685425, "step": 2460 }, { "epoch": 2.539731682146543, "grad_norm": 18.06651652107949, "learning_rate": 7.032749831496466e-07, "loss": 0.4367244839668274, "step": 2461 }, { "epoch": 2.5407636738906088, "grad_norm": 10.006337776595991, "learning_rate": 7.002073658345943e-07, "loss": 0.3738947808742523, "step": 2462 }, { "epoch": 2.541795665634675, "grad_norm": 11.307412438943082, "learning_rate": 6.971459496622401e-07, "loss": 0.2714172899723053, "step": 2463 }, { "epoch": 2.5428276573787407, "grad_norm": 9.942800760696215, "learning_rate": 6.940907390477458e-07, "loss": 0.2196260392665863, "step": 2464 }, { "epoch": 2.543859649122807, "grad_norm": 12.911374595634284, "learning_rate": 6.910417383973244e-07, "loss": 0.5117729902267456, "step": 2465 }, { "epoch": 2.544891640866873, "grad_norm": 15.765117131752072, "learning_rate": 6.879989521082292e-07, "loss": 0.5069431662559509, "step": 2466 }, { "epoch": 2.545923632610939, "grad_norm": 10.612093959667995, "learning_rate": 6.849623845687547e-07, "loss": 0.5721427798271179, "step": 2467 }, { "epoch": 2.546955624355005, "grad_norm": 16.032074380217704, "learning_rate": 6.819320401582258e-07, "loss": 0.4624538719654083, "step": 2468 }, { "epoch": 2.5479876160990713, "grad_norm": 12.54192301084899, "learning_rate": 6.789079232469925e-07, "loss": 0.606447696685791, "step": 2469 }, { "epoch": 2.549019607843137, "grad_norm": 7.463315600117024, "learning_rate": 6.758900381964228e-07, "loss": 0.2632223069667816, "step": 2470 }, { "epoch": 2.5500515995872033, "grad_norm": 10.988763485820304, "learning_rate": 6.728783893588986e-07, "loss": 0.6660168766975403, "step": 2471 }, { "epoch": 2.5510835913312695, "grad_norm": 8.09866210736358, "learning_rate": 6.698729810778065e-07, "loss": 0.35602515935897827, "step": 2472 }, { "epoch": 2.5521155830753353, "grad_norm": 9.982111656825898, "learning_rate": 6.668738176875339e-07, "loss": 0.3641916513442993, "step": 2473 }, { "epoch": 2.5531475748194015, "grad_norm": 7.348812433182777, "learning_rate": 6.638809035134614e-07, "loss": 0.18170510232448578, "step": 2474 }, { "epoch": 2.5541795665634677, "grad_norm": 5.66582504929301, "learning_rate": 6.608942428719583e-07, "loss": 0.19020238518714905, "step": 2475 }, { "epoch": 2.5552115583075334, "grad_norm": 10.64433572809659, "learning_rate": 6.579138400703716e-07, "loss": 0.6588963866233826, "step": 2476 }, { "epoch": 2.5562435500515996, "grad_norm": 11.455825600050641, "learning_rate": 6.549396994070262e-07, "loss": 0.41902226209640503, "step": 2477 }, { "epoch": 2.557275541795666, "grad_norm": 8.586632321929025, "learning_rate": 6.519718251712159e-07, "loss": 0.31348979473114014, "step": 2478 }, { "epoch": 2.5583075335397316, "grad_norm": 9.056130934377439, "learning_rate": 6.490102216431964e-07, "loss": 0.21359741687774658, "step": 2479 }, { "epoch": 2.559339525283798, "grad_norm": 8.46713170840959, "learning_rate": 6.460548930941801e-07, "loss": 0.1758163869380951, "step": 2480 }, { "epoch": 2.560371517027864, "grad_norm": 8.635977324174299, "learning_rate": 6.431058437863269e-07, "loss": 0.2282043695449829, "step": 2481 }, { "epoch": 2.56140350877193, "grad_norm": 17.59283103512317, "learning_rate": 6.401630779727453e-07, "loss": 1.196661114692688, "step": 2482 }, { "epoch": 2.562435500515996, "grad_norm": 15.598402324436318, "learning_rate": 6.372265998974797e-07, "loss": 0.5663949847221375, "step": 2483 }, { "epoch": 2.5634674922600618, "grad_norm": 8.688801397974835, "learning_rate": 6.342964137955071e-07, "loss": 0.29304569959640503, "step": 2484 }, { "epoch": 2.564499484004128, "grad_norm": 11.251060722008908, "learning_rate": 6.313725238927271e-07, "loss": 0.4160574972629547, "step": 2485 }, { "epoch": 2.565531475748194, "grad_norm": 12.134033946614794, "learning_rate": 6.28454934405962e-07, "loss": 0.7963811159133911, "step": 2486 }, { "epoch": 2.56656346749226, "grad_norm": 9.34676118545788, "learning_rate": 6.255436495429478e-07, "loss": 0.2370861917734146, "step": 2487 }, { "epoch": 2.567595459236326, "grad_norm": 12.439532661969023, "learning_rate": 6.226386735023271e-07, "loss": 0.3600406348705292, "step": 2488 }, { "epoch": 2.568627450980392, "grad_norm": 11.858949836866532, "learning_rate": 6.197400104736439e-07, "loss": 0.4274609088897705, "step": 2489 }, { "epoch": 2.569659442724458, "grad_norm": 7.485843655541987, "learning_rate": 6.168476646373372e-07, "loss": 0.35645705461502075, "step": 2490 }, { "epoch": 2.5706914344685243, "grad_norm": 10.876910518645126, "learning_rate": 6.139616401647364e-07, "loss": 0.658128559589386, "step": 2491 }, { "epoch": 2.57172342621259, "grad_norm": 11.220677018841794, "learning_rate": 6.110819412180535e-07, "loss": 0.28535163402557373, "step": 2492 }, { "epoch": 2.5727554179566563, "grad_norm": 8.449879516914931, "learning_rate": 6.082085719503788e-07, "loss": 0.2511385679244995, "step": 2493 }, { "epoch": 2.5737874097007225, "grad_norm": 9.277463295533472, "learning_rate": 6.053415365056731e-07, "loss": 0.48958778381347656, "step": 2494 }, { "epoch": 2.5748194014447883, "grad_norm": 11.781584043356633, "learning_rate": 6.02480839018762e-07, "loss": 0.39071589708328247, "step": 2495 }, { "epoch": 2.5758513931888545, "grad_norm": 10.71000621503098, "learning_rate": 5.99626483615331e-07, "loss": 0.37455040216445923, "step": 2496 }, { "epoch": 2.5768833849329207, "grad_norm": 9.84869526655172, "learning_rate": 5.967784744119204e-07, "loss": 0.5807996988296509, "step": 2497 }, { "epoch": 2.5779153766769864, "grad_norm": 13.933072369238419, "learning_rate": 5.939368155159164e-07, "loss": 0.25031518936157227, "step": 2498 }, { "epoch": 2.5789473684210527, "grad_norm": 8.586408084297304, "learning_rate": 5.911015110255492e-07, "loss": 0.4800521731376648, "step": 2499 }, { "epoch": 2.579979360165119, "grad_norm": 8.09655300736004, "learning_rate": 5.882725650298787e-07, "loss": 0.20928475260734558, "step": 2500 }, { "epoch": 2.5810113519091846, "grad_norm": 14.6859228627622, "learning_rate": 5.854499816088027e-07, "loss": 0.39752423763275146, "step": 2501 }, { "epoch": 2.582043343653251, "grad_norm": 12.158917900342898, "learning_rate": 5.826337648330377e-07, "loss": 0.4349973797798157, "step": 2502 }, { "epoch": 2.583075335397317, "grad_norm": 9.681776111710935, "learning_rate": 5.798239187641208e-07, "loss": 0.45683524012565613, "step": 2503 }, { "epoch": 2.584107327141383, "grad_norm": 8.787952484909313, "learning_rate": 5.770204474543978e-07, "loss": 0.3035852313041687, "step": 2504 }, { "epoch": 2.585139318885449, "grad_norm": 6.630879856298401, "learning_rate": 5.742233549470239e-07, "loss": 0.3834453225135803, "step": 2505 }, { "epoch": 2.586171310629515, "grad_norm": 8.001953975708291, "learning_rate": 5.71432645275955e-07, "loss": 0.8652332425117493, "step": 2506 }, { "epoch": 2.587203302373581, "grad_norm": 18.533290450557388, "learning_rate": 5.6864832246594e-07, "loss": 0.4923500120639801, "step": 2507 }, { "epoch": 2.588235294117647, "grad_norm": 6.044956146468534, "learning_rate": 5.658703905325186e-07, "loss": 0.3429776430130005, "step": 2508 }, { "epoch": 2.589267285861713, "grad_norm": 12.613913614264515, "learning_rate": 5.630988534820097e-07, "loss": 0.34255480766296387, "step": 2509 }, { "epoch": 2.590299277605779, "grad_norm": 9.33459919728904, "learning_rate": 5.603337153115145e-07, "loss": 0.2871645390987396, "step": 2510 }, { "epoch": 2.5913312693498454, "grad_norm": 8.145714999328998, "learning_rate": 5.575749800089036e-07, "loss": 0.2089812457561493, "step": 2511 }, { "epoch": 2.592363261093911, "grad_norm": 8.6645424037609, "learning_rate": 5.548226515528133e-07, "loss": 0.505244255065918, "step": 2512 }, { "epoch": 2.5933952528379773, "grad_norm": 7.017950431806059, "learning_rate": 5.520767339126398e-07, "loss": 0.23949137330055237, "step": 2513 }, { "epoch": 2.594427244582043, "grad_norm": 9.882265890708554, "learning_rate": 5.493372310485329e-07, "loss": 0.4443948268890381, "step": 2514 }, { "epoch": 2.5954592363261093, "grad_norm": 10.91476539486523, "learning_rate": 5.466041469113925e-07, "loss": 0.5866333246231079, "step": 2515 }, { "epoch": 2.5964912280701755, "grad_norm": 13.318863757727463, "learning_rate": 5.438774854428614e-07, "loss": 0.330798864364624, "step": 2516 }, { "epoch": 2.5975232198142413, "grad_norm": 10.171733346667224, "learning_rate": 5.411572505753193e-07, "loss": 0.43884724378585815, "step": 2517 }, { "epoch": 2.5985552115583075, "grad_norm": 11.893739777185429, "learning_rate": 5.384434462318778e-07, "loss": 1.344459891319275, "step": 2518 }, { "epoch": 2.5995872033023737, "grad_norm": 16.36984848554477, "learning_rate": 5.357360763263713e-07, "loss": 0.9081894755363464, "step": 2519 }, { "epoch": 2.6006191950464395, "grad_norm": 14.972619594796644, "learning_rate": 5.330351447633603e-07, "loss": 0.2988734841346741, "step": 2520 }, { "epoch": 2.6016511867905057, "grad_norm": 9.481854122496655, "learning_rate": 5.303406554381157e-07, "loss": 0.6907855868339539, "step": 2521 }, { "epoch": 2.602683178534572, "grad_norm": 8.798461888110825, "learning_rate": 5.276526122366194e-07, "loss": 0.9872853755950928, "step": 2522 }, { "epoch": 2.6037151702786376, "grad_norm": 11.330081466508329, "learning_rate": 5.249710190355545e-07, "loss": 0.5052847266197205, "step": 2523 }, { "epoch": 2.604747162022704, "grad_norm": 16.83636731445847, "learning_rate": 5.222958797023036e-07, "loss": 0.49554190039634705, "step": 2524 }, { "epoch": 2.60577915376677, "grad_norm": 5.684493515055032, "learning_rate": 5.196271980949419e-07, "loss": 0.15073411166667938, "step": 2525 }, { "epoch": 2.606811145510836, "grad_norm": 10.619974690207405, "learning_rate": 5.169649780622304e-07, "loss": 0.40889978408813477, "step": 2526 }, { "epoch": 2.607843137254902, "grad_norm": 6.075467322496469, "learning_rate": 5.143092234436125e-07, "loss": 0.18674173951148987, "step": 2527 }, { "epoch": 2.6088751289989682, "grad_norm": 14.683336159569796, "learning_rate": 5.11659938069205e-07, "loss": 0.8641536235809326, "step": 2528 }, { "epoch": 2.609907120743034, "grad_norm": 8.275272204780919, "learning_rate": 5.090171257597948e-07, "loss": 0.2988804280757904, "step": 2529 }, { "epoch": 2.6109391124871, "grad_norm": 7.632913465500986, "learning_rate": 5.06380790326837e-07, "loss": 0.18241772055625916, "step": 2530 }, { "epoch": 2.6119711042311664, "grad_norm": 8.976502679663055, "learning_rate": 5.037509355724429e-07, "loss": 0.40648674964904785, "step": 2531 }, { "epoch": 2.613003095975232, "grad_norm": 12.031314667668429, "learning_rate": 5.011275652893782e-07, "loss": 0.4976109564304352, "step": 2532 }, { "epoch": 2.6140350877192984, "grad_norm": 10.559616396859743, "learning_rate": 4.985106832610553e-07, "loss": 0.29959365725517273, "step": 2533 }, { "epoch": 2.615067079463364, "grad_norm": 10.244172209304915, "learning_rate": 4.959002932615303e-07, "loss": 0.5306459665298462, "step": 2534 }, { "epoch": 2.6160990712074303, "grad_norm": 11.724042807932554, "learning_rate": 4.932963990554974e-07, "loss": 0.8250664472579956, "step": 2535 }, { "epoch": 2.617131062951496, "grad_norm": 10.10555559418158, "learning_rate": 4.906990043982813e-07, "loss": 0.23426461219787598, "step": 2536 }, { "epoch": 2.6181630546955623, "grad_norm": 10.423993619036137, "learning_rate": 4.881081130358345e-07, "loss": 0.523322582244873, "step": 2537 }, { "epoch": 2.6191950464396285, "grad_norm": 14.41673356600909, "learning_rate": 4.855237287047265e-07, "loss": 0.2107163816690445, "step": 2538 }, { "epoch": 2.6202270381836943, "grad_norm": 13.693033367636005, "learning_rate": 4.829458551321492e-07, "loss": 0.41339415311813354, "step": 2539 }, { "epoch": 2.6212590299277605, "grad_norm": 10.235350260735638, "learning_rate": 4.803744960358992e-07, "loss": 0.3401373028755188, "step": 2540 }, { "epoch": 2.6222910216718267, "grad_norm": 10.602668549796832, "learning_rate": 4.77809655124381e-07, "loss": 0.33104604482650757, "step": 2541 }, { "epoch": 2.6233230134158925, "grad_norm": 9.639984300173323, "learning_rate": 4.752513360965949e-07, "loss": 0.38588908314704895, "step": 2542 }, { "epoch": 2.6243550051599587, "grad_norm": 18.481304271361157, "learning_rate": 4.7269954264213935e-07, "loss": 0.5953255891799927, "step": 2543 }, { "epoch": 2.625386996904025, "grad_norm": 12.791025971074705, "learning_rate": 4.701542784411994e-07, "loss": 0.28090372681617737, "step": 2544 }, { "epoch": 2.6264189886480906, "grad_norm": 14.954317766614954, "learning_rate": 4.676155471645444e-07, "loss": 0.331378698348999, "step": 2545 }, { "epoch": 2.627450980392157, "grad_norm": 12.171313532906659, "learning_rate": 4.650833524735232e-07, "loss": 0.5808216333389282, "step": 2546 }, { "epoch": 2.628482972136223, "grad_norm": 6.875020329613904, "learning_rate": 4.6255769802005414e-07, "loss": 0.32516252994537354, "step": 2547 }, { "epoch": 2.629514963880289, "grad_norm": 9.99065210903672, "learning_rate": 4.6003858744662564e-07, "loss": 0.327812135219574, "step": 2548 }, { "epoch": 2.630546955624355, "grad_norm": 9.40256075754474, "learning_rate": 4.5752602438628945e-07, "loss": 0.268187016248703, "step": 2549 }, { "epoch": 2.6315789473684212, "grad_norm": 11.367727046797691, "learning_rate": 4.5502001246265416e-07, "loss": 0.7534793615341187, "step": 2550 }, { "epoch": 2.632610939112487, "grad_norm": 16.325306855068217, "learning_rate": 4.5252055528987647e-07, "loss": 0.7205969095230103, "step": 2551 }, { "epoch": 2.633642930856553, "grad_norm": 11.0242810820087, "learning_rate": 4.500276564726652e-07, "loss": 0.6006823778152466, "step": 2552 }, { "epoch": 2.6346749226006194, "grad_norm": 9.32383878761997, "learning_rate": 4.4754131960626777e-07, "loss": 0.4036358594894409, "step": 2553 }, { "epoch": 2.635706914344685, "grad_norm": 10.861707031976824, "learning_rate": 4.4506154827646917e-07, "loss": 0.35124677419662476, "step": 2554 }, { "epoch": 2.6367389060887514, "grad_norm": 20.241972042836696, "learning_rate": 4.4258834605958424e-07, "loss": 2.209158182144165, "step": 2555 }, { "epoch": 2.6377708978328176, "grad_norm": 10.957452091178835, "learning_rate": 4.401217165224564e-07, "loss": 0.4837624430656433, "step": 2556 }, { "epoch": 2.6388028895768834, "grad_norm": 9.37118737601346, "learning_rate": 4.3766166322244505e-07, "loss": 0.31439873576164246, "step": 2557 }, { "epoch": 2.6398348813209496, "grad_norm": 20.670899732864804, "learning_rate": 4.3520818970743174e-07, "loss": 0.4805004596710205, "step": 2558 }, { "epoch": 2.6408668730650153, "grad_norm": 10.230178996368872, "learning_rate": 4.327612995158043e-07, "loss": 0.5170422792434692, "step": 2559 }, { "epoch": 2.6418988648090815, "grad_norm": 11.215599006098603, "learning_rate": 4.3032099617645874e-07, "loss": 0.43596765398979187, "step": 2560 }, { "epoch": 2.6429308565531473, "grad_norm": 8.901642713887572, "learning_rate": 4.2788728320878827e-07, "loss": 0.3309241235256195, "step": 2561 }, { "epoch": 2.6439628482972135, "grad_norm": 9.234533641791927, "learning_rate": 4.254601641226835e-07, "loss": 0.270923912525177, "step": 2562 }, { "epoch": 2.6449948400412797, "grad_norm": 11.873686654548335, "learning_rate": 4.230396424185268e-07, "loss": 0.5812723636627197, "step": 2563 }, { "epoch": 2.6460268317853455, "grad_norm": 9.747512038747255, "learning_rate": 4.2062572158718284e-07, "loss": 0.6770668625831604, "step": 2564 }, { "epoch": 2.6470588235294117, "grad_norm": 7.510586607263941, "learning_rate": 4.1821840510999965e-07, "loss": 0.16891315579414368, "step": 2565 }, { "epoch": 2.648090815273478, "grad_norm": 9.520517790823536, "learning_rate": 4.1581769645879675e-07, "loss": 0.6919821500778198, "step": 2566 }, { "epoch": 2.6491228070175437, "grad_norm": 10.289191172399185, "learning_rate": 4.134235990958668e-07, "loss": 0.5486763715744019, "step": 2567 }, { "epoch": 2.65015479876161, "grad_norm": 11.37490516967252, "learning_rate": 4.1103611647396734e-07, "loss": 0.6272962689399719, "step": 2568 }, { "epoch": 2.651186790505676, "grad_norm": 10.000458568771393, "learning_rate": 4.0865525203631626e-07, "loss": 0.21195337176322937, "step": 2569 }, { "epoch": 2.652218782249742, "grad_norm": 8.026791633329674, "learning_rate": 4.0628100921658475e-07, "loss": 0.2913268208503723, "step": 2570 }, { "epoch": 2.653250773993808, "grad_norm": 8.514124896425205, "learning_rate": 4.039133914388965e-07, "loss": 0.7255011796951294, "step": 2571 }, { "epoch": 2.6542827657378743, "grad_norm": 11.993469857334887, "learning_rate": 4.0155240211781966e-07, "loss": 0.3760417401790619, "step": 2572 }, { "epoch": 2.65531475748194, "grad_norm": 9.849969686885421, "learning_rate": 3.9919804465836263e-07, "loss": 0.3319247364997864, "step": 2573 }, { "epoch": 2.656346749226006, "grad_norm": 11.081626558031994, "learning_rate": 3.9685032245596997e-07, "loss": 0.29489922523498535, "step": 2574 }, { "epoch": 2.6573787409700724, "grad_norm": 20.890441985451485, "learning_rate": 3.9450923889651825e-07, "loss": 1.2522039413452148, "step": 2575 }, { "epoch": 2.658410732714138, "grad_norm": 12.594726781366386, "learning_rate": 3.921747973563056e-07, "loss": 0.252694696187973, "step": 2576 }, { "epoch": 2.6594427244582044, "grad_norm": 10.667974819332423, "learning_rate": 3.8984700120205387e-07, "loss": 0.5963405966758728, "step": 2577 }, { "epoch": 2.6604747162022706, "grad_norm": 11.189244057357962, "learning_rate": 3.875258537909032e-07, "loss": 0.7342553734779358, "step": 2578 }, { "epoch": 2.6615067079463364, "grad_norm": 12.808316425027316, "learning_rate": 3.85211358470402e-07, "loss": 1.8337653875350952, "step": 2579 }, { "epoch": 2.6625386996904026, "grad_norm": 12.695716920942775, "learning_rate": 3.829035185785035e-07, "loss": 0.7166613340377808, "step": 2580 }, { "epoch": 2.663570691434469, "grad_norm": 15.965685815266713, "learning_rate": 3.8060233744356634e-07, "loss": 0.3166029751300812, "step": 2581 }, { "epoch": 2.6646026831785345, "grad_norm": 15.228449537881858, "learning_rate": 3.783078183843436e-07, "loss": 0.6232759952545166, "step": 2582 }, { "epoch": 2.6656346749226008, "grad_norm": 10.586080227145011, "learning_rate": 3.7601996470998156e-07, "loss": 0.2533762454986572, "step": 2583 }, { "epoch": 2.6666666666666665, "grad_norm": 13.97178141622236, "learning_rate": 3.737387797200126e-07, "loss": 0.49488574266433716, "step": 2584 }, { "epoch": 2.6676986584107327, "grad_norm": 13.599094780061206, "learning_rate": 3.7146426670435166e-07, "loss": 0.8465943932533264, "step": 2585 }, { "epoch": 2.6687306501547985, "grad_norm": 8.733972776218572, "learning_rate": 3.691964289432914e-07, "loss": 0.31913653016090393, "step": 2586 }, { "epoch": 2.6697626418988647, "grad_norm": 41.93487863173271, "learning_rate": 3.669352697074996e-07, "loss": 1.0798392295837402, "step": 2587 }, { "epoch": 2.670794633642931, "grad_norm": 20.657609086193606, "learning_rate": 3.646807922580098e-07, "loss": 1.051876187324524, "step": 2588 }, { "epoch": 2.6718266253869967, "grad_norm": 7.488384625739389, "learning_rate": 3.624329998462189e-07, "loss": 0.4441087543964386, "step": 2589 }, { "epoch": 2.672858617131063, "grad_norm": 9.081245894719912, "learning_rate": 3.6019189571388444e-07, "loss": 0.5778773427009583, "step": 2590 }, { "epoch": 2.673890608875129, "grad_norm": 9.197366691178136, "learning_rate": 3.5795748309311707e-07, "loss": 0.2405458688735962, "step": 2591 }, { "epoch": 2.674922600619195, "grad_norm": 8.672764900791982, "learning_rate": 3.557297652063768e-07, "loss": 0.7079523801803589, "step": 2592 }, { "epoch": 2.675954592363261, "grad_norm": 9.513513997500619, "learning_rate": 3.5350874526646925e-07, "loss": 1.325160264968872, "step": 2593 }, { "epoch": 2.6769865841073273, "grad_norm": 11.620643797708638, "learning_rate": 3.512944264765411e-07, "loss": 0.37012574076652527, "step": 2594 }, { "epoch": 2.678018575851393, "grad_norm": 9.739107116416724, "learning_rate": 3.4908681203007167e-07, "loss": 0.6634089946746826, "step": 2595 }, { "epoch": 2.6790505675954592, "grad_norm": 8.271222786255464, "learning_rate": 3.4688590511087304e-07, "loss": 0.35661011934280396, "step": 2596 }, { "epoch": 2.6800825593395254, "grad_norm": 10.331308569039791, "learning_rate": 3.446917088930851e-07, "loss": 0.3146715760231018, "step": 2597 }, { "epoch": 2.681114551083591, "grad_norm": 13.441579495000436, "learning_rate": 3.4250422654116933e-07, "loss": 0.6467351913452148, "step": 2598 }, { "epoch": 2.6821465428276574, "grad_norm": 8.705423478386244, "learning_rate": 3.40323461209901e-07, "loss": 0.3432140350341797, "step": 2599 }, { "epoch": 2.6831785345717236, "grad_norm": 6.746735612398821, "learning_rate": 3.3814941604437155e-07, "loss": 0.5899496674537659, "step": 2600 }, { "epoch": 2.6842105263157894, "grad_norm": 14.93423327939512, "learning_rate": 3.359820941799796e-07, "loss": 0.68793785572052, "step": 2601 }, { "epoch": 2.6852425180598556, "grad_norm": 8.156724190137599, "learning_rate": 3.338214987424282e-07, "loss": 0.12590919435024261, "step": 2602 }, { "epoch": 2.686274509803922, "grad_norm": 14.12186668178814, "learning_rate": 3.316676328477192e-07, "loss": 0.24262937903404236, "step": 2603 }, { "epoch": 2.6873065015479876, "grad_norm": 13.624667824422568, "learning_rate": 3.2952049960214785e-07, "loss": 0.5101624727249146, "step": 2604 }, { "epoch": 2.6883384932920538, "grad_norm": 9.527792356559328, "learning_rate": 3.273801021023004e-07, "loss": 0.5193660855293274, "step": 2605 }, { "epoch": 2.68937048503612, "grad_norm": 12.691636236212773, "learning_rate": 3.2524644343504887e-07, "loss": 0.729722797870636, "step": 2606 }, { "epoch": 2.6904024767801857, "grad_norm": 10.937206199199641, "learning_rate": 3.231195266775489e-07, "loss": 0.6672416925430298, "step": 2607 }, { "epoch": 2.691434468524252, "grad_norm": 9.946644179075593, "learning_rate": 3.20999354897229e-07, "loss": 0.39457738399505615, "step": 2608 }, { "epoch": 2.6924664602683177, "grad_norm": 5.906614566792187, "learning_rate": 3.1888593115179225e-07, "loss": 0.3601424992084503, "step": 2609 }, { "epoch": 2.693498452012384, "grad_norm": 11.398058075247581, "learning_rate": 3.167792584892093e-07, "loss": 0.5270384550094604, "step": 2610 }, { "epoch": 2.6945304437564497, "grad_norm": 11.726090044245446, "learning_rate": 3.146793399477144e-07, "loss": 0.6068238615989685, "step": 2611 }, { "epoch": 2.695562435500516, "grad_norm": 7.509456538501037, "learning_rate": 3.1258617855580155e-07, "loss": 0.5335989594459534, "step": 2612 }, { "epoch": 2.696594427244582, "grad_norm": 8.602600505566611, "learning_rate": 3.104997773322205e-07, "loss": 0.29156219959259033, "step": 2613 }, { "epoch": 2.697626418988648, "grad_norm": 13.710570424583297, "learning_rate": 3.0842013928596757e-07, "loss": 0.41730475425720215, "step": 2614 }, { "epoch": 2.698658410732714, "grad_norm": 10.720182485615819, "learning_rate": 3.063472674162882e-07, "loss": 0.53395676612854, "step": 2615 }, { "epoch": 2.6996904024767803, "grad_norm": 7.4405866757185715, "learning_rate": 3.0428116471267146e-07, "loss": 0.5421364307403564, "step": 2616 }, { "epoch": 2.700722394220846, "grad_norm": 15.399616022833726, "learning_rate": 3.022218341548422e-07, "loss": 0.39568254351615906, "step": 2617 }, { "epoch": 2.7017543859649122, "grad_norm": 12.618503901643852, "learning_rate": 3.0016927871275524e-07, "loss": 0.4751710593700409, "step": 2618 }, { "epoch": 2.7027863777089784, "grad_norm": 10.459012951475524, "learning_rate": 2.981235013465994e-07, "loss": 0.47635746002197266, "step": 2619 }, { "epoch": 2.703818369453044, "grad_norm": 11.870003226809803, "learning_rate": 2.9608450500678566e-07, "loss": 0.8148362636566162, "step": 2620 }, { "epoch": 2.7048503611971104, "grad_norm": 21.98035525761005, "learning_rate": 2.940522926339462e-07, "loss": 0.28189510107040405, "step": 2621 }, { "epoch": 2.7058823529411766, "grad_norm": 7.909654450133803, "learning_rate": 2.9202686715892934e-07, "loss": 0.3399927020072937, "step": 2622 }, { "epoch": 2.7069143446852424, "grad_norm": 12.964749406405296, "learning_rate": 2.9000823150279355e-07, "loss": 0.411767840385437, "step": 2623 }, { "epoch": 2.7079463364293086, "grad_norm": 8.8267704870169, "learning_rate": 2.879963885768083e-07, "loss": 0.38652342557907104, "step": 2624 }, { "epoch": 2.708978328173375, "grad_norm": 7.294871041892732, "learning_rate": 2.859913412824428e-07, "loss": 0.2980038523674011, "step": 2625 }, { "epoch": 2.7100103199174406, "grad_norm": 10.785565333143657, "learning_rate": 2.839930925113715e-07, "loss": 0.6537376642227173, "step": 2626 }, { "epoch": 2.7110423116615068, "grad_norm": 7.807193554695228, "learning_rate": 2.8200164514545657e-07, "loss": 0.404613733291626, "step": 2627 }, { "epoch": 2.712074303405573, "grad_norm": 8.772347522340448, "learning_rate": 2.800170020567566e-07, "loss": 0.31629127264022827, "step": 2628 }, { "epoch": 2.7131062951496387, "grad_norm": 8.979858832049977, "learning_rate": 2.780391661075155e-07, "loss": 0.38792482018470764, "step": 2629 }, { "epoch": 2.714138286893705, "grad_norm": 15.835606847253056, "learning_rate": 2.760681401501597e-07, "loss": 0.6627403497695923, "step": 2630 }, { "epoch": 2.715170278637771, "grad_norm": 11.307368841720763, "learning_rate": 2.7410392702729495e-07, "loss": 0.6148909330368042, "step": 2631 }, { "epoch": 2.716202270381837, "grad_norm": 9.753722183697175, "learning_rate": 2.721465295716996e-07, "loss": 0.7569953799247742, "step": 2632 }, { "epoch": 2.717234262125903, "grad_norm": 9.776518927595994, "learning_rate": 2.701959506063251e-07, "loss": 1.4264123439788818, "step": 2633 }, { "epoch": 2.718266253869969, "grad_norm": 9.543544280612078, "learning_rate": 2.6825219294428773e-07, "loss": 0.24961692094802856, "step": 2634 }, { "epoch": 2.719298245614035, "grad_norm": 9.719510604440641, "learning_rate": 2.663152593888668e-07, "loss": 0.6841002106666565, "step": 2635 }, { "epoch": 2.720330237358101, "grad_norm": 7.413237897907853, "learning_rate": 2.643851527335006e-07, "loss": 0.29095280170440674, "step": 2636 }, { "epoch": 2.721362229102167, "grad_norm": 10.584843614063363, "learning_rate": 2.624618757617792e-07, "loss": 0.25524187088012695, "step": 2637 }, { "epoch": 2.7223942208462333, "grad_norm": 7.752309826241938, "learning_rate": 2.605454312474448e-07, "loss": 0.19103951752185822, "step": 2638 }, { "epoch": 2.723426212590299, "grad_norm": 10.875904791849495, "learning_rate": 2.586358219543861e-07, "loss": 0.2569884657859802, "step": 2639 }, { "epoch": 2.7244582043343653, "grad_norm": 11.864527583325158, "learning_rate": 2.5673305063663335e-07, "loss": 0.5300930142402649, "step": 2640 }, { "epoch": 2.7254901960784315, "grad_norm": 12.264536840582725, "learning_rate": 2.5483712003835535e-07, "loss": 0.41859257221221924, "step": 2641 }, { "epoch": 2.7265221878224972, "grad_norm": 9.740302429611017, "learning_rate": 2.529480328938549e-07, "loss": 0.2770339548587799, "step": 2642 }, { "epoch": 2.7275541795665634, "grad_norm": 10.099875607541852, "learning_rate": 2.510657919275655e-07, "loss": 0.32811036705970764, "step": 2643 }, { "epoch": 2.7285861713106296, "grad_norm": 12.218220392873288, "learning_rate": 2.4919039985404626e-07, "loss": 0.3892282545566559, "step": 2644 }, { "epoch": 2.7296181630546954, "grad_norm": 16.99587425966017, "learning_rate": 2.4732185937798193e-07, "loss": 0.5392187833786011, "step": 2645 }, { "epoch": 2.7306501547987616, "grad_norm": 10.629289948428452, "learning_rate": 2.4546017319417195e-07, "loss": 0.5916829109191895, "step": 2646 }, { "epoch": 2.731682146542828, "grad_norm": 12.159168968204627, "learning_rate": 2.436053439875319e-07, "loss": 0.49346038699150085, "step": 2647 }, { "epoch": 2.7327141382868936, "grad_norm": 7.354390122040929, "learning_rate": 2.4175737443308976e-07, "loss": 0.17073936760425568, "step": 2648 }, { "epoch": 2.73374613003096, "grad_norm": 11.696783298201472, "learning_rate": 2.399162671959793e-07, "loss": 0.2301475703716278, "step": 2649 }, { "epoch": 2.734778121775026, "grad_norm": 11.353460350336352, "learning_rate": 2.380820249314375e-07, "loss": 0.3915819525718689, "step": 2650 }, { "epoch": 2.7358101135190918, "grad_norm": 14.48128568885365, "learning_rate": 2.3625465028479955e-07, "loss": 0.47442927956581116, "step": 2651 }, { "epoch": 2.736842105263158, "grad_norm": 8.508034736982234, "learning_rate": 2.3443414589149838e-07, "loss": 0.32633233070373535, "step": 2652 }, { "epoch": 2.737874097007224, "grad_norm": 22.605320819509327, "learning_rate": 2.3262051437705768e-07, "loss": 0.3886736035346985, "step": 2653 }, { "epoch": 2.73890608875129, "grad_norm": 15.517220409316668, "learning_rate": 2.3081375835708854e-07, "loss": 0.5935795903205872, "step": 2654 }, { "epoch": 2.739938080495356, "grad_norm": 8.07802463401297, "learning_rate": 2.2901388043728878e-07, "loss": 0.17653796076774597, "step": 2655 }, { "epoch": 2.7409700722394224, "grad_norm": 15.402356283167004, "learning_rate": 2.272208832134326e-07, "loss": 1.0124492645263672, "step": 2656 }, { "epoch": 2.742002063983488, "grad_norm": 9.665398333246364, "learning_rate": 2.254347692713732e-07, "loss": 0.38071686029434204, "step": 2657 }, { "epoch": 2.7430340557275543, "grad_norm": 8.458922156834284, "learning_rate": 2.236555411870378e-07, "loss": 0.28697431087493896, "step": 2658 }, { "epoch": 2.74406604747162, "grad_norm": 10.098512735439094, "learning_rate": 2.218832015264205e-07, "loss": 0.2906286120414734, "step": 2659 }, { "epoch": 2.7450980392156863, "grad_norm": 18.255008975926504, "learning_rate": 2.201177528455828e-07, "loss": 0.6745045781135559, "step": 2660 }, { "epoch": 2.746130030959752, "grad_norm": 9.185863469817656, "learning_rate": 2.183591976906463e-07, "loss": 0.4779427647590637, "step": 2661 }, { "epoch": 2.7471620227038183, "grad_norm": 10.67008278019397, "learning_rate": 2.1660753859779225e-07, "loss": 0.40853461623191833, "step": 2662 }, { "epoch": 2.7481940144478845, "grad_norm": 12.329079381594989, "learning_rate": 2.1486277809325552e-07, "loss": 0.3884234130382538, "step": 2663 }, { "epoch": 2.7492260061919502, "grad_norm": 9.108233903645676, "learning_rate": 2.131249186933243e-07, "loss": 0.7551658153533936, "step": 2664 }, { "epoch": 2.7502579979360164, "grad_norm": 10.067547605251312, "learning_rate": 2.113939629043299e-07, "loss": 0.2735165059566498, "step": 2665 }, { "epoch": 2.7512899896800826, "grad_norm": 9.680391414190792, "learning_rate": 2.0966991322264984e-07, "loss": 0.3912424147129059, "step": 2666 }, { "epoch": 2.7523219814241484, "grad_norm": 11.378370517148733, "learning_rate": 2.0795277213470188e-07, "loss": 0.2264825999736786, "step": 2667 }, { "epoch": 2.7533539731682146, "grad_norm": 21.941128890870687, "learning_rate": 2.0624254211693894e-07, "loss": 0.3313651382923126, "step": 2668 }, { "epoch": 2.754385964912281, "grad_norm": 15.594446975408829, "learning_rate": 2.045392256358486e-07, "loss": 0.58062344789505, "step": 2669 }, { "epoch": 2.7554179566563466, "grad_norm": 12.128011911960778, "learning_rate": 2.0284282514794475e-07, "loss": 0.4429638385772705, "step": 2670 }, { "epoch": 2.756449948400413, "grad_norm": 11.255435272305718, "learning_rate": 2.0115334309977085e-07, "loss": 0.2681140899658203, "step": 2671 }, { "epoch": 2.757481940144479, "grad_norm": 10.030708809594579, "learning_rate": 1.994707819278896e-07, "loss": 0.45492902398109436, "step": 2672 }, { "epoch": 2.7585139318885448, "grad_norm": 14.285857459671988, "learning_rate": 1.9779514405888377e-07, "loss": 0.6418532133102417, "step": 2673 }, { "epoch": 2.759545923632611, "grad_norm": 14.377463040463253, "learning_rate": 1.96126431909352e-07, "loss": 0.411813884973526, "step": 2674 }, { "epoch": 2.760577915376677, "grad_norm": 21.43587231087201, "learning_rate": 1.9446464788590303e-07, "loss": 0.5893786549568176, "step": 2675 }, { "epoch": 2.761609907120743, "grad_norm": 11.225164649348446, "learning_rate": 1.9280979438515479e-07, "loss": 0.620746910572052, "step": 2676 }, { "epoch": 2.762641898864809, "grad_norm": 7.883313488136548, "learning_rate": 1.9116187379373043e-07, "loss": 0.38842883706092834, "step": 2677 }, { "epoch": 2.7636738906088754, "grad_norm": 9.710407091495496, "learning_rate": 1.8952088848825323e-07, "loss": 0.2083079218864441, "step": 2678 }, { "epoch": 2.764705882352941, "grad_norm": 8.553670477437477, "learning_rate": 1.878868408353468e-07, "loss": 0.18988867104053497, "step": 2679 }, { "epoch": 2.7657378740970073, "grad_norm": 9.294659711703655, "learning_rate": 1.8625973319162605e-07, "loss": 0.72264164686203, "step": 2680 }, { "epoch": 2.7667698658410735, "grad_norm": 10.670834169664904, "learning_rate": 1.846395679036994e-07, "loss": 0.4202941656112671, "step": 2681 }, { "epoch": 2.7678018575851393, "grad_norm": 12.53950084393448, "learning_rate": 1.830263473081617e-07, "loss": 0.7468389272689819, "step": 2682 }, { "epoch": 2.7688338493292055, "grad_norm": 14.216952823524933, "learning_rate": 1.8142007373159521e-07, "loss": 0.8521854281425476, "step": 2683 }, { "epoch": 2.7698658410732713, "grad_norm": 12.268212354249279, "learning_rate": 1.7982074949055794e-07, "loss": 0.24861863255500793, "step": 2684 }, { "epoch": 2.7708978328173375, "grad_norm": 5.890931718558767, "learning_rate": 1.7822837689158988e-07, "loss": 0.22426769137382507, "step": 2685 }, { "epoch": 2.7719298245614032, "grad_norm": 11.2628113902164, "learning_rate": 1.7664295823120347e-07, "loss": 0.2600301504135132, "step": 2686 }, { "epoch": 2.7729618163054695, "grad_norm": 9.251502944320855, "learning_rate": 1.7506449579588357e-07, "loss": 0.8601874709129333, "step": 2687 }, { "epoch": 2.7739938080495357, "grad_norm": 9.331768788971495, "learning_rate": 1.7349299186208258e-07, "loss": 0.32815781235694885, "step": 2688 }, { "epoch": 2.7750257997936014, "grad_norm": 10.322044875268688, "learning_rate": 1.7192844869621472e-07, "loss": 0.6990963220596313, "step": 2689 }, { "epoch": 2.7760577915376676, "grad_norm": 10.474923900476831, "learning_rate": 1.7037086855465902e-07, "loss": 0.7018382549285889, "step": 2690 }, { "epoch": 2.777089783281734, "grad_norm": 8.820928293879732, "learning_rate": 1.688202536837502e-07, "loss": 0.30639582872390747, "step": 2691 }, { "epoch": 2.7781217750257996, "grad_norm": 14.004852574406378, "learning_rate": 1.6727660631977894e-07, "loss": 0.5016225576400757, "step": 2692 }, { "epoch": 2.779153766769866, "grad_norm": 11.16794150458729, "learning_rate": 1.6573992868898714e-07, "loss": 0.49680233001708984, "step": 2693 }, { "epoch": 2.780185758513932, "grad_norm": 9.109794510400386, "learning_rate": 1.642102230075643e-07, "loss": 0.44969314336776733, "step": 2694 }, { "epoch": 2.781217750257998, "grad_norm": 9.912484510145516, "learning_rate": 1.6268749148164563e-07, "loss": 0.1183367371559143, "step": 2695 }, { "epoch": 2.782249742002064, "grad_norm": 8.344026321949334, "learning_rate": 1.6117173630730787e-07, "loss": 0.43249958753585815, "step": 2696 }, { "epoch": 2.78328173374613, "grad_norm": 13.014283356582448, "learning_rate": 1.5966295967056676e-07, "loss": 0.20627948641777039, "step": 2697 }, { "epoch": 2.784313725490196, "grad_norm": 9.620985232822974, "learning_rate": 1.5816116374737456e-07, "loss": 0.2142390012741089, "step": 2698 }, { "epoch": 2.785345717234262, "grad_norm": 10.793188846713067, "learning_rate": 1.5666635070361312e-07, "loss": 0.4890708327293396, "step": 2699 }, { "epoch": 2.7863777089783284, "grad_norm": 10.864902239809117, "learning_rate": 1.5517852269509692e-07, "loss": 0.3679332137107849, "step": 2700 }, { "epoch": 2.787409700722394, "grad_norm": 11.946551012179722, "learning_rate": 1.536976818675645e-07, "loss": 0.8791127800941467, "step": 2701 }, { "epoch": 2.7884416924664603, "grad_norm": 15.216087505144005, "learning_rate": 1.5222383035667866e-07, "loss": 0.6431314945220947, "step": 2702 }, { "epoch": 2.7894736842105265, "grad_norm": 11.35163488635866, "learning_rate": 1.5075697028802127e-07, "loss": 0.37764033675193787, "step": 2703 }, { "epoch": 2.7905056759545923, "grad_norm": 9.918993023831883, "learning_rate": 1.492971037770924e-07, "loss": 0.18792365491390228, "step": 2704 }, { "epoch": 2.7915376676986585, "grad_norm": 9.27459483814883, "learning_rate": 1.4784423292930505e-07, "loss": 0.17031517624855042, "step": 2705 }, { "epoch": 2.7925696594427247, "grad_norm": 11.747999342453264, "learning_rate": 1.463983598399832e-07, "loss": 0.6868878602981567, "step": 2706 }, { "epoch": 2.7936016511867905, "grad_norm": 13.424203722464256, "learning_rate": 1.4495948659435932e-07, "loss": 0.5683586597442627, "step": 2707 }, { "epoch": 2.7946336429308567, "grad_norm": 11.909445878210795, "learning_rate": 1.435276152675691e-07, "loss": 0.726739227771759, "step": 2708 }, { "epoch": 2.7956656346749225, "grad_norm": 10.0878594595676, "learning_rate": 1.4210274792465284e-07, "loss": 0.3911072015762329, "step": 2709 }, { "epoch": 2.7966976264189887, "grad_norm": 13.466899644240698, "learning_rate": 1.4068488662054735e-07, "loss": 0.34685128927230835, "step": 2710 }, { "epoch": 2.7977296181630544, "grad_norm": 13.176249426431637, "learning_rate": 1.3927403340008582e-07, "loss": 0.37769877910614014, "step": 2711 }, { "epoch": 2.7987616099071206, "grad_norm": 17.08716499957554, "learning_rate": 1.378701902979962e-07, "loss": 0.32527002692222595, "step": 2712 }, { "epoch": 2.799793601651187, "grad_norm": 10.11939347764081, "learning_rate": 1.364733593388934e-07, "loss": 0.15866559743881226, "step": 2713 }, { "epoch": 2.8008255933952526, "grad_norm": 10.646998032475292, "learning_rate": 1.3508354253728205e-07, "loss": 0.27378302812576294, "step": 2714 }, { "epoch": 2.801857585139319, "grad_norm": 16.93448402712155, "learning_rate": 1.3370074189755e-07, "loss": 0.41292816400527954, "step": 2715 }, { "epoch": 2.802889576883385, "grad_norm": 16.01106639724304, "learning_rate": 1.323249594139664e-07, "loss": 0.5351982712745667, "step": 2716 }, { "epoch": 2.803921568627451, "grad_norm": 11.68268860833618, "learning_rate": 1.3095619707067963e-07, "loss": 0.6094648241996765, "step": 2717 }, { "epoch": 2.804953560371517, "grad_norm": 14.556869333459348, "learning_rate": 1.2959445684171123e-07, "loss": 0.4445239305496216, "step": 2718 }, { "epoch": 2.805985552115583, "grad_norm": 10.363256530414061, "learning_rate": 1.2823974069095802e-07, "loss": 1.329785704612732, "step": 2719 }, { "epoch": 2.807017543859649, "grad_norm": 9.19755960663621, "learning_rate": 1.2689205057218602e-07, "loss": 0.22288936376571655, "step": 2720 }, { "epoch": 2.808049535603715, "grad_norm": 11.178463438858222, "learning_rate": 1.2555138842902826e-07, "loss": 0.2416641116142273, "step": 2721 }, { "epoch": 2.8090815273477814, "grad_norm": 8.724096486816755, "learning_rate": 1.24217756194982e-07, "loss": 0.3657599687576294, "step": 2722 }, { "epoch": 2.810113519091847, "grad_norm": 9.987512768985848, "learning_rate": 1.2289115579340538e-07, "loss": 0.27216145396232605, "step": 2723 }, { "epoch": 2.8111455108359134, "grad_norm": 12.572919253248884, "learning_rate": 1.2157158913751687e-07, "loss": 0.5774905681610107, "step": 2724 }, { "epoch": 2.8121775025799796, "grad_norm": 13.113996940511523, "learning_rate": 1.2025905813038917e-07, "loss": 0.3477795124053955, "step": 2725 }, { "epoch": 2.8132094943240453, "grad_norm": 11.919438155728603, "learning_rate": 1.1895356466494978e-07, "loss": 0.8780714869499207, "step": 2726 }, { "epoch": 2.8142414860681115, "grad_norm": 10.355575005545365, "learning_rate": 1.1765511062397483e-07, "loss": 0.7887623310089111, "step": 2727 }, { "epoch": 2.8152734778121777, "grad_norm": 13.82676737829339, "learning_rate": 1.1636369788008973e-07, "loss": 0.6320158839225769, "step": 2728 }, { "epoch": 2.8163054695562435, "grad_norm": 11.183921843773033, "learning_rate": 1.1507932829576407e-07, "loss": 0.43458500504493713, "step": 2729 }, { "epoch": 2.8173374613003097, "grad_norm": 11.564870667791334, "learning_rate": 1.1380200372331063e-07, "loss": 0.5087906718254089, "step": 2730 }, { "epoch": 2.818369453044376, "grad_norm": 13.832341375909081, "learning_rate": 1.1253172600488083e-07, "loss": 0.3639557659626007, "step": 2731 }, { "epoch": 2.8194014447884417, "grad_norm": 12.750370779773132, "learning_rate": 1.1126849697246533e-07, "loss": 0.32219162583351135, "step": 2732 }, { "epoch": 2.820433436532508, "grad_norm": 15.083575215367821, "learning_rate": 1.1001231844788574e-07, "loss": 0.5374658107757568, "step": 2733 }, { "epoch": 2.8214654282765737, "grad_norm": 11.60486251326375, "learning_rate": 1.0876319224279896e-07, "loss": 0.2404775768518448, "step": 2734 }, { "epoch": 2.82249742002064, "grad_norm": 8.905506133399465, "learning_rate": 1.0752112015868843e-07, "loss": 0.596005380153656, "step": 2735 }, { "epoch": 2.8235294117647056, "grad_norm": 18.932193446521126, "learning_rate": 1.0628610398686679e-07, "loss": 1.1185606718063354, "step": 2736 }, { "epoch": 2.824561403508772, "grad_norm": 10.874924961588212, "learning_rate": 1.0505814550846705e-07, "loss": 0.19293265044689178, "step": 2737 }, { "epoch": 2.825593395252838, "grad_norm": 7.561813823776279, "learning_rate": 1.0383724649444704e-07, "loss": 0.29149097204208374, "step": 2738 }, { "epoch": 2.826625386996904, "grad_norm": 9.496734760649641, "learning_rate": 1.0262340870558162e-07, "loss": 0.3492857813835144, "step": 2739 }, { "epoch": 2.82765737874097, "grad_norm": 14.643825367827347, "learning_rate": 1.014166338924627e-07, "loss": 0.3620374798774719, "step": 2740 }, { "epoch": 2.828689370485036, "grad_norm": 13.630359044704328, "learning_rate": 1.0021692379549585e-07, "loss": 1.076962947845459, "step": 2741 }, { "epoch": 2.829721362229102, "grad_norm": 8.35946293486137, "learning_rate": 9.902428014489762e-08, "loss": 0.445547878742218, "step": 2742 }, { "epoch": 2.830753353973168, "grad_norm": 10.391459307268816, "learning_rate": 9.783870466069433e-08, "loss": 0.39448314905166626, "step": 2743 }, { "epoch": 2.8317853457172344, "grad_norm": 13.087674033489241, "learning_rate": 9.666019905271662e-08, "loss": 0.6303298473358154, "step": 2744 }, { "epoch": 2.8328173374613, "grad_norm": 21.531957553952452, "learning_rate": 9.548876502060211e-08, "loss": 0.9480113387107849, "step": 2745 }, { "epoch": 2.8338493292053664, "grad_norm": 9.911494826358105, "learning_rate": 9.432440425378664e-08, "loss": 0.6038950085639954, "step": 2746 }, { "epoch": 2.8348813209494326, "grad_norm": 8.33541902845192, "learning_rate": 9.316711843150638e-08, "loss": 0.2861171066761017, "step": 2747 }, { "epoch": 2.8359133126934983, "grad_norm": 14.31287728036117, "learning_rate": 9.201690922279405e-08, "loss": 0.38812384009361267, "step": 2748 }, { "epoch": 2.8369453044375645, "grad_norm": 13.492626654791492, "learning_rate": 9.087377828647714e-08, "loss": 0.4195922613143921, "step": 2749 }, { "epoch": 2.8379772961816307, "grad_norm": 8.52565604085448, "learning_rate": 8.973772727117358e-08, "loss": 0.3792739808559418, "step": 2750 }, { "epoch": 2.8390092879256965, "grad_norm": 9.66940222976016, "learning_rate": 8.860875781529222e-08, "loss": 0.4448316693305969, "step": 2751 }, { "epoch": 2.8400412796697627, "grad_norm": 14.998287535953239, "learning_rate": 8.748687154702673e-08, "loss": 0.5362205505371094, "step": 2752 }, { "epoch": 2.841073271413829, "grad_norm": 7.2640553996033885, "learning_rate": 8.637207008435788e-08, "loss": 0.27778613567352295, "step": 2753 }, { "epoch": 2.8421052631578947, "grad_norm": 7.803788163827977, "learning_rate": 8.526435503504737e-08, "loss": 0.23669756948947906, "step": 2754 }, { "epoch": 2.843137254901961, "grad_norm": 33.20287493265629, "learning_rate": 8.416372799663674e-08, "loss": 1.3798854351043701, "step": 2755 }, { "epoch": 2.844169246646027, "grad_norm": 13.719617537778563, "learning_rate": 8.307019055644517e-08, "loss": 0.6489579677581787, "step": 2756 }, { "epoch": 2.845201238390093, "grad_norm": 12.182729633208371, "learning_rate": 8.198374429156886e-08, "loss": 0.501702606678009, "step": 2757 }, { "epoch": 2.846233230134159, "grad_norm": 11.014775544350565, "learning_rate": 8.090439076887557e-08, "loss": 0.6627386808395386, "step": 2758 }, { "epoch": 2.847265221878225, "grad_norm": 11.52891371708018, "learning_rate": 7.983213154500402e-08, "loss": 0.5250353813171387, "step": 2759 }, { "epoch": 2.848297213622291, "grad_norm": 10.62518915136225, "learning_rate": 7.876696816636276e-08, "loss": 0.19456374645233154, "step": 2760 }, { "epoch": 2.849329205366357, "grad_norm": 20.732355197414126, "learning_rate": 7.770890216912463e-08, "loss": 0.6740440130233765, "step": 2761 }, { "epoch": 2.850361197110423, "grad_norm": 9.981363789805053, "learning_rate": 7.665793507922903e-08, "loss": 0.417470782995224, "step": 2762 }, { "epoch": 2.8513931888544892, "grad_norm": 10.2390899038885, "learning_rate": 7.561406841237573e-08, "loss": 0.8772274255752563, "step": 2763 }, { "epoch": 2.852425180598555, "grad_norm": 10.049760714103, "learning_rate": 7.45773036740255e-08, "loss": 0.6738741397857666, "step": 2764 }, { "epoch": 2.853457172342621, "grad_norm": 11.496249432336205, "learning_rate": 7.354764235939505e-08, "loss": 0.24579696357250214, "step": 2765 }, { "epoch": 2.8544891640866874, "grad_norm": 11.221020100326134, "learning_rate": 7.252508595345765e-08, "loss": 0.3331579864025116, "step": 2766 }, { "epoch": 2.855521155830753, "grad_norm": 12.112232013450475, "learning_rate": 7.150963593094029e-08, "loss": 0.7456363439559937, "step": 2767 }, { "epoch": 2.8565531475748194, "grad_norm": 10.205895852941165, "learning_rate": 7.050129375632098e-08, "loss": 0.2590240240097046, "step": 2768 }, { "epoch": 2.8575851393188856, "grad_norm": 9.005970205024083, "learning_rate": 6.950006088382533e-08, "loss": 0.31267833709716797, "step": 2769 }, { "epoch": 2.8586171310629513, "grad_norm": 6.844001294261074, "learning_rate": 6.850593875742827e-08, "loss": 0.3022312819957733, "step": 2770 }, { "epoch": 2.8596491228070176, "grad_norm": 8.123381166455903, "learning_rate": 6.751892881084853e-08, "loss": 0.31355875730514526, "step": 2771 }, { "epoch": 2.8606811145510838, "grad_norm": 9.43281052399945, "learning_rate": 6.65390324675469e-08, "loss": 0.20084381103515625, "step": 2772 }, { "epoch": 2.8617131062951495, "grad_norm": 17.440180275290196, "learning_rate": 6.556625114072623e-08, "loss": 0.4890974760055542, "step": 2773 }, { "epoch": 2.8627450980392157, "grad_norm": 10.496821025738013, "learning_rate": 6.460058623332766e-08, "loss": 0.5973390340805054, "step": 2774 }, { "epoch": 2.863777089783282, "grad_norm": 26.173617084616886, "learning_rate": 6.364203913802824e-08, "loss": 0.5038038492202759, "step": 2775 }, { "epoch": 2.8648090815273477, "grad_norm": 18.962199958622488, "learning_rate": 6.269061123724163e-08, "loss": 0.7357000708580017, "step": 2776 }, { "epoch": 2.865841073271414, "grad_norm": 8.419033596286154, "learning_rate": 6.174630390311242e-08, "loss": 0.31707966327667236, "step": 2777 }, { "epoch": 2.86687306501548, "grad_norm": 11.399487419360666, "learning_rate": 6.080911849751681e-08, "loss": 0.5759315490722656, "step": 2778 }, { "epoch": 2.867905056759546, "grad_norm": 13.696280633619493, "learning_rate": 5.987905637206026e-08, "loss": 0.4270392060279846, "step": 2779 }, { "epoch": 2.868937048503612, "grad_norm": 8.507421922322656, "learning_rate": 5.895611886807317e-08, "loss": 0.40549927949905396, "step": 2780 }, { "epoch": 2.8699690402476783, "grad_norm": 10.040991890497093, "learning_rate": 5.804030731661303e-08, "loss": 0.8265923261642456, "step": 2781 }, { "epoch": 2.871001031991744, "grad_norm": 10.531228237547275, "learning_rate": 5.713162303845887e-08, "loss": 0.5311161279678345, "step": 2782 }, { "epoch": 2.8720330237358103, "grad_norm": 6.474633748136652, "learning_rate": 5.623006734411185e-08, "loss": 0.4658339321613312, "step": 2783 }, { "epoch": 2.873065015479876, "grad_norm": 13.523958814362693, "learning_rate": 5.533564153379134e-08, "loss": 0.641715407371521, "step": 2784 }, { "epoch": 2.8740970072239422, "grad_norm": 12.74107558480464, "learning_rate": 5.444834689743439e-08, "loss": 0.5655953288078308, "step": 2785 }, { "epoch": 2.875128998968008, "grad_norm": 8.181066484517263, "learning_rate": 5.356818471469405e-08, "loss": 0.29476261138916016, "step": 2786 }, { "epoch": 2.876160990712074, "grad_norm": 16.023616527724055, "learning_rate": 5.269515625493549e-08, "loss": 0.3077610433101654, "step": 2787 }, { "epoch": 2.8771929824561404, "grad_norm": 9.146095009743794, "learning_rate": 5.182926277723821e-08, "loss": 0.5303142666816711, "step": 2788 }, { "epoch": 2.878224974200206, "grad_norm": 7.830994742973659, "learning_rate": 5.097050553038829e-08, "loss": 0.3145953416824341, "step": 2789 }, { "epoch": 2.8792569659442724, "grad_norm": 12.407694830029483, "learning_rate": 5.0118885752883376e-08, "loss": 0.5515159368515015, "step": 2790 }, { "epoch": 2.8802889576883386, "grad_norm": 11.21357136261069, "learning_rate": 4.927440467292488e-08, "loss": 0.41922271251678467, "step": 2791 }, { "epoch": 2.8813209494324044, "grad_norm": 10.70014876987049, "learning_rate": 4.843706350842081e-08, "loss": 0.23146328330039978, "step": 2792 }, { "epoch": 2.8823529411764706, "grad_norm": 12.729873312487426, "learning_rate": 4.760686346698018e-08, "loss": 1.2350009679794312, "step": 2793 }, { "epoch": 2.8833849329205368, "grad_norm": 10.327084671696761, "learning_rate": 4.678380574591357e-08, "loss": 0.33487558364868164, "step": 2794 }, { "epoch": 2.8844169246646025, "grad_norm": 14.020291391129224, "learning_rate": 4.596789153223258e-08, "loss": 0.7674515247344971, "step": 2795 }, { "epoch": 2.8854489164086687, "grad_norm": 9.236506031541138, "learning_rate": 4.515912200264427e-08, "loss": 0.4201269745826721, "step": 2796 }, { "epoch": 2.886480908152735, "grad_norm": 11.388103105456102, "learning_rate": 4.43574983235534e-08, "loss": 0.35071414709091187, "step": 2797 }, { "epoch": 2.8875128998968007, "grad_norm": 11.519886506074421, "learning_rate": 4.356302165105741e-08, "loss": 0.4228086471557617, "step": 2798 }, { "epoch": 2.888544891640867, "grad_norm": 10.920468331420269, "learning_rate": 4.2775693130948094e-08, "loss": 0.4655839204788208, "step": 2799 }, { "epoch": 2.889576883384933, "grad_norm": 11.301364218639682, "learning_rate": 4.19955138987066e-08, "loss": 0.44735416769981384, "step": 2800 }, { "epoch": 2.890608875128999, "grad_norm": 17.013383304560165, "learning_rate": 4.122248507950399e-08, "loss": 0.8772158622741699, "step": 2801 }, { "epoch": 2.891640866873065, "grad_norm": 11.977124328109158, "learning_rate": 4.045660778820015e-08, "loss": 0.2992047369480133, "step": 2802 }, { "epoch": 2.8926728586171313, "grad_norm": 13.724193152652521, "learning_rate": 3.9697883129338756e-08, "loss": 0.29466360807418823, "step": 2803 }, { "epoch": 2.893704850361197, "grad_norm": 11.254521680601806, "learning_rate": 3.894631219715006e-08, "loss": 0.21574847400188446, "step": 2804 }, { "epoch": 2.8947368421052633, "grad_norm": 10.296319023427813, "learning_rate": 3.820189607554647e-08, "loss": 0.3829636871814728, "step": 2805 }, { "epoch": 2.8957688338493295, "grad_norm": 8.752116369115315, "learning_rate": 3.746463583812143e-08, "loss": 0.5248335599899292, "step": 2806 }, { "epoch": 2.8968008255933952, "grad_norm": 10.458602320218011, "learning_rate": 3.6734532548149405e-08, "loss": 0.42929303646087646, "step": 2807 }, { "epoch": 2.8978328173374615, "grad_norm": 13.063885625069268, "learning_rate": 3.601158725858034e-08, "loss": 0.19391657412052155, "step": 2808 }, { "epoch": 2.898864809081527, "grad_norm": 11.833405438652617, "learning_rate": 3.529580101204466e-08, "loss": 0.4269029498100281, "step": 2809 }, { "epoch": 2.8998968008255934, "grad_norm": 14.210076402805809, "learning_rate": 3.458717484084606e-08, "loss": 0.30730894207954407, "step": 2810 }, { "epoch": 2.900928792569659, "grad_norm": 12.170748254840527, "learning_rate": 3.3885709766962036e-08, "loss": 0.6975186467170715, "step": 2811 }, { "epoch": 2.9019607843137254, "grad_norm": 10.836316553045704, "learning_rate": 3.3191406802041693e-08, "loss": 0.3806256651878357, "step": 2812 }, { "epoch": 2.9029927760577916, "grad_norm": 9.015789403322383, "learning_rate": 3.2504266947406824e-08, "loss": 0.29094287753105164, "step": 2813 }, { "epoch": 2.9040247678018574, "grad_norm": 10.27225739186077, "learning_rate": 3.1824291194046954e-08, "loss": 0.4287331700325012, "step": 2814 }, { "epoch": 2.9050567595459236, "grad_norm": 10.577320468383352, "learning_rate": 3.11514805226204e-08, "loss": 0.982883095741272, "step": 2815 }, { "epoch": 2.90608875128999, "grad_norm": 12.498037060798291, "learning_rate": 3.048583590345266e-08, "loss": 0.6146286725997925, "step": 2816 }, { "epoch": 2.9071207430340555, "grad_norm": 14.050659575481342, "learning_rate": 2.982735829653249e-08, "loss": 0.6893696784973145, "step": 2817 }, { "epoch": 2.9081527347781218, "grad_norm": 10.410803532472595, "learning_rate": 2.9176048651513578e-08, "loss": 0.5689204931259155, "step": 2818 }, { "epoch": 2.909184726522188, "grad_norm": 10.927169952282387, "learning_rate": 2.8531907907712876e-08, "loss": 0.39179882407188416, "step": 2819 }, { "epoch": 2.9102167182662537, "grad_norm": 20.5922212081612, "learning_rate": 2.7894936994106724e-08, "loss": 1.1332855224609375, "step": 2820 }, { "epoch": 2.91124871001032, "grad_norm": 7.895624536417559, "learning_rate": 2.726513682933196e-08, "loss": 0.25301265716552734, "step": 2821 }, { "epoch": 2.912280701754386, "grad_norm": 8.380759870706228, "learning_rate": 2.6642508321683692e-08, "loss": 0.2217177003622055, "step": 2822 }, { "epoch": 2.913312693498452, "grad_norm": 13.769480574074626, "learning_rate": 2.602705236911418e-08, "loss": 0.8979390859603882, "step": 2823 }, { "epoch": 2.914344685242518, "grad_norm": 9.085418594008585, "learning_rate": 2.5418769859231194e-08, "loss": 0.658086895942688, "step": 2824 }, { "epoch": 2.9153766769865843, "grad_norm": 12.25697807718521, "learning_rate": 2.4817661669297445e-08, "loss": 0.5765917897224426, "step": 2825 }, { "epoch": 2.91640866873065, "grad_norm": 10.694665070101687, "learning_rate": 2.4223728666228906e-08, "loss": 0.5431925058364868, "step": 2826 }, { "epoch": 2.9174406604747163, "grad_norm": 11.889135343753276, "learning_rate": 2.3636971706592627e-08, "loss": 0.4332832098007202, "step": 2827 }, { "epoch": 2.9184726522187825, "grad_norm": 7.798765327554262, "learning_rate": 2.3057391636606698e-08, "loss": 0.8352420330047607, "step": 2828 }, { "epoch": 2.9195046439628483, "grad_norm": 9.417958904982532, "learning_rate": 2.248498929214027e-08, "loss": 0.28343185782432556, "step": 2829 }, { "epoch": 2.9205366357069145, "grad_norm": 14.648485052578973, "learning_rate": 2.1919765498708556e-08, "loss": 0.31452351808547974, "step": 2830 }, { "epoch": 2.9215686274509802, "grad_norm": 12.479013074380937, "learning_rate": 2.1361721071475605e-08, "loss": 0.39548927545547485, "step": 2831 }, { "epoch": 2.9226006191950464, "grad_norm": 11.940862196644698, "learning_rate": 2.081085681524986e-08, "loss": 0.5270460247993469, "step": 2832 }, { "epoch": 2.9236326109391126, "grad_norm": 14.506115481993955, "learning_rate": 2.0267173524485816e-08, "loss": 0.483257532119751, "step": 2833 }, { "epoch": 2.9246646026831784, "grad_norm": 24.618465765797602, "learning_rate": 1.9730671983281824e-08, "loss": 0.9315032362937927, "step": 2834 }, { "epoch": 2.9256965944272446, "grad_norm": 7.785308907165337, "learning_rate": 1.920135296537784e-08, "loss": 0.2826288342475891, "step": 2835 }, { "epoch": 2.9267285861713104, "grad_norm": 8.905394415808534, "learning_rate": 1.8679217234154335e-08, "loss": 0.14207813143730164, "step": 2836 }, { "epoch": 2.9277605779153766, "grad_norm": 9.642626790270166, "learning_rate": 1.8164265542634507e-08, "loss": 0.2834736406803131, "step": 2837 }, { "epoch": 2.928792569659443, "grad_norm": 12.736848130226655, "learning_rate": 1.765649863347929e-08, "loss": 0.3384333848953247, "step": 2838 }, { "epoch": 2.9298245614035086, "grad_norm": 9.98619549627037, "learning_rate": 1.7155917238987906e-08, "loss": 0.24947945773601532, "step": 2839 }, { "epoch": 2.9308565531475748, "grad_norm": 15.43292926706652, "learning_rate": 1.6662522081097308e-08, "loss": 0.3706369400024414, "step": 2840 }, { "epoch": 2.931888544891641, "grad_norm": 15.193315285621756, "learning_rate": 1.61763138713783e-08, "loss": 1.1745437383651733, "step": 2841 }, { "epoch": 2.9329205366357067, "grad_norm": 8.539014235999547, "learning_rate": 1.5697293311039973e-08, "loss": 0.6649677753448486, "step": 2842 }, { "epoch": 2.933952528379773, "grad_norm": 12.099623997731364, "learning_rate": 1.522546109092249e-08, "loss": 0.5370413064956665, "step": 2843 }, { "epoch": 2.934984520123839, "grad_norm": 10.079698858586877, "learning_rate": 1.4760817891500966e-08, "loss": 0.6036677956581116, "step": 2844 }, { "epoch": 2.936016511867905, "grad_norm": 22.78996695895161, "learning_rate": 1.4303364382881601e-08, "loss": 0.7218687534332275, "step": 2845 }, { "epoch": 2.937048503611971, "grad_norm": 10.605000934865663, "learning_rate": 1.3853101224802212e-08, "loss": 0.3760529160499573, "step": 2846 }, { "epoch": 2.9380804953560373, "grad_norm": 7.990920195942308, "learning_rate": 1.3410029066630025e-08, "loss": 0.3890661597251892, "step": 2847 }, { "epoch": 2.939112487100103, "grad_norm": 14.00545881468774, "learning_rate": 1.2974148547362231e-08, "loss": 0.7472094893455505, "step": 2848 }, { "epoch": 2.9401444788441693, "grad_norm": 18.041176052953638, "learning_rate": 1.2545460295623757e-08, "loss": 1.3969285488128662, "step": 2849 }, { "epoch": 2.9411764705882355, "grad_norm": 12.776565154910713, "learning_rate": 1.212396492966672e-08, "loss": 0.3700495660305023, "step": 2850 }, { "epoch": 2.9422084623323013, "grad_norm": 10.925723452827187, "learning_rate": 1.1709663057370424e-08, "loss": 0.40052279829978943, "step": 2851 }, { "epoch": 2.9432404540763675, "grad_norm": 7.389953137459436, "learning_rate": 1.1302555276238581e-08, "loss": 0.2983669340610504, "step": 2852 }, { "epoch": 2.9442724458204337, "grad_norm": 7.6412748058052635, "learning_rate": 1.0902642173400424e-08, "loss": 0.46939218044281006, "step": 2853 }, { "epoch": 2.9453044375644994, "grad_norm": 13.33103952638023, "learning_rate": 1.05099243256096e-08, "loss": 0.7096831202507019, "step": 2854 }, { "epoch": 2.9463364293085657, "grad_norm": 9.170467654663462, "learning_rate": 1.0124402299241943e-08, "loss": 0.3482118248939514, "step": 2855 }, { "epoch": 2.9473684210526314, "grad_norm": 11.277517652250038, "learning_rate": 9.746076650294922e-09, "loss": 0.5088193416595459, "step": 2856 }, { "epoch": 2.9484004127966976, "grad_norm": 10.021439312052443, "learning_rate": 9.374947924388755e-09, "loss": 0.39733967185020447, "step": 2857 }, { "epoch": 2.949432404540764, "grad_norm": 10.531017191140222, "learning_rate": 9.011016656764182e-09, "loss": 0.5305665731430054, "step": 2858 }, { "epoch": 2.9504643962848296, "grad_norm": 10.033494308935959, "learning_rate": 8.654283372280248e-09, "loss": 0.6603313684463501, "step": 2859 }, { "epoch": 2.951496388028896, "grad_norm": 9.636604443118136, "learning_rate": 8.304748585417077e-09, "loss": 0.45227867364883423, "step": 2860 }, { "epoch": 2.9525283797729616, "grad_norm": 15.953758922812748, "learning_rate": 7.96241280027199e-09, "loss": 0.6437937021255493, "step": 2861 }, { "epoch": 2.9535603715170278, "grad_norm": 8.005244278076123, "learning_rate": 7.627276510560056e-09, "loss": 0.29151588678359985, "step": 2862 }, { "epoch": 2.954592363261094, "grad_norm": 13.702876635129847, "learning_rate": 7.299340199613536e-09, "loss": 0.6355423927307129, "step": 2863 }, { "epoch": 2.9556243550051597, "grad_norm": 15.606539779621775, "learning_rate": 6.978604340380779e-09, "loss": 0.689186692237854, "step": 2864 }, { "epoch": 2.956656346749226, "grad_norm": 9.995480499282703, "learning_rate": 6.665069395425661e-09, "loss": 0.46941396594047546, "step": 2865 }, { "epoch": 2.957688338493292, "grad_norm": 8.071745754656114, "learning_rate": 6.3587358169264755e-09, "loss": 0.25395047664642334, "step": 2866 }, { "epoch": 2.958720330237358, "grad_norm": 14.390161317483425, "learning_rate": 6.059604046677603e-09, "loss": 0.1884629726409912, "step": 2867 }, { "epoch": 2.959752321981424, "grad_norm": 8.335728992300055, "learning_rate": 5.767674516083954e-09, "loss": 0.7414337992668152, "step": 2868 }, { "epoch": 2.9607843137254903, "grad_norm": 17.72468534614113, "learning_rate": 5.48294764616597e-09, "loss": 0.45233145356178284, "step": 2869 }, { "epoch": 2.961816305469556, "grad_norm": 9.074680053880547, "learning_rate": 5.205423847555735e-09, "loss": 0.5274835824966431, "step": 2870 }, { "epoch": 2.9628482972136223, "grad_norm": 10.471200128103057, "learning_rate": 4.9351035204964205e-09, "loss": 0.3120895326137543, "step": 2871 }, { "epoch": 2.9638802889576885, "grad_norm": 8.392739587257111, "learning_rate": 4.671987054842842e-09, "loss": 0.3116510808467865, "step": 2872 }, { "epoch": 2.9649122807017543, "grad_norm": 11.476475028393562, "learning_rate": 4.416074830060346e-09, "loss": 0.36004531383514404, "step": 2873 }, { "epoch": 2.9659442724458205, "grad_norm": 11.562375996795918, "learning_rate": 4.167367215224816e-09, "loss": 0.4337872862815857, "step": 2874 }, { "epoch": 2.9669762641898867, "grad_norm": 11.747325700622214, "learning_rate": 3.925864569021554e-09, "loss": 0.35795819759368896, "step": 2875 }, { "epoch": 2.9680082559339525, "grad_norm": 7.907415652265775, "learning_rate": 3.6915672397436208e-09, "loss": 0.29183316230773926, "step": 2876 }, { "epoch": 2.9690402476780187, "grad_norm": 11.88023342897466, "learning_rate": 3.4644755652946115e-09, "loss": 0.5190762877464294, "step": 2877 }, { "epoch": 2.970072239422085, "grad_norm": 9.961046006998368, "learning_rate": 3.244589873185322e-09, "loss": 0.26542696356773376, "step": 2878 }, { "epoch": 2.9711042311661506, "grad_norm": 8.78934589213461, "learning_rate": 3.0319104805326404e-09, "loss": 0.34915220737457275, "step": 2879 }, { "epoch": 2.972136222910217, "grad_norm": 12.178593168751178, "learning_rate": 2.8264376940634332e-09, "loss": 0.2416977733373642, "step": 2880 }, { "epoch": 2.9731682146542826, "grad_norm": 11.769541867178553, "learning_rate": 2.6281718101089927e-09, "loss": 0.4350973963737488, "step": 2881 }, { "epoch": 2.974200206398349, "grad_norm": 9.211479849303478, "learning_rate": 2.437113114607259e-09, "loss": 0.29845768213272095, "step": 2882 }, { "epoch": 2.975232198142415, "grad_norm": 12.049136286543924, "learning_rate": 2.2532618831022646e-09, "loss": 0.44155120849609375, "step": 2883 }, { "epoch": 2.976264189886481, "grad_norm": 7.992230482448386, "learning_rate": 2.076618380744133e-09, "loss": 0.24698218703269958, "step": 2884 }, { "epoch": 2.977296181630547, "grad_norm": 12.091285409429458, "learning_rate": 1.9071828622868603e-09, "loss": 0.4726845324039459, "step": 2885 }, { "epoch": 2.9783281733746128, "grad_norm": 4.444246393658765, "learning_rate": 1.7449555720899792e-09, "loss": 0.20463469624519348, "step": 2886 }, { "epoch": 2.979360165118679, "grad_norm": 9.529793198317492, "learning_rate": 1.5899367441168934e-09, "loss": 0.3188978433609009, "step": 2887 }, { "epoch": 2.980392156862745, "grad_norm": 13.147973989711732, "learning_rate": 1.4421266019348789e-09, "loss": 0.6265153288841248, "step": 2888 }, { "epoch": 2.981424148606811, "grad_norm": 15.723436943080621, "learning_rate": 1.3015253587150832e-09, "loss": 0.4962090253829956, "step": 2889 }, { "epoch": 2.982456140350877, "grad_norm": 10.55766498053424, "learning_rate": 1.1681332172319704e-09, "loss": 1.112623929977417, "step": 2890 }, { "epoch": 2.9834881320949433, "grad_norm": 11.392080474977103, "learning_rate": 1.0419503698633205e-09, "loss": 0.2539787292480469, "step": 2891 }, { "epoch": 2.984520123839009, "grad_norm": 12.631956110800884, "learning_rate": 9.229769985902304e-10, "loss": 0.36053264141082764, "step": 2892 }, { "epoch": 2.9855521155830753, "grad_norm": 12.749296952365622, "learning_rate": 8.11213274994338e-10, "loss": 0.3173477351665497, "step": 2893 }, { "epoch": 2.9865841073271415, "grad_norm": 10.105348450703012, "learning_rate": 7.066593602611527e-10, "loss": 0.5603840351104736, "step": 2894 }, { "epoch": 2.9876160990712073, "grad_norm": 8.958449767473246, "learning_rate": 6.09315405177835e-10, "loss": 0.4048866331577301, "step": 2895 }, { "epoch": 2.9886480908152735, "grad_norm": 10.931973199978893, "learning_rate": 5.191815501343067e-10, "loss": 0.4945456385612488, "step": 2896 }, { "epoch": 2.9896800825593397, "grad_norm": 9.277216206985075, "learning_rate": 4.362579251204757e-10, "loss": 0.703812837600708, "step": 2897 }, { "epoch": 2.9907120743034055, "grad_norm": 8.155754435970792, "learning_rate": 3.605446497279008e-10, "loss": 0.34644225239753723, "step": 2898 }, { "epoch": 2.9917440660474717, "grad_norm": 8.169228239958668, "learning_rate": 2.920418331514574e-10, "loss": 0.27196234464645386, "step": 2899 }, { "epoch": 2.992776057791538, "grad_norm": 7.080576518911739, "learning_rate": 2.307495741843413e-10, "loss": 0.19420018792152405, "step": 2900 }, { "epoch": 2.9938080495356036, "grad_norm": 28.391027360704857, "learning_rate": 1.766679612219546e-10, "loss": 1.4822670221328735, "step": 2901 }, { "epoch": 2.99484004127967, "grad_norm": 16.511890158796366, "learning_rate": 1.2979707226135063e-10, "loss": 2.818498373031616, "step": 2902 }, { "epoch": 2.995872033023736, "grad_norm": 7.117609068184708, "learning_rate": 9.013697489956841e-11, "loss": 0.45413410663604736, "step": 2903 }, { "epoch": 2.996904024767802, "grad_norm": 12.77729655477497, "learning_rate": 5.768772633363284e-11, "loss": 0.9320136308670044, "step": 2904 }, { "epoch": 2.997936016511868, "grad_norm": 13.631618562372143, "learning_rate": 3.244937336166487e-11, "loss": 0.9153477549552917, "step": 2905 }, { "epoch": 2.998968008255934, "grad_norm": 5.890317723699965, "learning_rate": 1.4421952382881466e-11, "loss": 0.17859113216400146, "step": 2906 }, { "epoch": 3.0, "grad_norm": 5.446335325117752, "learning_rate": 3.6054893953751947e-12, "loss": 0.2771958112716675, "step": 2907 }, { "epoch": 3.0, "step": 2907, "total_flos": 7768633466880.0, "train_loss": 1.6535469640083522, "train_runtime": 3364.7928, "train_samples_per_second": 3.453, "train_steps_per_second": 0.864 } ], "logging_steps": 1, "max_steps": 2907, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7768633466880.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }