{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1428, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021008403361344537, "grad_norm": 46.99030458326992, "learning_rate": 0.0, "loss": 4.722589492797852, "step": 1 }, { "epoch": 0.004201680672268907, "grad_norm": 36.88474864262765, "learning_rate": 6.993006993006993e-08, "loss": 3.9686050415039062, "step": 2 }, { "epoch": 0.0063025210084033615, "grad_norm": 33.07457670969746, "learning_rate": 1.3986013986013987e-07, "loss": 4.086915969848633, "step": 3 }, { "epoch": 0.008403361344537815, "grad_norm": 31.625791708920385, "learning_rate": 2.097902097902098e-07, "loss": 4.020754814147949, "step": 4 }, { "epoch": 0.01050420168067227, "grad_norm": 43.0872077594366, "learning_rate": 2.7972027972027973e-07, "loss": 4.11073112487793, "step": 5 }, { "epoch": 0.012605042016806723, "grad_norm": 41.05815920768766, "learning_rate": 3.496503496503497e-07, "loss": 4.068889141082764, "step": 6 }, { "epoch": 0.014705882352941176, "grad_norm": 38.60187500165403, "learning_rate": 4.195804195804196e-07, "loss": 3.9590301513671875, "step": 7 }, { "epoch": 0.01680672268907563, "grad_norm": 39.6727205000689, "learning_rate": 4.895104895104896e-07, "loss": 3.7929787635803223, "step": 8 }, { "epoch": 0.018907563025210083, "grad_norm": 35.21152216885091, "learning_rate": 5.594405594405595e-07, "loss": 4.345971584320068, "step": 9 }, { "epoch": 0.02100840336134454, "grad_norm": 32.94311862999745, "learning_rate": 6.293706293706295e-07, "loss": 4.204797744750977, "step": 10 }, { "epoch": 0.023109243697478993, "grad_norm": 35.708967266069514, "learning_rate": 6.993006993006994e-07, "loss": 4.2924957275390625, "step": 11 }, { "epoch": 0.025210084033613446, "grad_norm": 35.068164747297715, "learning_rate": 7.692307692307694e-07, "loss": 3.6519017219543457, "step": 12 }, { "epoch": 0.0273109243697479, "grad_norm": 36.012069511225576, "learning_rate": 8.391608391608393e-07, "loss": 3.5902950763702393, "step": 13 }, { "epoch": 0.029411764705882353, "grad_norm": 31.883494522724174, "learning_rate": 9.090909090909091e-07, "loss": 3.751192092895508, "step": 14 }, { "epoch": 0.031512605042016806, "grad_norm": 28.67530148457089, "learning_rate": 9.790209790209791e-07, "loss": 4.330526351928711, "step": 15 }, { "epoch": 0.03361344537815126, "grad_norm": 36.87930031460684, "learning_rate": 1.0489510489510491e-06, "loss": 3.7747349739074707, "step": 16 }, { "epoch": 0.03571428571428571, "grad_norm": 34.94283148044396, "learning_rate": 1.118881118881119e-06, "loss": 3.6174468994140625, "step": 17 }, { "epoch": 0.037815126050420166, "grad_norm": 26.489554272646977, "learning_rate": 1.188811188811189e-06, "loss": 3.4348971843719482, "step": 18 }, { "epoch": 0.03991596638655462, "grad_norm": 25.603886661513403, "learning_rate": 1.258741258741259e-06, "loss": 3.4862470626831055, "step": 19 }, { "epoch": 0.04201680672268908, "grad_norm": 22.404814809342252, "learning_rate": 1.3286713286713287e-06, "loss": 3.7471625804901123, "step": 20 }, { "epoch": 0.04411764705882353, "grad_norm": 21.15277638751192, "learning_rate": 1.3986013986013987e-06, "loss": 3.6562182903289795, "step": 21 }, { "epoch": 0.046218487394957986, "grad_norm": 20.270881263670095, "learning_rate": 1.4685314685314685e-06, "loss": 2.944753408432007, "step": 22 }, { "epoch": 0.04831932773109244, "grad_norm": 22.812966816264836, "learning_rate": 1.5384615384615387e-06, "loss": 4.277539253234863, "step": 23 }, { "epoch": 0.05042016806722689, "grad_norm": 21.380979723581284, "learning_rate": 1.6083916083916085e-06, "loss": 4.103379726409912, "step": 24 }, { "epoch": 0.052521008403361345, "grad_norm": 13.548338819677783, "learning_rate": 1.6783216783216785e-06, "loss": 3.516192674636841, "step": 25 }, { "epoch": 0.0546218487394958, "grad_norm": 16.574184790133323, "learning_rate": 1.7482517482517483e-06, "loss": 3.054426908493042, "step": 26 }, { "epoch": 0.05672268907563025, "grad_norm": 14.817846975349166, "learning_rate": 1.8181818181818183e-06, "loss": 3.598344564437866, "step": 27 }, { "epoch": 0.058823529411764705, "grad_norm": 13.230202987729585, "learning_rate": 1.888111888111888e-06, "loss": 2.1566905975341797, "step": 28 }, { "epoch": 0.06092436974789916, "grad_norm": 19.612310968262104, "learning_rate": 1.9580419580419583e-06, "loss": 2.7493889331817627, "step": 29 }, { "epoch": 0.06302521008403361, "grad_norm": 18.334666543367657, "learning_rate": 2.027972027972028e-06, "loss": 3.7484190464019775, "step": 30 }, { "epoch": 0.06512605042016807, "grad_norm": 15.812972082251932, "learning_rate": 2.0979020979020983e-06, "loss": 3.4763312339782715, "step": 31 }, { "epoch": 0.06722689075630252, "grad_norm": 18.340243585590446, "learning_rate": 2.167832167832168e-06, "loss": 4.1537184715271, "step": 32 }, { "epoch": 0.06932773109243698, "grad_norm": 9.470566142580898, "learning_rate": 2.237762237762238e-06, "loss": 3.949978828430176, "step": 33 }, { "epoch": 0.07142857142857142, "grad_norm": 15.047162012043515, "learning_rate": 2.307692307692308e-06, "loss": 3.095123291015625, "step": 34 }, { "epoch": 0.07352941176470588, "grad_norm": 15.619490386855553, "learning_rate": 2.377622377622378e-06, "loss": 3.6232047080993652, "step": 35 }, { "epoch": 0.07563025210084033, "grad_norm": 12.996399577415676, "learning_rate": 2.4475524475524477e-06, "loss": 3.801804304122925, "step": 36 }, { "epoch": 0.07773109243697479, "grad_norm": 7.924270548246447, "learning_rate": 2.517482517482518e-06, "loss": 2.909287452697754, "step": 37 }, { "epoch": 0.07983193277310924, "grad_norm": 10.838167134028488, "learning_rate": 2.5874125874125877e-06, "loss": 3.283078670501709, "step": 38 }, { "epoch": 0.0819327731092437, "grad_norm": 12.236334826312302, "learning_rate": 2.6573426573426574e-06, "loss": 2.9224965572357178, "step": 39 }, { "epoch": 0.08403361344537816, "grad_norm": 10.59808836361908, "learning_rate": 2.7272727272727272e-06, "loss": 3.591977119445801, "step": 40 }, { "epoch": 0.0861344537815126, "grad_norm": 9.295669805450128, "learning_rate": 2.7972027972027974e-06, "loss": 3.0213565826416016, "step": 41 }, { "epoch": 0.08823529411764706, "grad_norm": 12.563423620415891, "learning_rate": 2.8671328671328672e-06, "loss": 2.9183509349823, "step": 42 }, { "epoch": 0.09033613445378151, "grad_norm": 13.858660538396043, "learning_rate": 2.937062937062937e-06, "loss": 3.4748919010162354, "step": 43 }, { "epoch": 0.09243697478991597, "grad_norm": 11.043040598415395, "learning_rate": 3.006993006993007e-06, "loss": 3.53951096534729, "step": 44 }, { "epoch": 0.09453781512605042, "grad_norm": 12.201335477546305, "learning_rate": 3.0769230769230774e-06, "loss": 3.2075607776641846, "step": 45 }, { "epoch": 0.09663865546218488, "grad_norm": 20.281483402633803, "learning_rate": 3.1468531468531472e-06, "loss": 3.2893571853637695, "step": 46 }, { "epoch": 0.09873949579831932, "grad_norm": 10.008468666819498, "learning_rate": 3.216783216783217e-06, "loss": 3.47295880317688, "step": 47 }, { "epoch": 0.10084033613445378, "grad_norm": 11.407538553004894, "learning_rate": 3.286713286713287e-06, "loss": 3.5495269298553467, "step": 48 }, { "epoch": 0.10294117647058823, "grad_norm": 16.252417297798132, "learning_rate": 3.356643356643357e-06, "loss": 3.218782901763916, "step": 49 }, { "epoch": 0.10504201680672269, "grad_norm": 8.752146553121406, "learning_rate": 3.426573426573427e-06, "loss": 2.612854242324829, "step": 50 }, { "epoch": 0.10714285714285714, "grad_norm": 14.782692853689836, "learning_rate": 3.4965034965034966e-06, "loss": 3.0805444717407227, "step": 51 }, { "epoch": 0.1092436974789916, "grad_norm": 14.664178996815842, "learning_rate": 3.566433566433567e-06, "loss": 3.1539719104766846, "step": 52 }, { "epoch": 0.11134453781512606, "grad_norm": 13.158498079025986, "learning_rate": 3.6363636363636366e-06, "loss": 3.5745811462402344, "step": 53 }, { "epoch": 0.1134453781512605, "grad_norm": 9.661944205457672, "learning_rate": 3.7062937062937064e-06, "loss": 3.033264398574829, "step": 54 }, { "epoch": 0.11554621848739496, "grad_norm": 8.534767379388418, "learning_rate": 3.776223776223776e-06, "loss": 2.5727319717407227, "step": 55 }, { "epoch": 0.11764705882352941, "grad_norm": 10.446726865588245, "learning_rate": 3.846153846153847e-06, "loss": 3.4801394939422607, "step": 56 }, { "epoch": 0.11974789915966387, "grad_norm": 11.510497882977212, "learning_rate": 3.916083916083917e-06, "loss": 3.253239631652832, "step": 57 }, { "epoch": 0.12184873949579832, "grad_norm": 12.480969279334285, "learning_rate": 3.986013986013986e-06, "loss": 3.0049266815185547, "step": 58 }, { "epoch": 0.12394957983193278, "grad_norm": 10.926998541566615, "learning_rate": 4.055944055944056e-06, "loss": 3.13586688041687, "step": 59 }, { "epoch": 0.12605042016806722, "grad_norm": 15.080151132986066, "learning_rate": 4.125874125874127e-06, "loss": 3.5970468521118164, "step": 60 }, { "epoch": 0.12815126050420167, "grad_norm": 11.40302094802426, "learning_rate": 4.195804195804197e-06, "loss": 3.0423130989074707, "step": 61 }, { "epoch": 0.13025210084033614, "grad_norm": 27.877774734458356, "learning_rate": 4.265734265734266e-06, "loss": 3.270495891571045, "step": 62 }, { "epoch": 0.1323529411764706, "grad_norm": 14.517043785366944, "learning_rate": 4.335664335664336e-06, "loss": 3.3109726905822754, "step": 63 }, { "epoch": 0.13445378151260504, "grad_norm": 9.703645186786849, "learning_rate": 4.405594405594406e-06, "loss": 2.8192973136901855, "step": 64 }, { "epoch": 0.13655462184873948, "grad_norm": 7.165620671720677, "learning_rate": 4.475524475524476e-06, "loss": 2.6368956565856934, "step": 65 }, { "epoch": 0.13865546218487396, "grad_norm": 8.390508554521247, "learning_rate": 4.5454545454545455e-06, "loss": 3.2420871257781982, "step": 66 }, { "epoch": 0.1407563025210084, "grad_norm": 13.495244668273582, "learning_rate": 4.615384615384616e-06, "loss": 3.4662106037139893, "step": 67 }, { "epoch": 0.14285714285714285, "grad_norm": 8.727556576037161, "learning_rate": 4.685314685314686e-06, "loss": 2.52485728263855, "step": 68 }, { "epoch": 0.14495798319327732, "grad_norm": 7.972842185352863, "learning_rate": 4.755244755244756e-06, "loss": 2.94364595413208, "step": 69 }, { "epoch": 0.14705882352941177, "grad_norm": 9.543376366698592, "learning_rate": 4.8251748251748255e-06, "loss": 3.080875873565674, "step": 70 }, { "epoch": 0.14915966386554622, "grad_norm": 9.776294776088129, "learning_rate": 4.895104895104895e-06, "loss": 2.779900550842285, "step": 71 }, { "epoch": 0.15126050420168066, "grad_norm": 13.30903798143632, "learning_rate": 4.965034965034965e-06, "loss": 2.5541608333587646, "step": 72 }, { "epoch": 0.15336134453781514, "grad_norm": 15.821626595005261, "learning_rate": 5.034965034965036e-06, "loss": 3.3032145500183105, "step": 73 }, { "epoch": 0.15546218487394958, "grad_norm": 16.2233191932233, "learning_rate": 5.1048951048951055e-06, "loss": 3.302570104598999, "step": 74 }, { "epoch": 0.15756302521008403, "grad_norm": 12.108052548372182, "learning_rate": 5.174825174825175e-06, "loss": 3.084743022918701, "step": 75 }, { "epoch": 0.15966386554621848, "grad_norm": 12.123207907469205, "learning_rate": 5.244755244755245e-06, "loss": 2.839994430541992, "step": 76 }, { "epoch": 0.16176470588235295, "grad_norm": 27.162201978657112, "learning_rate": 5.314685314685315e-06, "loss": 2.8261585235595703, "step": 77 }, { "epoch": 0.1638655462184874, "grad_norm": 9.26220027446702, "learning_rate": 5.384615384615385e-06, "loss": 3.022369861602783, "step": 78 }, { "epoch": 0.16596638655462184, "grad_norm": 34.01052504369158, "learning_rate": 5.4545454545454545e-06, "loss": 3.11270809173584, "step": 79 }, { "epoch": 0.16806722689075632, "grad_norm": 10.318191696420305, "learning_rate": 5.524475524475524e-06, "loss": 2.8419973850250244, "step": 80 }, { "epoch": 0.17016806722689076, "grad_norm": 8.227880656419073, "learning_rate": 5.594405594405595e-06, "loss": 3.14296555519104, "step": 81 }, { "epoch": 0.1722689075630252, "grad_norm": 9.40271889928186, "learning_rate": 5.664335664335665e-06, "loss": 2.8033950328826904, "step": 82 }, { "epoch": 0.17436974789915966, "grad_norm": 9.95443701525972, "learning_rate": 5.7342657342657345e-06, "loss": 3.087614059448242, "step": 83 }, { "epoch": 0.17647058823529413, "grad_norm": 9.899264562788812, "learning_rate": 5.804195804195804e-06, "loss": 2.7504851818084717, "step": 84 }, { "epoch": 0.17857142857142858, "grad_norm": 14.065492890913543, "learning_rate": 5.874125874125874e-06, "loss": 2.701443672180176, "step": 85 }, { "epoch": 0.18067226890756302, "grad_norm": 12.602747808400954, "learning_rate": 5.944055944055944e-06, "loss": 2.8965351581573486, "step": 86 }, { "epoch": 0.18277310924369747, "grad_norm": 10.478287423381614, "learning_rate": 6.013986013986014e-06, "loss": 2.9607667922973633, "step": 87 }, { "epoch": 0.18487394957983194, "grad_norm": 25.168903954415445, "learning_rate": 6.083916083916085e-06, "loss": 3.2360849380493164, "step": 88 }, { "epoch": 0.1869747899159664, "grad_norm": 9.706474901305377, "learning_rate": 6.153846153846155e-06, "loss": 3.146829605102539, "step": 89 }, { "epoch": 0.18907563025210083, "grad_norm": 16.976357238619705, "learning_rate": 6.223776223776225e-06, "loss": 3.017669200897217, "step": 90 }, { "epoch": 0.19117647058823528, "grad_norm": 22.76924701111939, "learning_rate": 6.2937062937062944e-06, "loss": 3.4739527702331543, "step": 91 }, { "epoch": 0.19327731092436976, "grad_norm": 10.061400086768733, "learning_rate": 6.363636363636364e-06, "loss": 2.8482136726379395, "step": 92 }, { "epoch": 0.1953781512605042, "grad_norm": 12.711145684012218, "learning_rate": 6.433566433566434e-06, "loss": 2.7700202465057373, "step": 93 }, { "epoch": 0.19747899159663865, "grad_norm": 19.156479556141598, "learning_rate": 6.503496503496504e-06, "loss": 3.109806537628174, "step": 94 }, { "epoch": 0.19957983193277312, "grad_norm": 7.981944912040402, "learning_rate": 6.573426573426574e-06, "loss": 2.9637131690979004, "step": 95 }, { "epoch": 0.20168067226890757, "grad_norm": 12.84352715723152, "learning_rate": 6.643356643356644e-06, "loss": 2.846522808074951, "step": 96 }, { "epoch": 0.20378151260504201, "grad_norm": 17.72741270084134, "learning_rate": 6.713286713286714e-06, "loss": 3.3485140800476074, "step": 97 }, { "epoch": 0.20588235294117646, "grad_norm": 13.533003488049717, "learning_rate": 6.783216783216784e-06, "loss": 3.014303207397461, "step": 98 }, { "epoch": 0.20798319327731093, "grad_norm": 8.522856642426069, "learning_rate": 6.853146853146854e-06, "loss": 2.6768596172332764, "step": 99 }, { "epoch": 0.21008403361344538, "grad_norm": 28.089483697370316, "learning_rate": 6.923076923076923e-06, "loss": 2.9336276054382324, "step": 100 }, { "epoch": 0.21218487394957983, "grad_norm": 10.595985912398088, "learning_rate": 6.993006993006993e-06, "loss": 3.103717803955078, "step": 101 }, { "epoch": 0.21428571428571427, "grad_norm": 9.980132596619391, "learning_rate": 7.062937062937063e-06, "loss": 2.7759556770324707, "step": 102 }, { "epoch": 0.21638655462184875, "grad_norm": 9.86026405652693, "learning_rate": 7.132867132867134e-06, "loss": 2.586292266845703, "step": 103 }, { "epoch": 0.2184873949579832, "grad_norm": 15.473317115776915, "learning_rate": 7.202797202797203e-06, "loss": 3.109880208969116, "step": 104 }, { "epoch": 0.22058823529411764, "grad_norm": 12.647734541011893, "learning_rate": 7.272727272727273e-06, "loss": 2.7075915336608887, "step": 105 }, { "epoch": 0.22268907563025211, "grad_norm": 13.506863668083897, "learning_rate": 7.342657342657343e-06, "loss": 3.034566879272461, "step": 106 }, { "epoch": 0.22478991596638656, "grad_norm": 10.413965863492892, "learning_rate": 7.412587412587413e-06, "loss": 2.3479254245758057, "step": 107 }, { "epoch": 0.226890756302521, "grad_norm": 11.563038534176888, "learning_rate": 7.4825174825174825e-06, "loss": 2.9856462478637695, "step": 108 }, { "epoch": 0.22899159663865545, "grad_norm": 12.710111708974129, "learning_rate": 7.552447552447552e-06, "loss": 3.462696075439453, "step": 109 }, { "epoch": 0.23109243697478993, "grad_norm": 11.076816381042432, "learning_rate": 7.622377622377622e-06, "loss": 3.270888566970825, "step": 110 }, { "epoch": 0.23319327731092437, "grad_norm": 11.212503376143607, "learning_rate": 7.692307692307694e-06, "loss": 3.048227310180664, "step": 111 }, { "epoch": 0.23529411764705882, "grad_norm": 12.400573941878047, "learning_rate": 7.762237762237763e-06, "loss": 3.2194204330444336, "step": 112 }, { "epoch": 0.23739495798319327, "grad_norm": 15.219804312233611, "learning_rate": 7.832167832167833e-06, "loss": 3.6809778213500977, "step": 113 }, { "epoch": 0.23949579831932774, "grad_norm": 14.217254787332546, "learning_rate": 7.902097902097902e-06, "loss": 2.912044048309326, "step": 114 }, { "epoch": 0.2415966386554622, "grad_norm": 16.623326169718574, "learning_rate": 7.972027972027973e-06, "loss": 3.1021275520324707, "step": 115 }, { "epoch": 0.24369747899159663, "grad_norm": 9.18687643151976, "learning_rate": 8.041958041958042e-06, "loss": 3.089829683303833, "step": 116 }, { "epoch": 0.24579831932773108, "grad_norm": 16.772079088582387, "learning_rate": 8.111888111888112e-06, "loss": 3.4016504287719727, "step": 117 }, { "epoch": 0.24789915966386555, "grad_norm": 68.62004336442155, "learning_rate": 8.181818181818183e-06, "loss": 3.502598285675049, "step": 118 }, { "epoch": 0.25, "grad_norm": 9.091493777708147, "learning_rate": 8.251748251748254e-06, "loss": 3.0750184059143066, "step": 119 }, { "epoch": 0.25210084033613445, "grad_norm": 13.607326745790957, "learning_rate": 8.321678321678323e-06, "loss": 2.8168656826019287, "step": 120 }, { "epoch": 0.2542016806722689, "grad_norm": 8.683002515816812, "learning_rate": 8.391608391608393e-06, "loss": 2.3565826416015625, "step": 121 }, { "epoch": 0.25630252100840334, "grad_norm": 23.36617403509855, "learning_rate": 8.461538461538462e-06, "loss": 2.972810745239258, "step": 122 }, { "epoch": 0.25840336134453784, "grad_norm": 8.174637732136828, "learning_rate": 8.531468531468533e-06, "loss": 2.9700140953063965, "step": 123 }, { "epoch": 0.2605042016806723, "grad_norm": 10.47900789596826, "learning_rate": 8.601398601398602e-06, "loss": 3.0162484645843506, "step": 124 }, { "epoch": 0.26260504201680673, "grad_norm": 16.076782876444586, "learning_rate": 8.671328671328672e-06, "loss": 3.109422445297241, "step": 125 }, { "epoch": 0.2647058823529412, "grad_norm": 33.666522199585756, "learning_rate": 8.741258741258743e-06, "loss": 2.6802124977111816, "step": 126 }, { "epoch": 0.2668067226890756, "grad_norm": 12.537622047835336, "learning_rate": 8.811188811188812e-06, "loss": 2.6609840393066406, "step": 127 }, { "epoch": 0.2689075630252101, "grad_norm": 11.767487098574284, "learning_rate": 8.881118881118883e-06, "loss": 3.0896430015563965, "step": 128 }, { "epoch": 0.2710084033613445, "grad_norm": 7.737680571917604, "learning_rate": 8.951048951048951e-06, "loss": 3.2370247840881348, "step": 129 }, { "epoch": 0.27310924369747897, "grad_norm": 13.8395022595692, "learning_rate": 9.020979020979022e-06, "loss": 2.8461947441101074, "step": 130 }, { "epoch": 0.27521008403361347, "grad_norm": 27.87627626250655, "learning_rate": 9.090909090909091e-06, "loss": 3.480252742767334, "step": 131 }, { "epoch": 0.2773109243697479, "grad_norm": 8.067136701179228, "learning_rate": 9.160839160839162e-06, "loss": 2.8424923419952393, "step": 132 }, { "epoch": 0.27941176470588236, "grad_norm": 12.474203656062087, "learning_rate": 9.230769230769232e-06, "loss": 3.4489340782165527, "step": 133 }, { "epoch": 0.2815126050420168, "grad_norm": 9.56092760411321, "learning_rate": 9.300699300699301e-06, "loss": 2.48683500289917, "step": 134 }, { "epoch": 0.28361344537815125, "grad_norm": 10.45857911102664, "learning_rate": 9.370629370629372e-06, "loss": 2.975668430328369, "step": 135 }, { "epoch": 0.2857142857142857, "grad_norm": 9.79706691198192, "learning_rate": 9.44055944055944e-06, "loss": 3.163745403289795, "step": 136 }, { "epoch": 0.28781512605042014, "grad_norm": 16.663615728677826, "learning_rate": 9.510489510489511e-06, "loss": 3.3047399520874023, "step": 137 }, { "epoch": 0.28991596638655465, "grad_norm": 10.093105336690149, "learning_rate": 9.58041958041958e-06, "loss": 2.901014804840088, "step": 138 }, { "epoch": 0.2920168067226891, "grad_norm": 10.712099293339499, "learning_rate": 9.650349650349651e-06, "loss": 2.4749934673309326, "step": 139 }, { "epoch": 0.29411764705882354, "grad_norm": 12.306067699743261, "learning_rate": 9.72027972027972e-06, "loss": 2.735682964324951, "step": 140 }, { "epoch": 0.296218487394958, "grad_norm": 8.899689488937057, "learning_rate": 9.79020979020979e-06, "loss": 1.6851799488067627, "step": 141 }, { "epoch": 0.29831932773109243, "grad_norm": 15.412895468970188, "learning_rate": 9.860139860139861e-06, "loss": 2.4892358779907227, "step": 142 }, { "epoch": 0.3004201680672269, "grad_norm": 13.718632928552148, "learning_rate": 9.93006993006993e-06, "loss": 3.152186870574951, "step": 143 }, { "epoch": 0.3025210084033613, "grad_norm": 9.63303607414013, "learning_rate": 1e-05, "loss": 2.4623451232910156, "step": 144 }, { "epoch": 0.30462184873949577, "grad_norm": 9.020782417307544, "learning_rate": 9.999985057155316e-06, "loss": 2.3573660850524902, "step": 145 }, { "epoch": 0.3067226890756303, "grad_norm": 15.431206065267094, "learning_rate": 9.999940228710581e-06, "loss": 3.248166561126709, "step": 146 }, { "epoch": 0.3088235294117647, "grad_norm": 9.624481227031932, "learning_rate": 9.99986551493374e-06, "loss": 3.073438882827759, "step": 147 }, { "epoch": 0.31092436974789917, "grad_norm": 12.24535420873494, "learning_rate": 9.999760916271368e-06, "loss": 3.175532579421997, "step": 148 }, { "epoch": 0.3130252100840336, "grad_norm": 8.43469444061833, "learning_rate": 9.999626433348664e-06, "loss": 2.2849655151367188, "step": 149 }, { "epoch": 0.31512605042016806, "grad_norm": 13.307775899632185, "learning_rate": 9.999462066969451e-06, "loss": 2.7922751903533936, "step": 150 }, { "epoch": 0.3172268907563025, "grad_norm": 11.454291564861384, "learning_rate": 9.999267818116173e-06, "loss": 3.03188419342041, "step": 151 }, { "epoch": 0.31932773109243695, "grad_norm": 16.712527557096042, "learning_rate": 9.999043687949878e-06, "loss": 3.3826239109039307, "step": 152 }, { "epoch": 0.32142857142857145, "grad_norm": 22.000641429064785, "learning_rate": 9.998789677810226e-06, "loss": 3.103822708129883, "step": 153 }, { "epoch": 0.3235294117647059, "grad_norm": 11.206024089957094, "learning_rate": 9.998505789215469e-06, "loss": 2.633566379547119, "step": 154 }, { "epoch": 0.32563025210084034, "grad_norm": 17.0566593574694, "learning_rate": 9.998192023862448e-06, "loss": 2.937821388244629, "step": 155 }, { "epoch": 0.3277310924369748, "grad_norm": 10.638495096316019, "learning_rate": 9.997848383626583e-06, "loss": 3.0057592391967773, "step": 156 }, { "epoch": 0.32983193277310924, "grad_norm": 13.891998906384215, "learning_rate": 9.997474870561858e-06, "loss": 3.4198083877563477, "step": 157 }, { "epoch": 0.3319327731092437, "grad_norm": 7.77313705300237, "learning_rate": 9.997071486900813e-06, "loss": 2.748509407043457, "step": 158 }, { "epoch": 0.33403361344537813, "grad_norm": 11.432910137348301, "learning_rate": 9.996638235054527e-06, "loss": 3.3422679901123047, "step": 159 }, { "epoch": 0.33613445378151263, "grad_norm": 9.407520098068266, "learning_rate": 9.996175117612608e-06, "loss": 3.2214763164520264, "step": 160 }, { "epoch": 0.3382352941176471, "grad_norm": 9.207535688673886, "learning_rate": 9.99568213734317e-06, "loss": 2.5538628101348877, "step": 161 }, { "epoch": 0.3403361344537815, "grad_norm": 10.755277234400435, "learning_rate": 9.995159297192824e-06, "loss": 2.781787872314453, "step": 162 }, { "epoch": 0.34243697478991597, "grad_norm": 13.097444619561006, "learning_rate": 9.99460660028666e-06, "loss": 3.3784282207489014, "step": 163 }, { "epoch": 0.3445378151260504, "grad_norm": 9.022751200279867, "learning_rate": 9.994024049928222e-06, "loss": 3.2824249267578125, "step": 164 }, { "epoch": 0.34663865546218486, "grad_norm": 7.521996605994801, "learning_rate": 9.993411649599494e-06, "loss": 2.589594841003418, "step": 165 }, { "epoch": 0.3487394957983193, "grad_norm": 19.97411284039417, "learning_rate": 9.992769402960878e-06, "loss": 3.7193164825439453, "step": 166 }, { "epoch": 0.35084033613445376, "grad_norm": 8.085906363590569, "learning_rate": 9.99209731385117e-06, "loss": 2.823063611984253, "step": 167 }, { "epoch": 0.35294117647058826, "grad_norm": 11.903719879092119, "learning_rate": 9.99139538628754e-06, "loss": 3.1389951705932617, "step": 168 }, { "epoch": 0.3550420168067227, "grad_norm": 21.923086009804106, "learning_rate": 9.990663624465504e-06, "loss": 2.9536495208740234, "step": 169 }, { "epoch": 0.35714285714285715, "grad_norm": 13.50586631618126, "learning_rate": 9.989902032758904e-06, "loss": 2.6355466842651367, "step": 170 }, { "epoch": 0.3592436974789916, "grad_norm": 5.578596081707914, "learning_rate": 9.989110615719882e-06, "loss": 1.1800763607025146, "step": 171 }, { "epoch": 0.36134453781512604, "grad_norm": 16.042050675579503, "learning_rate": 9.988289378078842e-06, "loss": 2.679232358932495, "step": 172 }, { "epoch": 0.3634453781512605, "grad_norm": 13.382798743317503, "learning_rate": 9.987438324744437e-06, "loss": 2.3583908081054688, "step": 173 }, { "epoch": 0.36554621848739494, "grad_norm": 9.108315025108485, "learning_rate": 9.986557460803527e-06, "loss": 2.748077392578125, "step": 174 }, { "epoch": 0.36764705882352944, "grad_norm": 9.67015040715346, "learning_rate": 9.985646791521165e-06, "loss": 3.2660067081451416, "step": 175 }, { "epoch": 0.3697478991596639, "grad_norm": 11.678263700428246, "learning_rate": 9.984706322340539e-06, "loss": 2.9270148277282715, "step": 176 }, { "epoch": 0.37184873949579833, "grad_norm": 9.845183071879623, "learning_rate": 9.983736058882965e-06, "loss": 2.455327033996582, "step": 177 }, { "epoch": 0.3739495798319328, "grad_norm": 9.630887189931224, "learning_rate": 9.982736006947842e-06, "loss": 3.171403169631958, "step": 178 }, { "epoch": 0.3760504201680672, "grad_norm": 7.217019473795253, "learning_rate": 9.98170617251262e-06, "loss": 2.6023473739624023, "step": 179 }, { "epoch": 0.37815126050420167, "grad_norm": 7.438957257707156, "learning_rate": 9.98064656173276e-06, "loss": 2.7492432594299316, "step": 180 }, { "epoch": 0.3802521008403361, "grad_norm": 12.314225953456766, "learning_rate": 9.979557180941702e-06, "loss": 3.520758628845215, "step": 181 }, { "epoch": 0.38235294117647056, "grad_norm": 7.754983445761027, "learning_rate": 9.978438036650822e-06, "loss": 2.7245442867279053, "step": 182 }, { "epoch": 0.38445378151260506, "grad_norm": 15.124443991385633, "learning_rate": 9.977289135549404e-06, "loss": 2.790768623352051, "step": 183 }, { "epoch": 0.3865546218487395, "grad_norm": 15.169071975047261, "learning_rate": 9.976110484504587e-06, "loss": 2.5588126182556152, "step": 184 }, { "epoch": 0.38865546218487396, "grad_norm": 18.50299410182784, "learning_rate": 9.974902090561331e-06, "loss": 3.0367865562438965, "step": 185 }, { "epoch": 0.3907563025210084, "grad_norm": 12.853534690634186, "learning_rate": 9.973663960942373e-06, "loss": 3.1013669967651367, "step": 186 }, { "epoch": 0.39285714285714285, "grad_norm": 11.962180171730763, "learning_rate": 9.972396103048184e-06, "loss": 2.678436279296875, "step": 187 }, { "epoch": 0.3949579831932773, "grad_norm": 14.345031935763927, "learning_rate": 9.971098524456925e-06, "loss": 2.866910696029663, "step": 188 }, { "epoch": 0.39705882352941174, "grad_norm": 24.927874908872194, "learning_rate": 9.969771232924404e-06, "loss": 2.6690807342529297, "step": 189 }, { "epoch": 0.39915966386554624, "grad_norm": 13.232716463146705, "learning_rate": 9.968414236384022e-06, "loss": 2.615846633911133, "step": 190 }, { "epoch": 0.4012605042016807, "grad_norm": 19.24597028348177, "learning_rate": 9.967027542946739e-06, "loss": 3.197604179382324, "step": 191 }, { "epoch": 0.40336134453781514, "grad_norm": 19.57923793430777, "learning_rate": 9.965611160901008e-06, "loss": 1.584808349609375, "step": 192 }, { "epoch": 0.4054621848739496, "grad_norm": 9.313854254132917, "learning_rate": 9.964165098712745e-06, "loss": 2.7913365364074707, "step": 193 }, { "epoch": 0.40756302521008403, "grad_norm": 15.764914604292455, "learning_rate": 9.962689365025259e-06, "loss": 3.42575740814209, "step": 194 }, { "epoch": 0.4096638655462185, "grad_norm": 9.662424511151881, "learning_rate": 9.961183968659217e-06, "loss": 2.6931188106536865, "step": 195 }, { "epoch": 0.4117647058823529, "grad_norm": 13.117904635638109, "learning_rate": 9.959648918612576e-06, "loss": 2.4463605880737305, "step": 196 }, { "epoch": 0.41386554621848737, "grad_norm": 8.434614198562612, "learning_rate": 9.958084224060547e-06, "loss": 2.647773265838623, "step": 197 }, { "epoch": 0.41596638655462187, "grad_norm": 26.520590112059157, "learning_rate": 9.956489894355521e-06, "loss": 2.660770893096924, "step": 198 }, { "epoch": 0.4180672268907563, "grad_norm": 28.510323184410662, "learning_rate": 9.954865939027028e-06, "loss": 3.627254009246826, "step": 199 }, { "epoch": 0.42016806722689076, "grad_norm": 7.679364921262506, "learning_rate": 9.953212367781675e-06, "loss": 2.683685779571533, "step": 200 }, { "epoch": 0.4222689075630252, "grad_norm": 13.123862369544378, "learning_rate": 9.95152919050308e-06, "loss": 2.7249388694763184, "step": 201 }, { "epoch": 0.42436974789915966, "grad_norm": 6.985824973864478, "learning_rate": 9.949816417251831e-06, "loss": 2.933401107788086, "step": 202 }, { "epoch": 0.4264705882352941, "grad_norm": 13.569070375050062, "learning_rate": 9.948074058265409e-06, "loss": 3.5457630157470703, "step": 203 }, { "epoch": 0.42857142857142855, "grad_norm": 7.335673995298351, "learning_rate": 9.94630212395813e-06, "loss": 2.483736038208008, "step": 204 }, { "epoch": 0.43067226890756305, "grad_norm": 73.9554577496319, "learning_rate": 9.944500624921094e-06, "loss": 2.470374584197998, "step": 205 }, { "epoch": 0.4327731092436975, "grad_norm": 11.27254717083641, "learning_rate": 9.942669571922108e-06, "loss": 3.2255494594573975, "step": 206 }, { "epoch": 0.43487394957983194, "grad_norm": 11.257221010364708, "learning_rate": 9.940808975905627e-06, "loss": 3.4820542335510254, "step": 207 }, { "epoch": 0.4369747899159664, "grad_norm": 16.32933603207297, "learning_rate": 9.93891884799269e-06, "loss": 3.218539237976074, "step": 208 }, { "epoch": 0.43907563025210083, "grad_norm": 27.30232213883322, "learning_rate": 9.936999199480854e-06, "loss": 2.8428990840911865, "step": 209 }, { "epoch": 0.4411764705882353, "grad_norm": 8.340720464987514, "learning_rate": 9.935050041844121e-06, "loss": 3.661019802093506, "step": 210 }, { "epoch": 0.4432773109243697, "grad_norm": 18.410105558121085, "learning_rate": 9.933071386732874e-06, "loss": 3.330902338027954, "step": 211 }, { "epoch": 0.44537815126050423, "grad_norm": 10.649860943280096, "learning_rate": 9.931063245973812e-06, "loss": 2.7754883766174316, "step": 212 }, { "epoch": 0.4474789915966387, "grad_norm": 13.898816541841864, "learning_rate": 9.929025631569864e-06, "loss": 2.3284661769866943, "step": 213 }, { "epoch": 0.4495798319327731, "grad_norm": 11.170546252681195, "learning_rate": 9.926958555700134e-06, "loss": 2.599228858947754, "step": 214 }, { "epoch": 0.45168067226890757, "grad_norm": 11.76779841923458, "learning_rate": 9.924862030719821e-06, "loss": 3.174004077911377, "step": 215 }, { "epoch": 0.453781512605042, "grad_norm": 12.943887425672324, "learning_rate": 9.922736069160141e-06, "loss": 2.7390694618225098, "step": 216 }, { "epoch": 0.45588235294117646, "grad_norm": 11.55413590289726, "learning_rate": 9.920580683728263e-06, "loss": 2.7388081550598145, "step": 217 }, { "epoch": 0.4579831932773109, "grad_norm": 22.67934856569803, "learning_rate": 9.918395887307219e-06, "loss": 2.4359140396118164, "step": 218 }, { "epoch": 0.46008403361344535, "grad_norm": 16.89880489289811, "learning_rate": 9.916181692955841e-06, "loss": 2.9688220024108887, "step": 219 }, { "epoch": 0.46218487394957986, "grad_norm": 19.071787734842648, "learning_rate": 9.913938113908675e-06, "loss": 3.1534006595611572, "step": 220 }, { "epoch": 0.4642857142857143, "grad_norm": 15.85242809267351, "learning_rate": 9.9116651635759e-06, "loss": 2.618938684463501, "step": 221 }, { "epoch": 0.46638655462184875, "grad_norm": 16.618677645763935, "learning_rate": 9.909362855543253e-06, "loss": 2.844968318939209, "step": 222 }, { "epoch": 0.4684873949579832, "grad_norm": 10.671971882677827, "learning_rate": 9.907031203571948e-06, "loss": 2.4792628288269043, "step": 223 }, { "epoch": 0.47058823529411764, "grad_norm": 19.241816484552377, "learning_rate": 9.90467022159859e-06, "loss": 2.894502639770508, "step": 224 }, { "epoch": 0.4726890756302521, "grad_norm": 23.569212607106817, "learning_rate": 9.902279923735093e-06, "loss": 2.792015552520752, "step": 225 }, { "epoch": 0.47478991596638653, "grad_norm": 9.680153288005078, "learning_rate": 9.899860324268599e-06, "loss": 2.9171247482299805, "step": 226 }, { "epoch": 0.47689075630252103, "grad_norm": 12.955899131578942, "learning_rate": 9.897411437661386e-06, "loss": 2.560214042663574, "step": 227 }, { "epoch": 0.4789915966386555, "grad_norm": 8.404115741492017, "learning_rate": 9.894933278550785e-06, "loss": 3.2796883583068848, "step": 228 }, { "epoch": 0.4810924369747899, "grad_norm": 8.318847487560761, "learning_rate": 9.8924258617491e-06, "loss": 3.0324971675872803, "step": 229 }, { "epoch": 0.4831932773109244, "grad_norm": 18.51611171900766, "learning_rate": 9.8898892022435e-06, "loss": 3.3899683952331543, "step": 230 }, { "epoch": 0.4852941176470588, "grad_norm": 11.1091069250737, "learning_rate": 9.887323315195956e-06, "loss": 2.742903709411621, "step": 231 }, { "epoch": 0.48739495798319327, "grad_norm": 7.80795476246885, "learning_rate": 9.884728215943122e-06, "loss": 3.230966806411743, "step": 232 }, { "epoch": 0.4894957983193277, "grad_norm": 13.293388527053166, "learning_rate": 9.882103919996268e-06, "loss": 2.8818302154541016, "step": 233 }, { "epoch": 0.49159663865546216, "grad_norm": 6.043647907341577, "learning_rate": 9.879450443041172e-06, "loss": 2.358765125274658, "step": 234 }, { "epoch": 0.49369747899159666, "grad_norm": 8.169920329828493, "learning_rate": 9.876767800938032e-06, "loss": 3.0420098304748535, "step": 235 }, { "epoch": 0.4957983193277311, "grad_norm": 18.681067639331463, "learning_rate": 9.874056009721367e-06, "loss": 2.9595160484313965, "step": 236 }, { "epoch": 0.49789915966386555, "grad_norm": 7.792831708992119, "learning_rate": 9.87131508559993e-06, "loss": 2.9571242332458496, "step": 237 }, { "epoch": 0.5, "grad_norm": 13.533405695746444, "learning_rate": 9.868545044956603e-06, "loss": 2.798694610595703, "step": 238 }, { "epoch": 0.5021008403361344, "grad_norm": 7.855798585235136, "learning_rate": 9.865745904348296e-06, "loss": 2.9430432319641113, "step": 239 }, { "epoch": 0.5042016806722689, "grad_norm": 12.596029584158895, "learning_rate": 9.862917680505863e-06, "loss": 3.065462112426758, "step": 240 }, { "epoch": 0.5063025210084033, "grad_norm": 11.793626015707394, "learning_rate": 9.860060390333988e-06, "loss": 3.8562116622924805, "step": 241 }, { "epoch": 0.5084033613445378, "grad_norm": 8.660405200484282, "learning_rate": 9.857174050911085e-06, "loss": 2.645123243331909, "step": 242 }, { "epoch": 0.5105042016806722, "grad_norm": 11.950071539791612, "learning_rate": 9.854258679489203e-06, "loss": 2.500267744064331, "step": 243 }, { "epoch": 0.5126050420168067, "grad_norm": 14.029861713702717, "learning_rate": 9.851314293493923e-06, "loss": 2.553537368774414, "step": 244 }, { "epoch": 0.5147058823529411, "grad_norm": 21.40352382596275, "learning_rate": 9.848340910524243e-06, "loss": 2.694528102874756, "step": 245 }, { "epoch": 0.5168067226890757, "grad_norm": 11.756867034830558, "learning_rate": 9.845338548352482e-06, "loss": 3.2089271545410156, "step": 246 }, { "epoch": 0.5189075630252101, "grad_norm": 13.513723971793041, "learning_rate": 9.842307224924174e-06, "loss": 2.443826198577881, "step": 247 }, { "epoch": 0.5210084033613446, "grad_norm": 8.839705225157738, "learning_rate": 9.839246958357957e-06, "loss": 2.9329233169555664, "step": 248 }, { "epoch": 0.523109243697479, "grad_norm": 14.107087922274081, "learning_rate": 9.836157766945467e-06, "loss": 2.5171399116516113, "step": 249 }, { "epoch": 0.5252100840336135, "grad_norm": 8.285926532283062, "learning_rate": 9.833039669151225e-06, "loss": 3.0069408416748047, "step": 250 }, { "epoch": 0.5273109243697479, "grad_norm": 9.58371718621674, "learning_rate": 9.829892683612535e-06, "loss": 2.5816359519958496, "step": 251 }, { "epoch": 0.5294117647058824, "grad_norm": 20.00310864922347, "learning_rate": 9.826716829139358e-06, "loss": 2.3982670307159424, "step": 252 }, { "epoch": 0.5315126050420168, "grad_norm": 10.648220658525108, "learning_rate": 9.82351212471422e-06, "loss": 2.975574016571045, "step": 253 }, { "epoch": 0.5336134453781513, "grad_norm": 17.551242772865887, "learning_rate": 9.820278589492076e-06, "loss": 2.4827775955200195, "step": 254 }, { "epoch": 0.5357142857142857, "grad_norm": 17.09184171751482, "learning_rate": 9.817016242800215e-06, "loss": 2.690033197402954, "step": 255 }, { "epoch": 0.5378151260504201, "grad_norm": 14.722560106056354, "learning_rate": 9.813725104138133e-06, "loss": 3.346949338912964, "step": 256 }, { "epoch": 0.5399159663865546, "grad_norm": 17.505076110573757, "learning_rate": 9.810405193177418e-06, "loss": 2.6791281700134277, "step": 257 }, { "epoch": 0.542016806722689, "grad_norm": 17.808558357662132, "learning_rate": 9.807056529761637e-06, "loss": 2.853158950805664, "step": 258 }, { "epoch": 0.5441176470588235, "grad_norm": 20.31515982195739, "learning_rate": 9.80367913390621e-06, "loss": 3.1636295318603516, "step": 259 }, { "epoch": 0.5462184873949579, "grad_norm": 12.64467693447632, "learning_rate": 9.800273025798302e-06, "loss": 2.5055313110351562, "step": 260 }, { "epoch": 0.5483193277310925, "grad_norm": 9.570949964132296, "learning_rate": 9.796838225796688e-06, "loss": 2.9986414909362793, "step": 261 }, { "epoch": 0.5504201680672269, "grad_norm": 14.832124263006255, "learning_rate": 9.793374754431642e-06, "loss": 2.419975757598877, "step": 262 }, { "epoch": 0.5525210084033614, "grad_norm": 22.395098332172758, "learning_rate": 9.789882632404809e-06, "loss": 3.0301923751831055, "step": 263 }, { "epoch": 0.5546218487394958, "grad_norm": 8.680917615796206, "learning_rate": 9.786361880589084e-06, "loss": 2.846034526824951, "step": 264 }, { "epoch": 0.5567226890756303, "grad_norm": 16.17159732015871, "learning_rate": 9.782812520028487e-06, "loss": 3.250943183898926, "step": 265 }, { "epoch": 0.5588235294117647, "grad_norm": 12.800181347711561, "learning_rate": 9.779234571938034e-06, "loss": 2.5069515705108643, "step": 266 }, { "epoch": 0.5609243697478992, "grad_norm": 20.58760178113823, "learning_rate": 9.775628057703616e-06, "loss": 2.2883377075195312, "step": 267 }, { "epoch": 0.5630252100840336, "grad_norm": 19.3122933281468, "learning_rate": 9.771992998881865e-06, "loss": 1.8844149112701416, "step": 268 }, { "epoch": 0.5651260504201681, "grad_norm": 11.766785955468544, "learning_rate": 9.768329417200029e-06, "loss": 2.608553409576416, "step": 269 }, { "epoch": 0.5672268907563025, "grad_norm": 9.015634942296078, "learning_rate": 9.76463733455584e-06, "loss": 2.8849685192108154, "step": 270 }, { "epoch": 0.569327731092437, "grad_norm": 10.71605416834433, "learning_rate": 9.760916773017386e-06, "loss": 2.83829402923584, "step": 271 }, { "epoch": 0.5714285714285714, "grad_norm": 8.102503833940233, "learning_rate": 9.757167754822974e-06, "loss": 2.6053004264831543, "step": 272 }, { "epoch": 0.5735294117647058, "grad_norm": 18.62135736056985, "learning_rate": 9.753390302381006e-06, "loss": 2.8338804244995117, "step": 273 }, { "epoch": 0.5756302521008403, "grad_norm": 34.82348840659483, "learning_rate": 9.749584438269833e-06, "loss": 2.979978084564209, "step": 274 }, { "epoch": 0.5777310924369747, "grad_norm": 6.726547081859168, "learning_rate": 9.74575018523763e-06, "loss": 1.8241777420043945, "step": 275 }, { "epoch": 0.5798319327731093, "grad_norm": 12.206262847267514, "learning_rate": 9.741887566202259e-06, "loss": 3.2140274047851562, "step": 276 }, { "epoch": 0.5819327731092437, "grad_norm": 43.432328207654045, "learning_rate": 9.737996604251124e-06, "loss": 3.074397325515747, "step": 277 }, { "epoch": 0.5840336134453782, "grad_norm": 19.844157721727896, "learning_rate": 9.73407732264104e-06, "loss": 2.527010679244995, "step": 278 }, { "epoch": 0.5861344537815126, "grad_norm": 10.112570131000647, "learning_rate": 9.730129744798096e-06, "loss": 2.6019768714904785, "step": 279 }, { "epoch": 0.5882352941176471, "grad_norm": 216.6539557731807, "learning_rate": 9.726153894317508e-06, "loss": 2.848952293395996, "step": 280 }, { "epoch": 0.5903361344537815, "grad_norm": 18.730728554973695, "learning_rate": 9.722149794963483e-06, "loss": 3.120556354522705, "step": 281 }, { "epoch": 0.592436974789916, "grad_norm": 7.236837867364418, "learning_rate": 9.718117470669072e-06, "loss": 2.8926405906677246, "step": 282 }, { "epoch": 0.5945378151260504, "grad_norm": 8.247663007399707, "learning_rate": 9.714056945536039e-06, "loss": 3.2854347229003906, "step": 283 }, { "epoch": 0.5966386554621849, "grad_norm": 7.729125572796969, "learning_rate": 9.709968243834698e-06, "loss": 2.856870651245117, "step": 284 }, { "epoch": 0.5987394957983193, "grad_norm": 20.951434970442865, "learning_rate": 9.705851390003783e-06, "loss": 3.3881802558898926, "step": 285 }, { "epoch": 0.6008403361344538, "grad_norm": 8.671814837426174, "learning_rate": 9.7017064086503e-06, "loss": 2.6102542877197266, "step": 286 }, { "epoch": 0.6029411764705882, "grad_norm": 8.644019718162792, "learning_rate": 9.697533324549371e-06, "loss": 2.7697243690490723, "step": 287 }, { "epoch": 0.6050420168067226, "grad_norm": 12.279613000984195, "learning_rate": 9.693332162644095e-06, "loss": 2.568695545196533, "step": 288 }, { "epoch": 0.6071428571428571, "grad_norm": 13.384358670021655, "learning_rate": 9.689102948045398e-06, "loss": 2.922543525695801, "step": 289 }, { "epoch": 0.6092436974789915, "grad_norm": 15.250277694133263, "learning_rate": 9.684845706031878e-06, "loss": 3.1011314392089844, "step": 290 }, { "epoch": 0.6113445378151261, "grad_norm": 9.840291260984259, "learning_rate": 9.680560462049657e-06, "loss": 2.627528429031372, "step": 291 }, { "epoch": 0.6134453781512605, "grad_norm": 13.648735567431437, "learning_rate": 9.676247241712228e-06, "loss": 2.8417811393737793, "step": 292 }, { "epoch": 0.615546218487395, "grad_norm": 8.931356705581003, "learning_rate": 9.671906070800307e-06, "loss": 2.3787314891815186, "step": 293 }, { "epoch": 0.6176470588235294, "grad_norm": 7.6270227976464895, "learning_rate": 9.667536975261667e-06, "loss": 2.751317024230957, "step": 294 }, { "epoch": 0.6197478991596639, "grad_norm": 7.016417787785432, "learning_rate": 9.663139981210998e-06, "loss": 2.6910929679870605, "step": 295 }, { "epoch": 0.6218487394957983, "grad_norm": 11.206285204533946, "learning_rate": 9.658715114929737e-06, "loss": 2.801499366760254, "step": 296 }, { "epoch": 0.6239495798319328, "grad_norm": 18.427453742915965, "learning_rate": 9.654262402865922e-06, "loss": 2.885946273803711, "step": 297 }, { "epoch": 0.6260504201680672, "grad_norm": 13.114557836832477, "learning_rate": 9.649781871634025e-06, "loss": 3.1485133171081543, "step": 298 }, { "epoch": 0.6281512605042017, "grad_norm": 8.349893932720915, "learning_rate": 9.6452735480148e-06, "loss": 3.174015998840332, "step": 299 }, { "epoch": 0.6302521008403361, "grad_norm": 11.762326882141835, "learning_rate": 9.64073745895512e-06, "loss": 3.339445114135742, "step": 300 }, { "epoch": 0.6323529411764706, "grad_norm": 7.555546243601591, "learning_rate": 9.636173631567812e-06, "loss": 2.9448843002319336, "step": 301 }, { "epoch": 0.634453781512605, "grad_norm": 5.95180683932207, "learning_rate": 9.631582093131501e-06, "loss": 2.6363561153411865, "step": 302 }, { "epoch": 0.6365546218487395, "grad_norm": 11.371944122058592, "learning_rate": 9.62696287109045e-06, "loss": 2.4621901512145996, "step": 303 }, { "epoch": 0.6386554621848739, "grad_norm": 11.742524524874973, "learning_rate": 9.622315993054384e-06, "loss": 2.8623251914978027, "step": 304 }, { "epoch": 0.6407563025210085, "grad_norm": 12.39315277601619, "learning_rate": 9.61764148679833e-06, "loss": 2.191575765609741, "step": 305 }, { "epoch": 0.6428571428571429, "grad_norm": 6.720496031064891, "learning_rate": 9.61293938026246e-06, "loss": 2.018388271331787, "step": 306 }, { "epoch": 0.6449579831932774, "grad_norm": 9.624398589362118, "learning_rate": 9.608209701551913e-06, "loss": 2.756854772567749, "step": 307 }, { "epoch": 0.6470588235294118, "grad_norm": 21.622075822614562, "learning_rate": 9.60345247893663e-06, "loss": 2.6668529510498047, "step": 308 }, { "epoch": 0.6491596638655462, "grad_norm": 14.926878160653533, "learning_rate": 9.598667740851187e-06, "loss": 2.6617343425750732, "step": 309 }, { "epoch": 0.6512605042016807, "grad_norm": 9.499741494871419, "learning_rate": 9.59385551589462e-06, "loss": 3.1460976600646973, "step": 310 }, { "epoch": 0.6533613445378151, "grad_norm": 17.910724072364676, "learning_rate": 9.589015832830267e-06, "loss": 2.7566354274749756, "step": 311 }, { "epoch": 0.6554621848739496, "grad_norm": 15.059990921253526, "learning_rate": 9.584148720585575e-06, "loss": 3.3112881183624268, "step": 312 }, { "epoch": 0.657563025210084, "grad_norm": 12.27519040871759, "learning_rate": 9.57925420825195e-06, "loss": 2.8563618659973145, "step": 313 }, { "epoch": 0.6596638655462185, "grad_norm": 14.53433732237354, "learning_rate": 9.574332325084564e-06, "loss": 3.5544567108154297, "step": 314 }, { "epoch": 0.6617647058823529, "grad_norm": 9.662661721128384, "learning_rate": 9.569383100502193e-06, "loss": 2.924015998840332, "step": 315 }, { "epoch": 0.6638655462184874, "grad_norm": 11.360356839234715, "learning_rate": 9.564406564087032e-06, "loss": 2.7250008583068848, "step": 316 }, { "epoch": 0.6659663865546218, "grad_norm": 6.265433825569306, "learning_rate": 9.559402745584527e-06, "loss": 2.9229238033294678, "step": 317 }, { "epoch": 0.6680672268907563, "grad_norm": 10.23047238705242, "learning_rate": 9.554371674903191e-06, "loss": 3.4867515563964844, "step": 318 }, { "epoch": 0.6701680672268907, "grad_norm": 13.446172115002414, "learning_rate": 9.549313382114427e-06, "loss": 2.4049417972564697, "step": 319 }, { "epoch": 0.6722689075630253, "grad_norm": 14.135159230227343, "learning_rate": 9.54422789745235e-06, "loss": 3.1008338928222656, "step": 320 }, { "epoch": 0.6743697478991597, "grad_norm": 9.140604791680513, "learning_rate": 9.5391152513136e-06, "loss": 2.6114342212677, "step": 321 }, { "epoch": 0.6764705882352942, "grad_norm": 9.779919855511938, "learning_rate": 9.533975474257171e-06, "loss": 2.7165164947509766, "step": 322 }, { "epoch": 0.6785714285714286, "grad_norm": 9.275622947642706, "learning_rate": 9.528808597004216e-06, "loss": 2.8122520446777344, "step": 323 }, { "epoch": 0.680672268907563, "grad_norm": 14.779154717919877, "learning_rate": 9.523614650437876e-06, "loss": 2.862661838531494, "step": 324 }, { "epoch": 0.6827731092436975, "grad_norm": 12.377273208842894, "learning_rate": 9.518393665603084e-06, "loss": 2.9812843799591064, "step": 325 }, { "epoch": 0.6848739495798319, "grad_norm": 11.010658732376989, "learning_rate": 9.513145673706383e-06, "loss": 2.9455337524414062, "step": 326 }, { "epoch": 0.6869747899159664, "grad_norm": 14.806340169845868, "learning_rate": 9.507870706115749e-06, "loss": 3.1577422618865967, "step": 327 }, { "epoch": 0.6890756302521008, "grad_norm": 13.616368197529665, "learning_rate": 9.50256879436039e-06, "loss": 2.545835018157959, "step": 328 }, { "epoch": 0.6911764705882353, "grad_norm": 8.987871015734585, "learning_rate": 9.497239970130561e-06, "loss": 2.559062957763672, "step": 329 }, { "epoch": 0.6932773109243697, "grad_norm": 8.587992072590101, "learning_rate": 9.491884265277383e-06, "loss": 2.932499647140503, "step": 330 }, { "epoch": 0.6953781512605042, "grad_norm": 7.463276523398998, "learning_rate": 9.486501711812637e-06, "loss": 2.967616558074951, "step": 331 }, { "epoch": 0.6974789915966386, "grad_norm": 14.475511521289118, "learning_rate": 9.481092341908591e-06, "loss": 2.4604697227478027, "step": 332 }, { "epoch": 0.6995798319327731, "grad_norm": 9.02600045542574, "learning_rate": 9.475656187897794e-06, "loss": 3.146969795227051, "step": 333 }, { "epoch": 0.7016806722689075, "grad_norm": 7.639638057540197, "learning_rate": 9.470193282272886e-06, "loss": 3.337083339691162, "step": 334 }, { "epoch": 0.7037815126050421, "grad_norm": 16.397307515268395, "learning_rate": 9.464703657686412e-06, "loss": 2.7829766273498535, "step": 335 }, { "epoch": 0.7058823529411765, "grad_norm": 11.048022152868258, "learning_rate": 9.45918734695061e-06, "loss": 3.095449447631836, "step": 336 }, { "epoch": 0.707983193277311, "grad_norm": 10.20138527201031, "learning_rate": 9.453644383037232e-06, "loss": 2.6790573596954346, "step": 337 }, { "epoch": 0.7100840336134454, "grad_norm": 9.93895349514755, "learning_rate": 9.448074799077337e-06, "loss": 2.9844274520874023, "step": 338 }, { "epoch": 0.7121848739495799, "grad_norm": 171.53953866617377, "learning_rate": 9.442478628361098e-06, "loss": 2.256910562515259, "step": 339 }, { "epoch": 0.7142857142857143, "grad_norm": 12.351676724137773, "learning_rate": 9.436855904337596e-06, "loss": 2.9464545249938965, "step": 340 }, { "epoch": 0.7163865546218487, "grad_norm": 19.404123629754835, "learning_rate": 9.43120666061463e-06, "loss": 2.23644757270813, "step": 341 }, { "epoch": 0.7184873949579832, "grad_norm": 11.246236929808724, "learning_rate": 9.425530930958507e-06, "loss": 2.85072660446167, "step": 342 }, { "epoch": 0.7205882352941176, "grad_norm": 17.642986778414265, "learning_rate": 9.419828749293845e-06, "loss": 3.09238862991333, "step": 343 }, { "epoch": 0.7226890756302521, "grad_norm": 8.1418179714146, "learning_rate": 9.414100149703373e-06, "loss": 2.7548587322235107, "step": 344 }, { "epoch": 0.7247899159663865, "grad_norm": 11.258932741699391, "learning_rate": 9.40834516642772e-06, "loss": 2.487452507019043, "step": 345 }, { "epoch": 0.726890756302521, "grad_norm": 10.082639156310133, "learning_rate": 9.402563833865213e-06, "loss": 3.077296257019043, "step": 346 }, { "epoch": 0.7289915966386554, "grad_norm": 11.506257708160296, "learning_rate": 9.396756186571672e-06, "loss": 2.6188814640045166, "step": 347 }, { "epoch": 0.7310924369747899, "grad_norm": 11.743812268831451, "learning_rate": 9.39092225926021e-06, "loss": 3.150355815887451, "step": 348 }, { "epoch": 0.7331932773109243, "grad_norm": 10.613109994526992, "learning_rate": 9.385062086801013e-06, "loss": 2.6666879653930664, "step": 349 }, { "epoch": 0.7352941176470589, "grad_norm": 11.1137083326389, "learning_rate": 9.379175704221139e-06, "loss": 2.885680675506592, "step": 350 }, { "epoch": 0.7373949579831933, "grad_norm": 10.845634322034954, "learning_rate": 9.37326314670431e-06, "loss": 2.948115110397339, "step": 351 }, { "epoch": 0.7394957983193278, "grad_norm": 20.12834911912162, "learning_rate": 9.367324449590694e-06, "loss": 2.743468761444092, "step": 352 }, { "epoch": 0.7415966386554622, "grad_norm": 10.20324180750042, "learning_rate": 9.361359648376707e-06, "loss": 3.0895063877105713, "step": 353 }, { "epoch": 0.7436974789915967, "grad_norm": 16.159497011872574, "learning_rate": 9.355368778714784e-06, "loss": 2.808818817138672, "step": 354 }, { "epoch": 0.7457983193277311, "grad_norm": 14.111122417184372, "learning_rate": 9.349351876413181e-06, "loss": 2.889227867126465, "step": 355 }, { "epoch": 0.7478991596638656, "grad_norm": 10.685634708452614, "learning_rate": 9.343308977435754e-06, "loss": 3.021900177001953, "step": 356 }, { "epoch": 0.75, "grad_norm": 29.116347517828004, "learning_rate": 9.337240117901742e-06, "loss": 2.4112629890441895, "step": 357 }, { "epoch": 0.7521008403361344, "grad_norm": 8.824096565810732, "learning_rate": 9.331145334085554e-06, "loss": 2.898515224456787, "step": 358 }, { "epoch": 0.7542016806722689, "grad_norm": 21.430509149211513, "learning_rate": 9.325024662416553e-06, "loss": 2.683413028717041, "step": 359 }, { "epoch": 0.7563025210084033, "grad_norm": 10.321295794427858, "learning_rate": 9.318878139478842e-06, "loss": 2.890808582305908, "step": 360 }, { "epoch": 0.7584033613445378, "grad_norm": 20.795050786572304, "learning_rate": 9.312705802011029e-06, "loss": 2.9919955730438232, "step": 361 }, { "epoch": 0.7605042016806722, "grad_norm": 9.83932446467153, "learning_rate": 9.306507686906033e-06, "loss": 2.7725915908813477, "step": 362 }, { "epoch": 0.7626050420168067, "grad_norm": 9.916160263978837, "learning_rate": 9.300283831210838e-06, "loss": 2.9397757053375244, "step": 363 }, { "epoch": 0.7647058823529411, "grad_norm": 9.312450032530169, "learning_rate": 9.294034272126286e-06, "loss": 2.770698070526123, "step": 364 }, { "epoch": 0.7668067226890757, "grad_norm": 13.885653883484695, "learning_rate": 9.28775904700686e-06, "loss": 2.5156445503234863, "step": 365 }, { "epoch": 0.7689075630252101, "grad_norm": 13.084004538001976, "learning_rate": 9.281458193360442e-06, "loss": 2.597851276397705, "step": 366 }, { "epoch": 0.7710084033613446, "grad_norm": 17.679727525867335, "learning_rate": 9.2751317488481e-06, "loss": 2.4659290313720703, "step": 367 }, { "epoch": 0.773109243697479, "grad_norm": 9.199345804679885, "learning_rate": 9.26877975128387e-06, "loss": 3.0518131256103516, "step": 368 }, { "epoch": 0.7752100840336135, "grad_norm": 15.824344742656248, "learning_rate": 9.262402238634514e-06, "loss": 2.0272233486175537, "step": 369 }, { "epoch": 0.7773109243697479, "grad_norm": 9.874132429438818, "learning_rate": 9.255999249019307e-06, "loss": 2.282167911529541, "step": 370 }, { "epoch": 0.7794117647058824, "grad_norm": 8.044430179764902, "learning_rate": 9.2495708207098e-06, "loss": 2.447831869125366, "step": 371 }, { "epoch": 0.7815126050420168, "grad_norm": 15.289268393319317, "learning_rate": 9.243116992129593e-06, "loss": 2.5548458099365234, "step": 372 }, { "epoch": 0.7836134453781513, "grad_norm": 18.576142639391133, "learning_rate": 9.23663780185411e-06, "loss": 2.2244365215301514, "step": 373 }, { "epoch": 0.7857142857142857, "grad_norm": 8.55234069521718, "learning_rate": 9.230133288610366e-06, "loss": 3.044992208480835, "step": 374 }, { "epoch": 0.7878151260504201, "grad_norm": 11.142079035862414, "learning_rate": 9.223603491276733e-06, "loss": 2.545569896697998, "step": 375 }, { "epoch": 0.7899159663865546, "grad_norm": 14.123674718701432, "learning_rate": 9.217048448882711e-06, "loss": 3.337583541870117, "step": 376 }, { "epoch": 0.792016806722689, "grad_norm": 16.146080651689587, "learning_rate": 9.210468200608691e-06, "loss": 3.1922380924224854, "step": 377 }, { "epoch": 0.7941176470588235, "grad_norm": 12.28600079308305, "learning_rate": 9.203862785785724e-06, "loss": 2.5922632217407227, "step": 378 }, { "epoch": 0.7962184873949579, "grad_norm": 18.04398024676097, "learning_rate": 9.197232243895285e-06, "loss": 2.876894474029541, "step": 379 }, { "epoch": 0.7983193277310925, "grad_norm": 13.494043036714963, "learning_rate": 9.190576614569035e-06, "loss": 2.7677531242370605, "step": 380 }, { "epoch": 0.8004201680672269, "grad_norm": 23.463052019031387, "learning_rate": 9.183895937588594e-06, "loss": 1.9870229959487915, "step": 381 }, { "epoch": 0.8025210084033614, "grad_norm": 7.7476580634838665, "learning_rate": 9.177190252885285e-06, "loss": 2.784242868423462, "step": 382 }, { "epoch": 0.8046218487394958, "grad_norm": 6.086395137680743, "learning_rate": 9.17045960053991e-06, "loss": 2.878697395324707, "step": 383 }, { "epoch": 0.8067226890756303, "grad_norm": 16.59316957110638, "learning_rate": 9.163704020782507e-06, "loss": 2.7685139179229736, "step": 384 }, { "epoch": 0.8088235294117647, "grad_norm": 15.470438153645851, "learning_rate": 9.156923553992107e-06, "loss": 2.8312299251556396, "step": 385 }, { "epoch": 0.8109243697478992, "grad_norm": 8.00902098985157, "learning_rate": 9.150118240696497e-06, "loss": 1.7165706157684326, "step": 386 }, { "epoch": 0.8130252100840336, "grad_norm": 14.0610194690077, "learning_rate": 9.14328812157197e-06, "loss": 3.451162815093994, "step": 387 }, { "epoch": 0.8151260504201681, "grad_norm": 10.174053809556211, "learning_rate": 9.136433237443093e-06, "loss": 3.455259084701538, "step": 388 }, { "epoch": 0.8172268907563025, "grad_norm": 14.076181600112081, "learning_rate": 9.129553629282448e-06, "loss": 3.3125205039978027, "step": 389 }, { "epoch": 0.819327731092437, "grad_norm": 11.760967038966463, "learning_rate": 9.122649338210407e-06, "loss": 3.175715923309326, "step": 390 }, { "epoch": 0.8214285714285714, "grad_norm": 12.215337173611072, "learning_rate": 9.115720405494868e-06, "loss": 3.426882743835449, "step": 391 }, { "epoch": 0.8235294117647058, "grad_norm": 16.884819154921146, "learning_rate": 9.108766872551016e-06, "loss": 2.693225860595703, "step": 392 }, { "epoch": 0.8256302521008403, "grad_norm": 11.991779005638564, "learning_rate": 9.101788780941076e-06, "loss": 2.8251726627349854, "step": 393 }, { "epoch": 0.8277310924369747, "grad_norm": 8.901523397999386, "learning_rate": 9.094786172374066e-06, "loss": 2.845076560974121, "step": 394 }, { "epoch": 0.8298319327731093, "grad_norm": 15.559813600064993, "learning_rate": 9.087759088705541e-06, "loss": 2.9212491512298584, "step": 395 }, { "epoch": 0.8319327731092437, "grad_norm": 12.334218057409931, "learning_rate": 9.08070757193735e-06, "loss": 2.752890110015869, "step": 396 }, { "epoch": 0.8340336134453782, "grad_norm": 20.040022595533, "learning_rate": 9.07363166421738e-06, "loss": 3.1292171478271484, "step": 397 }, { "epoch": 0.8361344537815126, "grad_norm": 9.339997691276547, "learning_rate": 9.066531407839307e-06, "loss": 2.2926840782165527, "step": 398 }, { "epoch": 0.8382352941176471, "grad_norm": 9.210411213235453, "learning_rate": 9.059406845242343e-06, "loss": 2.7644119262695312, "step": 399 }, { "epoch": 0.8403361344537815, "grad_norm": 13.484928949211756, "learning_rate": 9.05225801901098e-06, "loss": 2.9096150398254395, "step": 400 }, { "epoch": 0.842436974789916, "grad_norm": 21.901892899759964, "learning_rate": 9.045084971874738e-06, "loss": 4.536911964416504, "step": 401 }, { "epoch": 0.8445378151260504, "grad_norm": 8.027798710835631, "learning_rate": 9.03788774670791e-06, "loss": 3.3775062561035156, "step": 402 }, { "epoch": 0.8466386554621849, "grad_norm": 11.22841391004864, "learning_rate": 9.030666386529303e-06, "loss": 2.755703926086426, "step": 403 }, { "epoch": 0.8487394957983193, "grad_norm": 9.698938581529527, "learning_rate": 9.023420934501981e-06, "loss": 2.812281608581543, "step": 404 }, { "epoch": 0.8508403361344538, "grad_norm": 9.495702557416454, "learning_rate": 9.01615143393301e-06, "loss": 2.9015493392944336, "step": 405 }, { "epoch": 0.8529411764705882, "grad_norm": 8.59480884978166, "learning_rate": 9.008857928273199e-06, "loss": 2.8743391036987305, "step": 406 }, { "epoch": 0.8550420168067226, "grad_norm": 14.060855102265236, "learning_rate": 9.001540461116835e-06, "loss": 2.7400550842285156, "step": 407 }, { "epoch": 0.8571428571428571, "grad_norm": 9.670354596798553, "learning_rate": 8.994199076201428e-06, "loss": 3.788983106613159, "step": 408 }, { "epoch": 0.8592436974789915, "grad_norm": 10.094582977623446, "learning_rate": 8.98683381740745e-06, "loss": 2.426604747772217, "step": 409 }, { "epoch": 0.8613445378151261, "grad_norm": 6.42119276092813, "learning_rate": 8.979444728758067e-06, "loss": 2.467769145965576, "step": 410 }, { "epoch": 0.8634453781512605, "grad_norm": 101.25120998420752, "learning_rate": 8.97203185441888e-06, "loss": 2.878884792327881, "step": 411 }, { "epoch": 0.865546218487395, "grad_norm": 10.063927366400284, "learning_rate": 8.964595238697659e-06, "loss": 3.323913812637329, "step": 412 }, { "epoch": 0.8676470588235294, "grad_norm": 8.176196947638319, "learning_rate": 8.957134926044088e-06, "loss": 2.2674732208251953, "step": 413 }, { "epoch": 0.8697478991596639, "grad_norm": 7.688045397272728, "learning_rate": 8.949650961049479e-06, "loss": 2.6359667778015137, "step": 414 }, { "epoch": 0.8718487394957983, "grad_norm": 12.061723837223782, "learning_rate": 8.942143388446522e-06, "loss": 4.3965678215026855, "step": 415 }, { "epoch": 0.8739495798319328, "grad_norm": 13.801014710596668, "learning_rate": 8.934612253109017e-06, "loss": 3.584599733352661, "step": 416 }, { "epoch": 0.8760504201680672, "grad_norm": 11.465324791085347, "learning_rate": 8.927057600051594e-06, "loss": 2.9781904220581055, "step": 417 }, { "epoch": 0.8781512605042017, "grad_norm": 32.19803137859573, "learning_rate": 8.919479474429462e-06, "loss": 3.3312220573425293, "step": 418 }, { "epoch": 0.8802521008403361, "grad_norm": 23.418640662777587, "learning_rate": 8.911877921538117e-06, "loss": 3.8054161071777344, "step": 419 }, { "epoch": 0.8823529411764706, "grad_norm": 7.760210305795623, "learning_rate": 8.904252986813091e-06, "loss": 2.8041489124298096, "step": 420 }, { "epoch": 0.884453781512605, "grad_norm": 13.790720201964906, "learning_rate": 8.896604715829671e-06, "loss": 2.8391265869140625, "step": 421 }, { "epoch": 0.8865546218487395, "grad_norm": 8.558877313925247, "learning_rate": 8.888933154302626e-06, "loss": 2.6835553646087646, "step": 422 }, { "epoch": 0.8886554621848739, "grad_norm": 21.689551042379083, "learning_rate": 8.881238348085936e-06, "loss": 2.6738481521606445, "step": 423 }, { "epoch": 0.8907563025210085, "grad_norm": 7.686758427886692, "learning_rate": 8.87352034317252e-06, "loss": 2.619101047515869, "step": 424 }, { "epoch": 0.8928571428571429, "grad_norm": 20.40695143594997, "learning_rate": 8.865779185693957e-06, "loss": 3.3444905281066895, "step": 425 }, { "epoch": 0.8949579831932774, "grad_norm": 27.431337065110313, "learning_rate": 8.858014921920215e-06, "loss": 2.1527421474456787, "step": 426 }, { "epoch": 0.8970588235294118, "grad_norm": 20.606507987678672, "learning_rate": 8.850227598259365e-06, "loss": 2.6689836978912354, "step": 427 }, { "epoch": 0.8991596638655462, "grad_norm": 8.968995022440353, "learning_rate": 8.842417261257316e-06, "loss": 3.0119547843933105, "step": 428 }, { "epoch": 0.9012605042016807, "grad_norm": 28.528232969469133, "learning_rate": 8.83458395759753e-06, "loss": 2.482861042022705, "step": 429 }, { "epoch": 0.9033613445378151, "grad_norm": 10.074031458183692, "learning_rate": 8.826727734100742e-06, "loss": 2.8982067108154297, "step": 430 }, { "epoch": 0.9054621848739496, "grad_norm": 8.399253353390154, "learning_rate": 8.818848637724681e-06, "loss": 2.5004382133483887, "step": 431 }, { "epoch": 0.907563025210084, "grad_norm": 8.747805949968082, "learning_rate": 8.810946715563798e-06, "loss": 2.612011194229126, "step": 432 }, { "epoch": 0.9096638655462185, "grad_norm": 10.425702565789909, "learning_rate": 8.803022014848966e-06, "loss": 2.9700820446014404, "step": 433 }, { "epoch": 0.9117647058823529, "grad_norm": 11.029401754074971, "learning_rate": 8.795074582947214e-06, "loss": 3.248368263244629, "step": 434 }, { "epoch": 0.9138655462184874, "grad_norm": 9.336382488449228, "learning_rate": 8.787104467361442e-06, "loss": 2.993704319000244, "step": 435 }, { "epoch": 0.9159663865546218, "grad_norm": 13.327453834983904, "learning_rate": 8.779111715730127e-06, "loss": 2.6930155754089355, "step": 436 }, { "epoch": 0.9180672268907563, "grad_norm": 10.570908488031245, "learning_rate": 8.771096375827047e-06, "loss": 3.069434404373169, "step": 437 }, { "epoch": 0.9201680672268907, "grad_norm": 19.381962817436207, "learning_rate": 8.763058495560994e-06, "loss": 3.1358611583709717, "step": 438 }, { "epoch": 0.9222689075630253, "grad_norm": 18.418237048785702, "learning_rate": 8.754998122975489e-06, "loss": 3.2987184524536133, "step": 439 }, { "epoch": 0.9243697478991597, "grad_norm": 8.737803987239646, "learning_rate": 8.746915306248488e-06, "loss": 2.9279255867004395, "step": 440 }, { "epoch": 0.9264705882352942, "grad_norm": 13.117095498271222, "learning_rate": 8.7388100936921e-06, "loss": 2.795942783355713, "step": 441 }, { "epoch": 0.9285714285714286, "grad_norm": 25.973728201733575, "learning_rate": 8.730682533752301e-06, "loss": 2.7590699195861816, "step": 442 }, { "epoch": 0.930672268907563, "grad_norm": 9.543199289400748, "learning_rate": 8.722532675008635e-06, "loss": 2.6571459770202637, "step": 443 }, { "epoch": 0.9327731092436975, "grad_norm": 10.69198569405724, "learning_rate": 8.714360566173932e-06, "loss": 2.7342920303344727, "step": 444 }, { "epoch": 0.9348739495798319, "grad_norm": 13.298135717649288, "learning_rate": 8.706166256094013e-06, "loss": 2.9492366313934326, "step": 445 }, { "epoch": 0.9369747899159664, "grad_norm": 18.5856782117513, "learning_rate": 8.6979497937474e-06, "loss": 2.937699317932129, "step": 446 }, { "epoch": 0.9390756302521008, "grad_norm": 10.292297569389804, "learning_rate": 8.689711228245021e-06, "loss": 3.23824405670166, "step": 447 }, { "epoch": 0.9411764705882353, "grad_norm": 19.90454431534383, "learning_rate": 8.681450608829916e-06, "loss": 2.542668581008911, "step": 448 }, { "epoch": 0.9432773109243697, "grad_norm": 14.413143934794212, "learning_rate": 8.67316798487695e-06, "loss": 3.257632255554199, "step": 449 }, { "epoch": 0.9453781512605042, "grad_norm": 10.80231465762936, "learning_rate": 8.664863405892506e-06, "loss": 2.7072958946228027, "step": 450 }, { "epoch": 0.9474789915966386, "grad_norm": 18.020582485094227, "learning_rate": 8.656536921514195e-06, "loss": 2.532301664352417, "step": 451 }, { "epoch": 0.9495798319327731, "grad_norm": 12.503896279810512, "learning_rate": 8.648188581510567e-06, "loss": 2.726604461669922, "step": 452 }, { "epoch": 0.9516806722689075, "grad_norm": 18.785189447389097, "learning_rate": 8.639818435780797e-06, "loss": 2.516594886779785, "step": 453 }, { "epoch": 0.9537815126050421, "grad_norm": 12.0120687102085, "learning_rate": 8.631426534354404e-06, "loss": 2.7706644535064697, "step": 454 }, { "epoch": 0.9558823529411765, "grad_norm": 11.506720081337315, "learning_rate": 8.623012927390936e-06, "loss": 3.2427144050598145, "step": 455 }, { "epoch": 0.957983193277311, "grad_norm": 10.11083550503784, "learning_rate": 8.614577665179684e-06, "loss": 3.1202523708343506, "step": 456 }, { "epoch": 0.9600840336134454, "grad_norm": 15.945109216294865, "learning_rate": 8.606120798139375e-06, "loss": 2.6210598945617676, "step": 457 }, { "epoch": 0.9621848739495799, "grad_norm": 9.09618149788864, "learning_rate": 8.597642376817865e-06, "loss": 2.669271469116211, "step": 458 }, { "epoch": 0.9642857142857143, "grad_norm": 8.714640631605363, "learning_rate": 8.589142451891849e-06, "loss": 2.6489734649658203, "step": 459 }, { "epoch": 0.9663865546218487, "grad_norm": 7.855597298788909, "learning_rate": 8.580621074166553e-06, "loss": 3.10178804397583, "step": 460 }, { "epoch": 0.9684873949579832, "grad_norm": 10.502691052340555, "learning_rate": 8.572078294575423e-06, "loss": 2.589158296585083, "step": 461 }, { "epoch": 0.9705882352941176, "grad_norm": 10.459968052493494, "learning_rate": 8.56351416417983e-06, "loss": 2.5543792247772217, "step": 462 }, { "epoch": 0.9726890756302521, "grad_norm": 12.885512846289808, "learning_rate": 8.554928734168767e-06, "loss": 2.65985369682312, "step": 463 }, { "epoch": 0.9747899159663865, "grad_norm": 9.639047199230617, "learning_rate": 8.546322055858526e-06, "loss": 3.0177440643310547, "step": 464 }, { "epoch": 0.976890756302521, "grad_norm": 9.494268049756599, "learning_rate": 8.537694180692416e-06, "loss": 2.2767248153686523, "step": 465 }, { "epoch": 0.9789915966386554, "grad_norm": 12.56887928459161, "learning_rate": 8.529045160240433e-06, "loss": 2.7835707664489746, "step": 466 }, { "epoch": 0.9810924369747899, "grad_norm": 10.580355179128095, "learning_rate": 8.520375046198965e-06, "loss": 2.4373722076416016, "step": 467 }, { "epoch": 0.9831932773109243, "grad_norm": 10.13582135951574, "learning_rate": 8.51168389039048e-06, "loss": 2.464303731918335, "step": 468 }, { "epoch": 0.9852941176470589, "grad_norm": 12.209700818401375, "learning_rate": 8.502971744763216e-06, "loss": 2.2609100341796875, "step": 469 }, { "epoch": 0.9873949579831933, "grad_norm": 21.359445929891656, "learning_rate": 8.494238661390865e-06, "loss": 3.0135858058929443, "step": 470 }, { "epoch": 0.9894957983193278, "grad_norm": 15.087072293517004, "learning_rate": 8.485484692472272e-06, "loss": 2.770965099334717, "step": 471 }, { "epoch": 0.9915966386554622, "grad_norm": 8.181199645745421, "learning_rate": 8.476709890331116e-06, "loss": 2.6243722438812256, "step": 472 }, { "epoch": 0.9936974789915967, "grad_norm": 7.527423998031555, "learning_rate": 8.467914307415601e-06, "loss": 2.9319207668304443, "step": 473 }, { "epoch": 0.9957983193277311, "grad_norm": 9.424234237676545, "learning_rate": 8.459097996298137e-06, "loss": 3.0626072883605957, "step": 474 }, { "epoch": 0.9978991596638656, "grad_norm": 14.444274317338678, "learning_rate": 8.45026100967503e-06, "loss": 3.000889778137207, "step": 475 }, { "epoch": 1.0, "grad_norm": 8.45019782867115, "learning_rate": 8.441403400366169e-06, "loss": 3.112825393676758, "step": 476 }, { "epoch": 1.0021008403361344, "grad_norm": 19.596775314152666, "learning_rate": 8.432525221314708e-06, "loss": 1.4137624502182007, "step": 477 }, { "epoch": 1.004201680672269, "grad_norm": 9.233023882113994, "learning_rate": 8.423626525586744e-06, "loss": 1.6808059215545654, "step": 478 }, { "epoch": 1.0063025210084033, "grad_norm": 9.789186389046735, "learning_rate": 8.414707366371006e-06, "loss": 1.8797330856323242, "step": 479 }, { "epoch": 1.0084033613445378, "grad_norm": 7.894274079237724, "learning_rate": 8.405767796978546e-06, "loss": 1.9548699855804443, "step": 480 }, { "epoch": 1.0105042016806722, "grad_norm": 11.882995555931503, "learning_rate": 8.396807870842396e-06, "loss": 1.5713114738464355, "step": 481 }, { "epoch": 1.0126050420168067, "grad_norm": 14.948396348319923, "learning_rate": 8.387827641517274e-06, "loss": 1.69504976272583, "step": 482 }, { "epoch": 1.0147058823529411, "grad_norm": 6.935744624929541, "learning_rate": 8.378827162679248e-06, "loss": 1.3813257217407227, "step": 483 }, { "epoch": 1.0168067226890756, "grad_norm": 9.50729885231966, "learning_rate": 8.369806488125418e-06, "loss": 2.4568567276000977, "step": 484 }, { "epoch": 1.01890756302521, "grad_norm": 11.62592077082348, "learning_rate": 8.360765671773603e-06, "loss": 2.602184534072876, "step": 485 }, { "epoch": 1.0210084033613445, "grad_norm": 15.469624436922395, "learning_rate": 8.351704767662005e-06, "loss": 1.8193070888519287, "step": 486 }, { "epoch": 1.023109243697479, "grad_norm": 12.389371131721145, "learning_rate": 8.3426238299489e-06, "loss": 1.4549766778945923, "step": 487 }, { "epoch": 1.0252100840336134, "grad_norm": 7.898711913261212, "learning_rate": 8.333522912912308e-06, "loss": 1.4681106805801392, "step": 488 }, { "epoch": 1.0273109243697478, "grad_norm": 14.553557605821632, "learning_rate": 8.324402070949658e-06, "loss": 1.4224164485931396, "step": 489 }, { "epoch": 1.0294117647058822, "grad_norm": 21.0322684953627, "learning_rate": 8.315261358577485e-06, "loss": 2.200676441192627, "step": 490 }, { "epoch": 1.0315126050420167, "grad_norm": 14.230965851092702, "learning_rate": 8.306100830431085e-06, "loss": 1.867397665977478, "step": 491 }, { "epoch": 1.0336134453781514, "grad_norm": 11.330315084805383, "learning_rate": 8.296920541264197e-06, "loss": 1.4270985126495361, "step": 492 }, { "epoch": 1.0357142857142858, "grad_norm": 11.452248734086307, "learning_rate": 8.287720545948676e-06, "loss": 1.464069128036499, "step": 493 }, { "epoch": 1.0378151260504203, "grad_norm": 18.476525141242952, "learning_rate": 8.278500899474162e-06, "loss": 1.192551612854004, "step": 494 }, { "epoch": 1.0399159663865547, "grad_norm": 13.695173322132312, "learning_rate": 8.269261656947755e-06, "loss": 2.367762327194214, "step": 495 }, { "epoch": 1.0420168067226891, "grad_norm": 12.101022572223535, "learning_rate": 8.260002873593679e-06, "loss": 1.6752372980117798, "step": 496 }, { "epoch": 1.0441176470588236, "grad_norm": 14.763270168918805, "learning_rate": 8.25072460475296e-06, "loss": 1.409712314605713, "step": 497 }, { "epoch": 1.046218487394958, "grad_norm": 12.622229054224464, "learning_rate": 8.24142690588309e-06, "loss": 1.6270588636398315, "step": 498 }, { "epoch": 1.0483193277310925, "grad_norm": 7.889964988601032, "learning_rate": 8.232109832557696e-06, "loss": 1.4294947385787964, "step": 499 }, { "epoch": 1.050420168067227, "grad_norm": 9.640341277497848, "learning_rate": 8.222773440466213e-06, "loss": 1.2340010404586792, "step": 500 }, { "epoch": 1.0525210084033614, "grad_norm": 9.361065825268032, "learning_rate": 8.213417785413538e-06, "loss": 1.451041340827942, "step": 501 }, { "epoch": 1.0546218487394958, "grad_norm": 10.851800895184763, "learning_rate": 8.204042923319717e-06, "loss": 0.8124719858169556, "step": 502 }, { "epoch": 1.0567226890756303, "grad_norm": 13.939415896202156, "learning_rate": 8.19464891021959e-06, "loss": 1.5310864448547363, "step": 503 }, { "epoch": 1.0588235294117647, "grad_norm": 12.545903899817956, "learning_rate": 8.18523580226247e-06, "loss": 1.2139228582382202, "step": 504 }, { "epoch": 1.0609243697478992, "grad_norm": 7.8688457688530455, "learning_rate": 8.1758036557118e-06, "loss": 1.3573241233825684, "step": 505 }, { "epoch": 1.0630252100840336, "grad_norm": 29.274148786110516, "learning_rate": 8.166352526944821e-06, "loss": 1.9899749755859375, "step": 506 }, { "epoch": 1.065126050420168, "grad_norm": 12.789841758713314, "learning_rate": 8.156882472452232e-06, "loss": 1.4103593826293945, "step": 507 }, { "epoch": 1.0672268907563025, "grad_norm": 11.46688535188232, "learning_rate": 8.147393548837856e-06, "loss": 1.227393627166748, "step": 508 }, { "epoch": 1.069327731092437, "grad_norm": 11.67493017233716, "learning_rate": 8.137885812818296e-06, "loss": 1.7060927152633667, "step": 509 }, { "epoch": 1.0714285714285714, "grad_norm": 13.183390423963338, "learning_rate": 8.128359321222601e-06, "loss": 1.890432357788086, "step": 510 }, { "epoch": 1.0735294117647058, "grad_norm": 8.769195455641308, "learning_rate": 8.118814130991925e-06, "loss": 1.8258857727050781, "step": 511 }, { "epoch": 1.0756302521008403, "grad_norm": 9.016866647141889, "learning_rate": 8.109250299179188e-06, "loss": 0.9584097862243652, "step": 512 }, { "epoch": 1.0777310924369747, "grad_norm": 8.866656672277916, "learning_rate": 8.09966788294873e-06, "loss": 1.4017150402069092, "step": 513 }, { "epoch": 1.0798319327731092, "grad_norm": 12.12920225890514, "learning_rate": 8.090066939575972e-06, "loss": 1.3034381866455078, "step": 514 }, { "epoch": 1.0819327731092436, "grad_norm": 11.169332765461306, "learning_rate": 8.080447526447079e-06, "loss": 1.0734150409698486, "step": 515 }, { "epoch": 1.084033613445378, "grad_norm": 15.988980575396647, "learning_rate": 8.070809701058606e-06, "loss": 0.8819087743759155, "step": 516 }, { "epoch": 1.0861344537815125, "grad_norm": 10.445041930863859, "learning_rate": 8.061153521017169e-06, "loss": 1.3253920078277588, "step": 517 }, { "epoch": 1.088235294117647, "grad_norm": 7.477532974278996, "learning_rate": 8.051479044039086e-06, "loss": 1.0912744998931885, "step": 518 }, { "epoch": 1.0903361344537814, "grad_norm": 15.386742532344485, "learning_rate": 8.041786327950037e-06, "loss": 1.6941767930984497, "step": 519 }, { "epoch": 1.092436974789916, "grad_norm": 13.631587045212196, "learning_rate": 8.032075430684724e-06, "loss": 1.058671236038208, "step": 520 }, { "epoch": 1.0945378151260505, "grad_norm": 9.174394889796707, "learning_rate": 8.02234641028652e-06, "loss": 1.1603420972824097, "step": 521 }, { "epoch": 1.096638655462185, "grad_norm": 18.009634618634845, "learning_rate": 8.012599324907121e-06, "loss": 1.4285218715667725, "step": 522 }, { "epoch": 1.0987394957983194, "grad_norm": 18.317588738929096, "learning_rate": 8.0028342328062e-06, "loss": 1.3041057586669922, "step": 523 }, { "epoch": 1.1008403361344539, "grad_norm": 17.245361771703262, "learning_rate": 7.993051192351056e-06, "loss": 2.329005718231201, "step": 524 }, { "epoch": 1.1029411764705883, "grad_norm": 5.466501144551759, "learning_rate": 7.983250262016276e-06, "loss": 0.7331016063690186, "step": 525 }, { "epoch": 1.1050420168067228, "grad_norm": 19.76792957260025, "learning_rate": 7.973431500383366e-06, "loss": 2.193528175354004, "step": 526 }, { "epoch": 1.1071428571428572, "grad_norm": 11.04973790435175, "learning_rate": 7.963594966140423e-06, "loss": 1.3245251178741455, "step": 527 }, { "epoch": 1.1092436974789917, "grad_norm": 14.50002827076454, "learning_rate": 7.953740718081765e-06, "loss": 1.1308670043945312, "step": 528 }, { "epoch": 1.111344537815126, "grad_norm": 8.457254255014693, "learning_rate": 7.943868815107594e-06, "loss": 1.3318034410476685, "step": 529 }, { "epoch": 1.1134453781512605, "grad_norm": 12.48006901565296, "learning_rate": 7.933979316223632e-06, "loss": 1.2564438581466675, "step": 530 }, { "epoch": 1.115546218487395, "grad_norm": 13.952521489657013, "learning_rate": 7.92407228054078e-06, "loss": 1.2420412302017212, "step": 531 }, { "epoch": 1.1176470588235294, "grad_norm": 11.927118732913993, "learning_rate": 7.914147767274756e-06, "loss": 1.9582582712173462, "step": 532 }, { "epoch": 1.1197478991596639, "grad_norm": 29.1836862977554, "learning_rate": 7.904205835745744e-06, "loss": 1.7057411670684814, "step": 533 }, { "epoch": 1.1218487394957983, "grad_norm": 8.77699695792644, "learning_rate": 7.894246545378037e-06, "loss": 1.810387134552002, "step": 534 }, { "epoch": 1.1239495798319328, "grad_norm": 11.812154757139437, "learning_rate": 7.884269955699689e-06, "loss": 1.6038577556610107, "step": 535 }, { "epoch": 1.1260504201680672, "grad_norm": 11.347334970124107, "learning_rate": 7.874276126342151e-06, "loss": 1.1410393714904785, "step": 536 }, { "epoch": 1.1281512605042017, "grad_norm": 23.969457981422316, "learning_rate": 7.86426511703992e-06, "loss": 2.28239369392395, "step": 537 }, { "epoch": 1.1302521008403361, "grad_norm": 11.3793937172999, "learning_rate": 7.854236987630178e-06, "loss": 2.1672444343566895, "step": 538 }, { "epoch": 1.1323529411764706, "grad_norm": 8.571185039369908, "learning_rate": 7.844191798052438e-06, "loss": 1.7712535858154297, "step": 539 }, { "epoch": 1.134453781512605, "grad_norm": 8.155743104110897, "learning_rate": 7.834129608348183e-06, "loss": 1.4109793901443481, "step": 540 }, { "epoch": 1.1365546218487395, "grad_norm": 12.006945471100122, "learning_rate": 7.824050478660506e-06, "loss": 1.4405725002288818, "step": 541 }, { "epoch": 1.138655462184874, "grad_norm": 16.24385934265993, "learning_rate": 7.813954469233758e-06, "loss": 2.2450976371765137, "step": 542 }, { "epoch": 1.1407563025210083, "grad_norm": 63.00358955157523, "learning_rate": 7.803841640413177e-06, "loss": 2.16367244720459, "step": 543 }, { "epoch": 1.1428571428571428, "grad_norm": 10.995277933527825, "learning_rate": 7.793712052644535e-06, "loss": 2.3919224739074707, "step": 544 }, { "epoch": 1.1449579831932772, "grad_norm": 9.931645247221951, "learning_rate": 7.783565766473777e-06, "loss": 1.4211726188659668, "step": 545 }, { "epoch": 1.1470588235294117, "grad_norm": 12.106564772704573, "learning_rate": 7.773402842546654e-06, "loss": 1.2502498626708984, "step": 546 }, { "epoch": 1.1491596638655461, "grad_norm": 8.144149987908426, "learning_rate": 7.76322334160836e-06, "loss": 1.423762321472168, "step": 547 }, { "epoch": 1.1512605042016806, "grad_norm": 10.152738619426868, "learning_rate": 7.75302732450318e-06, "loss": 1.1090279817581177, "step": 548 }, { "epoch": 1.153361344537815, "grad_norm": 11.024880610484013, "learning_rate": 7.742814852174112e-06, "loss": 1.0321426391601562, "step": 549 }, { "epoch": 1.1554621848739495, "grad_norm": 10.4112886492949, "learning_rate": 7.73258598566251e-06, "loss": 1.0928632020950317, "step": 550 }, { "epoch": 1.157563025210084, "grad_norm": 17.17079853756711, "learning_rate": 7.72234078610772e-06, "loss": 1.2369472980499268, "step": 551 }, { "epoch": 1.1596638655462184, "grad_norm": 12.662228894532866, "learning_rate": 7.712079314746716e-06, "loss": 1.2957392930984497, "step": 552 }, { "epoch": 1.161764705882353, "grad_norm": 8.967923305212855, "learning_rate": 7.701801632913722e-06, "loss": 1.6709070205688477, "step": 553 }, { "epoch": 1.1638655462184875, "grad_norm": 9.520057506790387, "learning_rate": 7.691507802039861e-06, "loss": 1.6091077327728271, "step": 554 }, { "epoch": 1.165966386554622, "grad_norm": 12.924582534581134, "learning_rate": 7.68119788365278e-06, "loss": 1.8003133535385132, "step": 555 }, { "epoch": 1.1680672268907564, "grad_norm": 8.027840739484652, "learning_rate": 7.670871939376281e-06, "loss": 1.0151593685150146, "step": 556 }, { "epoch": 1.1701680672268908, "grad_norm": 10.792867985796137, "learning_rate": 7.660530030929961e-06, "loss": 1.3084783554077148, "step": 557 }, { "epoch": 1.1722689075630253, "grad_norm": 10.336895443268714, "learning_rate": 7.650172220128828e-06, "loss": 1.3882572650909424, "step": 558 }, { "epoch": 1.1743697478991597, "grad_norm": 11.46121788240209, "learning_rate": 7.639798568882947e-06, "loss": 1.3919298648834229, "step": 559 }, { "epoch": 1.1764705882352942, "grad_norm": 11.442052901701038, "learning_rate": 7.629409139197063e-06, "loss": 1.3745830059051514, "step": 560 }, { "epoch": 1.1785714285714286, "grad_norm": 22.18812336562329, "learning_rate": 7.619003993170226e-06, "loss": 1.2964568138122559, "step": 561 }, { "epoch": 1.180672268907563, "grad_norm": 14.305068677598294, "learning_rate": 7.608583192995433e-06, "loss": 1.75518798828125, "step": 562 }, { "epoch": 1.1827731092436975, "grad_norm": 38.68186634407232, "learning_rate": 7.598146800959238e-06, "loss": 2.156588554382324, "step": 563 }, { "epoch": 1.184873949579832, "grad_norm": 15.824079652626462, "learning_rate": 7.5876948794414015e-06, "loss": 1.3602566719055176, "step": 564 }, { "epoch": 1.1869747899159664, "grad_norm": 13.134376075413467, "learning_rate": 7.577227490914495e-06, "loss": 1.5620733499526978, "step": 565 }, { "epoch": 1.1890756302521008, "grad_norm": 9.860309886809128, "learning_rate": 7.5667446979435445e-06, "loss": 0.971282422542572, "step": 566 }, { "epoch": 1.1911764705882353, "grad_norm": 11.617960075857892, "learning_rate": 7.556246563185648e-06, "loss": 1.1717581748962402, "step": 567 }, { "epoch": 1.1932773109243697, "grad_norm": 14.78629106010037, "learning_rate": 7.545733149389605e-06, "loss": 1.8813025951385498, "step": 568 }, { "epoch": 1.1953781512605042, "grad_norm": 11.92994585452875, "learning_rate": 7.535204519395538e-06, "loss": 1.280207633972168, "step": 569 }, { "epoch": 1.1974789915966386, "grad_norm": 14.537731397359755, "learning_rate": 7.5246607361345215e-06, "loss": 1.5685778856277466, "step": 570 }, { "epoch": 1.199579831932773, "grad_norm": 9.978722079402786, "learning_rate": 7.514101862628203e-06, "loss": 2.2011172771453857, "step": 571 }, { "epoch": 1.2016806722689075, "grad_norm": 14.328584272935853, "learning_rate": 7.503527961988422e-06, "loss": 2.0038180351257324, "step": 572 }, { "epoch": 1.203781512605042, "grad_norm": 11.49676437218398, "learning_rate": 7.492939097416842e-06, "loss": 1.1275922060012817, "step": 573 }, { "epoch": 1.2058823529411764, "grad_norm": 13.603928637496292, "learning_rate": 7.482335332204568e-06, "loss": 1.208678960800171, "step": 574 }, { "epoch": 1.2079831932773109, "grad_norm": 10.710849924738463, "learning_rate": 7.471716729731764e-06, "loss": 1.7450125217437744, "step": 575 }, { "epoch": 1.2100840336134453, "grad_norm": 10.8408813790809, "learning_rate": 7.461083353467283e-06, "loss": 1.5381510257720947, "step": 576 }, { "epoch": 1.2121848739495797, "grad_norm": 10.502717838660322, "learning_rate": 7.450435266968279e-06, "loss": 1.6857651472091675, "step": 577 }, { "epoch": 1.2142857142857142, "grad_norm": 10.194196645130454, "learning_rate": 7.4397725338798365e-06, "loss": 1.9049471616744995, "step": 578 }, { "epoch": 1.2163865546218489, "grad_norm": 8.336901180250376, "learning_rate": 7.429095217934578e-06, "loss": 2.2398974895477295, "step": 579 }, { "epoch": 1.2184873949579833, "grad_norm": 8.289301563947674, "learning_rate": 7.4184033829522935e-06, "loss": 1.8767409324645996, "step": 580 }, { "epoch": 1.2205882352941178, "grad_norm": 7.83258681688038, "learning_rate": 7.4076970928395565e-06, "loss": 1.4787061214447021, "step": 581 }, { "epoch": 1.2226890756302522, "grad_norm": 11.288493150816146, "learning_rate": 7.396976411589338e-06, "loss": 1.1055876016616821, "step": 582 }, { "epoch": 1.2247899159663866, "grad_norm": 8.627197279612671, "learning_rate": 7.386241403280629e-06, "loss": 1.668757438659668, "step": 583 }, { "epoch": 1.226890756302521, "grad_norm": 7.9829732080808276, "learning_rate": 7.375492132078051e-06, "loss": 1.2818783521652222, "step": 584 }, { "epoch": 1.2289915966386555, "grad_norm": 9.132163063845432, "learning_rate": 7.364728662231484e-06, "loss": 1.578829050064087, "step": 585 }, { "epoch": 1.23109243697479, "grad_norm": 9.541187433357738, "learning_rate": 7.353951058075669e-06, "loss": 1.572939157485962, "step": 586 }, { "epoch": 1.2331932773109244, "grad_norm": 29.472008336805924, "learning_rate": 7.343159384029833e-06, "loss": 3.977992057800293, "step": 587 }, { "epoch": 1.2352941176470589, "grad_norm": 12.577355177733914, "learning_rate": 7.332353704597299e-06, "loss": 1.955003023147583, "step": 588 }, { "epoch": 1.2373949579831933, "grad_norm": 10.61755598072498, "learning_rate": 7.321534084365101e-06, "loss": 1.5401737689971924, "step": 589 }, { "epoch": 1.2394957983193278, "grad_norm": 17.052134953118316, "learning_rate": 7.310700588003605e-06, "loss": 1.895308017730713, "step": 590 }, { "epoch": 1.2415966386554622, "grad_norm": 13.914617942504853, "learning_rate": 7.299853280266109e-06, "loss": 1.6920474767684937, "step": 591 }, { "epoch": 1.2436974789915967, "grad_norm": 18.300626922757814, "learning_rate": 7.28899222598847e-06, "loss": 1.9865736961364746, "step": 592 }, { "epoch": 1.245798319327731, "grad_norm": 8.604822405832417, "learning_rate": 7.278117490088703e-06, "loss": 1.2350941896438599, "step": 593 }, { "epoch": 1.2478991596638656, "grad_norm": 15.714181520858954, "learning_rate": 7.267229137566607e-06, "loss": 1.800095558166504, "step": 594 }, { "epoch": 1.25, "grad_norm": 7.625924941471246, "learning_rate": 7.256327233503365e-06, "loss": 1.848137617111206, "step": 595 }, { "epoch": 1.2521008403361344, "grad_norm": 8.480492494477819, "learning_rate": 7.24541184306116e-06, "loss": 1.7656617164611816, "step": 596 }, { "epoch": 1.254201680672269, "grad_norm": 11.960261973795399, "learning_rate": 7.234483031482787e-06, "loss": 1.0096323490142822, "step": 597 }, { "epoch": 1.2563025210084033, "grad_norm": 9.709001923888373, "learning_rate": 7.223540864091259e-06, "loss": 1.428197979927063, "step": 598 }, { "epoch": 1.2584033613445378, "grad_norm": 11.628908186348927, "learning_rate": 7.2125854062894184e-06, "loss": 1.0703970193862915, "step": 599 }, { "epoch": 1.2605042016806722, "grad_norm": 14.20204722362147, "learning_rate": 7.201616723559548e-06, "loss": 1.7873646020889282, "step": 600 }, { "epoch": 1.2626050420168067, "grad_norm": 12.326258967391198, "learning_rate": 7.190634881462976e-06, "loss": 1.3262135982513428, "step": 601 }, { "epoch": 1.2647058823529411, "grad_norm": 13.762619560991299, "learning_rate": 7.179639945639688e-06, "loss": 1.6294150352478027, "step": 602 }, { "epoch": 1.2668067226890756, "grad_norm": 12.793929462404881, "learning_rate": 7.168631981807931e-06, "loss": 2.6409220695495605, "step": 603 }, { "epoch": 1.26890756302521, "grad_norm": 12.75285051440542, "learning_rate": 7.15761105576382e-06, "loss": 1.3407433032989502, "step": 604 }, { "epoch": 1.2710084033613445, "grad_norm": 11.811026706721915, "learning_rate": 7.1465772333809524e-06, "loss": 1.1475789546966553, "step": 605 }, { "epoch": 1.273109243697479, "grad_norm": 16.182274466548407, "learning_rate": 7.1355305806100036e-06, "loss": 1.8270117044448853, "step": 606 }, { "epoch": 1.2752100840336134, "grad_norm": 9.390889705782493, "learning_rate": 7.124471163478344e-06, "loss": 2.168900489807129, "step": 607 }, { "epoch": 1.2773109243697478, "grad_norm": 14.960557905830523, "learning_rate": 7.113399048089631e-06, "loss": 2.0142345428466797, "step": 608 }, { "epoch": 1.2794117647058822, "grad_norm": 14.63642311907181, "learning_rate": 7.102314300623425e-06, "loss": 2.015444755554199, "step": 609 }, { "epoch": 1.2815126050420167, "grad_norm": 13.291155405094099, "learning_rate": 7.091216987334792e-06, "loss": 1.5882906913757324, "step": 610 }, { "epoch": 1.2836134453781511, "grad_norm": 17.727064634923273, "learning_rate": 7.080107174553903e-06, "loss": 1.4543545246124268, "step": 611 }, { "epoch": 1.2857142857142856, "grad_norm": 13.123573018342379, "learning_rate": 7.068984928685638e-06, "loss": 1.3196444511413574, "step": 612 }, { "epoch": 1.28781512605042, "grad_norm": 11.204963124082711, "learning_rate": 7.057850316209198e-06, "loss": 0.8601089715957642, "step": 613 }, { "epoch": 1.2899159663865547, "grad_norm": 11.507041064870066, "learning_rate": 7.0467034036776945e-06, "loss": 1.334380865097046, "step": 614 }, { "epoch": 1.2920168067226891, "grad_norm": 9.153184893600336, "learning_rate": 7.035544257717761e-06, "loss": 1.4980111122131348, "step": 615 }, { "epoch": 1.2941176470588236, "grad_norm": 11.208470095807519, "learning_rate": 7.024372945029152e-06, "loss": 1.9393174648284912, "step": 616 }, { "epoch": 1.296218487394958, "grad_norm": 9.33539024674701, "learning_rate": 7.013189532384343e-06, "loss": 1.1070374250411987, "step": 617 }, { "epoch": 1.2983193277310925, "grad_norm": 22.088040059228636, "learning_rate": 7.001994086628133e-06, "loss": 2.146557331085205, "step": 618 }, { "epoch": 1.300420168067227, "grad_norm": 12.461539796415895, "learning_rate": 6.990786674677246e-06, "loss": 1.097703456878662, "step": 619 }, { "epoch": 1.3025210084033614, "grad_norm": 10.337144677645794, "learning_rate": 6.979567363519927e-06, "loss": 1.9619685411453247, "step": 620 }, { "epoch": 1.3046218487394958, "grad_norm": 8.583774398203186, "learning_rate": 6.9683362202155465e-06, "loss": 1.2424434423446655, "step": 621 }, { "epoch": 1.3067226890756303, "grad_norm": 10.235846664061171, "learning_rate": 6.957093311894199e-06, "loss": 1.8912100791931152, "step": 622 }, { "epoch": 1.3088235294117647, "grad_norm": 12.7496233438477, "learning_rate": 6.945838705756293e-06, "loss": 1.4234580993652344, "step": 623 }, { "epoch": 1.3109243697478992, "grad_norm": 12.664108172155123, "learning_rate": 6.934572469072163e-06, "loss": 1.7631306648254395, "step": 624 }, { "epoch": 1.3130252100840336, "grad_norm": 9.043940926283064, "learning_rate": 6.923294669181659e-06, "loss": 1.275686264038086, "step": 625 }, { "epoch": 1.315126050420168, "grad_norm": 7.7562010562396155, "learning_rate": 6.912005373493747e-06, "loss": 1.8493428230285645, "step": 626 }, { "epoch": 1.3172268907563025, "grad_norm": 10.778946101337466, "learning_rate": 6.900704649486103e-06, "loss": 1.0401699542999268, "step": 627 }, { "epoch": 1.319327731092437, "grad_norm": 9.474741424665671, "learning_rate": 6.889392564704712e-06, "loss": 1.932092547416687, "step": 628 }, { "epoch": 1.3214285714285714, "grad_norm": 18.187204049633937, "learning_rate": 6.878069186763466e-06, "loss": 2.0269484519958496, "step": 629 }, { "epoch": 1.3235294117647058, "grad_norm": 12.371698082139902, "learning_rate": 6.866734583343753e-06, "loss": 1.6765419244766235, "step": 630 }, { "epoch": 1.3256302521008403, "grad_norm": 20.859173211033255, "learning_rate": 6.855388822194061e-06, "loss": 1.7931967973709106, "step": 631 }, { "epoch": 1.3277310924369747, "grad_norm": 10.82122547870125, "learning_rate": 6.844031971129571e-06, "loss": 0.9582860469818115, "step": 632 }, { "epoch": 1.3298319327731092, "grad_norm": 13.255068667352083, "learning_rate": 6.8326640980317475e-06, "loss": 1.7692348957061768, "step": 633 }, { "epoch": 1.3319327731092436, "grad_norm": 12.632729369596628, "learning_rate": 6.821285270847934e-06, "loss": 2.143463373184204, "step": 634 }, { "epoch": 1.334033613445378, "grad_norm": 31.76404047719635, "learning_rate": 6.80989555759095e-06, "loss": 2.290733814239502, "step": 635 }, { "epoch": 1.3361344537815127, "grad_norm": 18.9996916353526, "learning_rate": 6.79849502633868e-06, "loss": 1.4548063278198242, "step": 636 }, { "epoch": 1.3382352941176472, "grad_norm": 10.423656653462372, "learning_rate": 6.787083745233674e-06, "loss": 1.6137502193450928, "step": 637 }, { "epoch": 1.3403361344537816, "grad_norm": 9.260024233354208, "learning_rate": 6.775661782482732e-06, "loss": 1.277546763420105, "step": 638 }, { "epoch": 1.342436974789916, "grad_norm": 17.22623817552147, "learning_rate": 6.764229206356498e-06, "loss": 1.4183297157287598, "step": 639 }, { "epoch": 1.3445378151260505, "grad_norm": 13.655193467078059, "learning_rate": 6.752786085189059e-06, "loss": 2.352818012237549, "step": 640 }, { "epoch": 1.346638655462185, "grad_norm": 10.832229231352626, "learning_rate": 6.741332487377525e-06, "loss": 1.1966056823730469, "step": 641 }, { "epoch": 1.3487394957983194, "grad_norm": 16.450581846244585, "learning_rate": 6.729868481381632e-06, "loss": 2.1670610904693604, "step": 642 }, { "epoch": 1.3508403361344539, "grad_norm": 14.092922253172704, "learning_rate": 6.718394135723321e-06, "loss": 1.4478580951690674, "step": 643 }, { "epoch": 1.3529411764705883, "grad_norm": 9.381957347730207, "learning_rate": 6.706909518986341e-06, "loss": 1.1712067127227783, "step": 644 }, { "epoch": 1.3550420168067228, "grad_norm": 10.457393453015948, "learning_rate": 6.695414699815828e-06, "loss": 1.241437315940857, "step": 645 }, { "epoch": 1.3571428571428572, "grad_norm": 26.13803770478073, "learning_rate": 6.6839097469179e-06, "loss": 1.5295310020446777, "step": 646 }, { "epoch": 1.3592436974789917, "grad_norm": 7.790998799935961, "learning_rate": 6.6723947290592505e-06, "loss": 1.3555617332458496, "step": 647 }, { "epoch": 1.361344537815126, "grad_norm": 14.276342203489932, "learning_rate": 6.660869715066725e-06, "loss": 1.3158948421478271, "step": 648 }, { "epoch": 1.3634453781512605, "grad_norm": 7.962812237225353, "learning_rate": 6.649334773826924e-06, "loss": 1.7540979385375977, "step": 649 }, { "epoch": 1.365546218487395, "grad_norm": 12.139617079516373, "learning_rate": 6.63778997428578e-06, "loss": 1.7170000076293945, "step": 650 }, { "epoch": 1.3676470588235294, "grad_norm": 9.424987040512477, "learning_rate": 6.626235385448152e-06, "loss": 1.2551283836364746, "step": 651 }, { "epoch": 1.3697478991596639, "grad_norm": 11.731648378217931, "learning_rate": 6.61467107637741e-06, "loss": 1.468104362487793, "step": 652 }, { "epoch": 1.3718487394957983, "grad_norm": 22.089135904446437, "learning_rate": 6.603097116195026e-06, "loss": 1.3832511901855469, "step": 653 }, { "epoch": 1.3739495798319328, "grad_norm": 13.408397747285187, "learning_rate": 6.591513574080152e-06, "loss": 1.1895179748535156, "step": 654 }, { "epoch": 1.3760504201680672, "grad_norm": 10.506415017764112, "learning_rate": 6.579920519269218e-06, "loss": 1.57008957862854, "step": 655 }, { "epoch": 1.3781512605042017, "grad_norm": 7.3609644144158315, "learning_rate": 6.568318021055512e-06, "loss": 1.1686642169952393, "step": 656 }, { "epoch": 1.3802521008403361, "grad_norm": 19.968281534972263, "learning_rate": 6.556706148788765e-06, "loss": 1.831925392150879, "step": 657 }, { "epoch": 1.3823529411764706, "grad_norm": 14.858290457520233, "learning_rate": 6.545084971874738e-06, "loss": 1.1927814483642578, "step": 658 }, { "epoch": 1.384453781512605, "grad_norm": 13.471589913344788, "learning_rate": 6.5334545597748075e-06, "loss": 1.225053310394287, "step": 659 }, { "epoch": 1.3865546218487395, "grad_norm": 10.64391058893006, "learning_rate": 6.521814982005552e-06, "loss": 1.489911437034607, "step": 660 }, { "epoch": 1.388655462184874, "grad_norm": 14.084203318094486, "learning_rate": 6.510166308138328e-06, "loss": 1.3653918504714966, "step": 661 }, { "epoch": 1.3907563025210083, "grad_norm": 10.331380068295612, "learning_rate": 6.498508607798872e-06, "loss": 1.7082477807998657, "step": 662 }, { "epoch": 1.3928571428571428, "grad_norm": 10.828390377137284, "learning_rate": 6.48684195066686e-06, "loss": 1.1122634410858154, "step": 663 }, { "epoch": 1.3949579831932772, "grad_norm": 14.858926439296923, "learning_rate": 6.475166406475515e-06, "loss": 0.9572471380233765, "step": 664 }, { "epoch": 1.3970588235294117, "grad_norm": 17.02414371173566, "learning_rate": 6.4634820450111715e-06, "loss": 1.8282674551010132, "step": 665 }, { "epoch": 1.3991596638655461, "grad_norm": 12.803154660225488, "learning_rate": 6.451788936112868e-06, "loss": 1.2026221752166748, "step": 666 }, { "epoch": 1.4012605042016806, "grad_norm": 10.424391789653072, "learning_rate": 6.440087149671932e-06, "loss": 1.3183879852294922, "step": 667 }, { "epoch": 1.403361344537815, "grad_norm": 8.925077970843816, "learning_rate": 6.428376755631553e-06, "loss": 1.216771125793457, "step": 668 }, { "epoch": 1.4054621848739495, "grad_norm": 12.5613140309092, "learning_rate": 6.41665782398637e-06, "loss": 1.6759852170944214, "step": 669 }, { "epoch": 1.407563025210084, "grad_norm": 13.278399152081807, "learning_rate": 6.404930424782052e-06, "loss": 1.6593937873840332, "step": 670 }, { "epoch": 1.4096638655462184, "grad_norm": 11.405836988138063, "learning_rate": 6.393194628114885e-06, "loss": 1.672929286956787, "step": 671 }, { "epoch": 1.4117647058823528, "grad_norm": 13.1586681372233, "learning_rate": 6.381450504131339e-06, "loss": 1.2778139114379883, "step": 672 }, { "epoch": 1.4138655462184873, "grad_norm": 9.462564203496632, "learning_rate": 6.369698123027664e-06, "loss": 1.6472318172454834, "step": 673 }, { "epoch": 1.415966386554622, "grad_norm": 12.367075684146661, "learning_rate": 6.357937555049465e-06, "loss": 1.5301233530044556, "step": 674 }, { "epoch": 1.4180672268907564, "grad_norm": 9.502388277835697, "learning_rate": 6.3461688704912735e-06, "loss": 1.5423755645751953, "step": 675 }, { "epoch": 1.4201680672268908, "grad_norm": 13.340625777023925, "learning_rate": 6.334392139696144e-06, "loss": 0.8435590863227844, "step": 676 }, { "epoch": 1.4222689075630253, "grad_norm": 9.498703079540906, "learning_rate": 6.322607433055217e-06, "loss": 0.9243001937866211, "step": 677 }, { "epoch": 1.4243697478991597, "grad_norm": 16.692002135074148, "learning_rate": 6.310814821007312e-06, "loss": 1.1370623111724854, "step": 678 }, { "epoch": 1.4264705882352942, "grad_norm": 14.252581322539957, "learning_rate": 6.299014374038493e-06, "loss": 1.8121721744537354, "step": 679 }, { "epoch": 1.4285714285714286, "grad_norm": 12.146719665307664, "learning_rate": 6.287206162681663e-06, "loss": 1.5701857805252075, "step": 680 }, { "epoch": 1.430672268907563, "grad_norm": 11.383072024184132, "learning_rate": 6.275390257516125e-06, "loss": 1.7376922369003296, "step": 681 }, { "epoch": 1.4327731092436975, "grad_norm": 7.389859803918485, "learning_rate": 6.263566729167177e-06, "loss": 1.722080111503601, "step": 682 }, { "epoch": 1.434873949579832, "grad_norm": 11.051749495669629, "learning_rate": 6.251735648305676e-06, "loss": 1.8646998405456543, "step": 683 }, { "epoch": 1.4369747899159664, "grad_norm": 8.056416794494698, "learning_rate": 6.239897085647624e-06, "loss": 1.6373791694641113, "step": 684 }, { "epoch": 1.4390756302521008, "grad_norm": 14.269272004271027, "learning_rate": 6.228051111953742e-06, "loss": 1.5332825183868408, "step": 685 }, { "epoch": 1.4411764705882353, "grad_norm": 11.569261548147155, "learning_rate": 6.216197798029049e-06, "loss": 1.7713117599487305, "step": 686 }, { "epoch": 1.4432773109243697, "grad_norm": 8.013171505509781, "learning_rate": 6.204337214722435e-06, "loss": 1.3197343349456787, "step": 687 }, { "epoch": 1.4453781512605042, "grad_norm": 7.988425778687254, "learning_rate": 6.192469432926241e-06, "loss": 1.3940856456756592, "step": 688 }, { "epoch": 1.4474789915966386, "grad_norm": 14.591944041736712, "learning_rate": 6.180594523575838e-06, "loss": 2.0876762866973877, "step": 689 }, { "epoch": 1.449579831932773, "grad_norm": 13.421486753054541, "learning_rate": 6.1687125576491945e-06, "loss": 2.5141618251800537, "step": 690 }, { "epoch": 1.4516806722689075, "grad_norm": 11.284766831118931, "learning_rate": 6.156823606166461e-06, "loss": 0.9575009942054749, "step": 691 }, { "epoch": 1.453781512605042, "grad_norm": 10.180747973970707, "learning_rate": 6.144927740189537e-06, "loss": 1.2732771635055542, "step": 692 }, { "epoch": 1.4558823529411764, "grad_norm": 18.346962905469923, "learning_rate": 6.133025030821656e-06, "loss": 1.0447793006896973, "step": 693 }, { "epoch": 1.4579831932773109, "grad_norm": 9.72623535803224, "learning_rate": 6.12111554920695e-06, "loss": 2.069892406463623, "step": 694 }, { "epoch": 1.4600840336134453, "grad_norm": 12.298140767363686, "learning_rate": 6.1091993665300354e-06, "loss": 1.4193060398101807, "step": 695 }, { "epoch": 1.46218487394958, "grad_norm": 13.962463696814781, "learning_rate": 6.0972765540155764e-06, "loss": 1.8489269018173218, "step": 696 }, { "epoch": 1.4642857142857144, "grad_norm": 28.27872673921732, "learning_rate": 6.08534718292787e-06, "loss": 1.8245782852172852, "step": 697 }, { "epoch": 1.4663865546218489, "grad_norm": 11.704836274205533, "learning_rate": 6.07341132457041e-06, "loss": 1.8135966062545776, "step": 698 }, { "epoch": 1.4684873949579833, "grad_norm": 10.329932140617693, "learning_rate": 6.061469050285469e-06, "loss": 1.2886388301849365, "step": 699 }, { "epoch": 1.4705882352941178, "grad_norm": 7.515435335019253, "learning_rate": 6.049520431453666e-06, "loss": 1.8994669914245605, "step": 700 }, { "epoch": 1.4726890756302522, "grad_norm": 8.559344765158919, "learning_rate": 6.037565539493542e-06, "loss": 1.830640196800232, "step": 701 }, { "epoch": 1.4747899159663866, "grad_norm": 11.041841545301232, "learning_rate": 6.025604445861137e-06, "loss": 1.253919005393982, "step": 702 }, { "epoch": 1.476890756302521, "grad_norm": 8.205733723815058, "learning_rate": 6.013637222049554e-06, "loss": 1.4687739610671997, "step": 703 }, { "epoch": 1.4789915966386555, "grad_norm": 11.178455036225843, "learning_rate": 6.0016639395885424e-06, "loss": 0.8241528272628784, "step": 704 }, { "epoch": 1.48109243697479, "grad_norm": 9.38917529173767, "learning_rate": 5.98968467004406e-06, "loss": 0.9833969473838806, "step": 705 }, { "epoch": 1.4831932773109244, "grad_norm": 12.194704863353925, "learning_rate": 5.977699485017855e-06, "loss": 1.4603691101074219, "step": 706 }, { "epoch": 1.4852941176470589, "grad_norm": 15.962403885996371, "learning_rate": 5.965708456147028e-06, "loss": 1.2566254138946533, "step": 707 }, { "epoch": 1.4873949579831933, "grad_norm": 12.538339280369007, "learning_rate": 5.953711655103615e-06, "loss": 1.1779121160507202, "step": 708 }, { "epoch": 1.4894957983193278, "grad_norm": 13.11480387819097, "learning_rate": 5.941709153594146e-06, "loss": 2.1752524375915527, "step": 709 }, { "epoch": 1.4915966386554622, "grad_norm": 13.674164350381794, "learning_rate": 5.92970102335923e-06, "loss": 1.368391752243042, "step": 710 }, { "epoch": 1.4936974789915967, "grad_norm": 14.25334329729132, "learning_rate": 5.917687336173116e-06, "loss": 1.4870836734771729, "step": 711 }, { "epoch": 1.495798319327731, "grad_norm": 6.953756120767862, "learning_rate": 5.905668163843269e-06, "loss": 1.5822714567184448, "step": 712 }, { "epoch": 1.4978991596638656, "grad_norm": 11.644576591550592, "learning_rate": 5.893643578209939e-06, "loss": 1.5158865451812744, "step": 713 }, { "epoch": 1.5, "grad_norm": 7.891690515863711, "learning_rate": 5.881613651145732e-06, "loss": 1.0833930969238281, "step": 714 }, { "epoch": 1.5021008403361344, "grad_norm": 10.374951659973064, "learning_rate": 5.8695784545551815e-06, "loss": 1.2957074642181396, "step": 715 }, { "epoch": 1.504201680672269, "grad_norm": 11.404797448034625, "learning_rate": 5.8575380603743155e-06, "loss": 1.541155457496643, "step": 716 }, { "epoch": 1.5063025210084033, "grad_norm": 9.289021596431404, "learning_rate": 5.8454925405702326e-06, "loss": 1.7509238719940186, "step": 717 }, { "epoch": 1.5084033613445378, "grad_norm": 15.014764167830913, "learning_rate": 5.833441967140662e-06, "loss": 1.8062071800231934, "step": 718 }, { "epoch": 1.5105042016806722, "grad_norm": 11.714801378063543, "learning_rate": 5.821386412113546e-06, "loss": 1.4850780963897705, "step": 719 }, { "epoch": 1.5126050420168067, "grad_norm": 12.362413690497235, "learning_rate": 5.809325947546596e-06, "loss": 1.1842257976531982, "step": 720 }, { "epoch": 1.5147058823529411, "grad_norm": 18.21069319599996, "learning_rate": 5.797260645526873e-06, "loss": 1.396120548248291, "step": 721 }, { "epoch": 1.5168067226890756, "grad_norm": 9.57576869626496, "learning_rate": 5.785190578170351e-06, "loss": 1.2990989685058594, "step": 722 }, { "epoch": 1.51890756302521, "grad_norm": 9.618803676236782, "learning_rate": 5.773115817621487e-06, "loss": 1.8467388153076172, "step": 723 }, { "epoch": 1.5210084033613445, "grad_norm": 11.794758813101549, "learning_rate": 5.761036436052788e-06, "loss": 1.6907732486724854, "step": 724 }, { "epoch": 1.523109243697479, "grad_norm": 10.45395101852907, "learning_rate": 5.748952505664385e-06, "loss": 1.265946388244629, "step": 725 }, { "epoch": 1.5252100840336134, "grad_norm": 39.03800117968252, "learning_rate": 5.736864098683595e-06, "loss": 1.2473053932189941, "step": 726 }, { "epoch": 1.5273109243697478, "grad_norm": 12.736788173749753, "learning_rate": 5.724771287364492e-06, "loss": 0.8382349014282227, "step": 727 }, { "epoch": 1.5294117647058822, "grad_norm": 9.695123568985625, "learning_rate": 5.712674143987478e-06, "loss": 0.6312862038612366, "step": 728 }, { "epoch": 1.5315126050420167, "grad_norm": 19.807295601128907, "learning_rate": 5.700572740858847e-06, "loss": 2.154848575592041, "step": 729 }, { "epoch": 1.5336134453781511, "grad_norm": 8.348583198098744, "learning_rate": 5.688467150310353e-06, "loss": 2.034533739089966, "step": 730 }, { "epoch": 1.5357142857142856, "grad_norm": 17.36525324904992, "learning_rate": 5.67635744469878e-06, "loss": 1.1331748962402344, "step": 731 }, { "epoch": 1.53781512605042, "grad_norm": 16.447167153207392, "learning_rate": 5.664243696405509e-06, "loss": 2.139069080352783, "step": 732 }, { "epoch": 1.5399159663865545, "grad_norm": 8.422362967066016, "learning_rate": 5.652125977836083e-06, "loss": 1.5174198150634766, "step": 733 }, { "epoch": 1.542016806722689, "grad_norm": 11.885541322376927, "learning_rate": 5.640004361419776e-06, "loss": 1.4445990324020386, "step": 734 }, { "epoch": 1.5441176470588234, "grad_norm": 13.004468722411309, "learning_rate": 5.627878919609162e-06, "loss": 1.3474795818328857, "step": 735 }, { "epoch": 1.5462184873949578, "grad_norm": 8.116038341885554, "learning_rate": 5.615749724879677e-06, "loss": 1.4871881008148193, "step": 736 }, { "epoch": 1.5483193277310925, "grad_norm": 9.971232266174457, "learning_rate": 5.603616849729191e-06, "loss": 1.308741569519043, "step": 737 }, { "epoch": 1.550420168067227, "grad_norm": 9.853281920667216, "learning_rate": 5.591480366677571e-06, "loss": 1.712050199508667, "step": 738 }, { "epoch": 1.5525210084033614, "grad_norm": 10.993711611399497, "learning_rate": 5.579340348266251e-06, "loss": 1.5636662244796753, "step": 739 }, { "epoch": 1.5546218487394958, "grad_norm": 9.673522828347147, "learning_rate": 5.5671968670577935e-06, "loss": 2.132948160171509, "step": 740 }, { "epoch": 1.5567226890756303, "grad_norm": 8.375544989800046, "learning_rate": 5.55504999563546e-06, "loss": 1.3193635940551758, "step": 741 }, { "epoch": 1.5588235294117647, "grad_norm": 17.452697703036844, "learning_rate": 5.542899806602776e-06, "loss": 2.288175582885742, "step": 742 }, { "epoch": 1.5609243697478992, "grad_norm": 9.46737262414516, "learning_rate": 5.530746372583097e-06, "loss": 0.9925522804260254, "step": 743 }, { "epoch": 1.5630252100840336, "grad_norm": 11.56785515084437, "learning_rate": 5.518589766219173e-06, "loss": 1.0975109338760376, "step": 744 }, { "epoch": 1.565126050420168, "grad_norm": 7.691252772883318, "learning_rate": 5.506430060172714e-06, "loss": 1.5021933317184448, "step": 745 }, { "epoch": 1.5672268907563025, "grad_norm": 18.209072269714934, "learning_rate": 5.494267327123965e-06, "loss": 1.3946982622146606, "step": 746 }, { "epoch": 1.569327731092437, "grad_norm": 8.665816006385743, "learning_rate": 5.482101639771255e-06, "loss": 1.1381559371948242, "step": 747 }, { "epoch": 1.5714285714285714, "grad_norm": 8.44106615949262, "learning_rate": 5.469933070830574e-06, "loss": 1.6340922117233276, "step": 748 }, { "epoch": 1.5735294117647058, "grad_norm": 15.525808686682517, "learning_rate": 5.457761693035139e-06, "loss": 1.3076329231262207, "step": 749 }, { "epoch": 1.5756302521008403, "grad_norm": 17.23937998928761, "learning_rate": 5.44558757913495e-06, "loss": 1.4544854164123535, "step": 750 }, { "epoch": 1.5777310924369747, "grad_norm": 12.32276892072033, "learning_rate": 5.433410801896366e-06, "loss": 1.254534363746643, "step": 751 }, { "epoch": 1.5798319327731094, "grad_norm": 15.375685102500293, "learning_rate": 5.4212314341016645e-06, "loss": 1.6915278434753418, "step": 752 }, { "epoch": 1.5819327731092439, "grad_norm": 11.585921032972996, "learning_rate": 5.409049548548604e-06, "loss": 1.9941121339797974, "step": 753 }, { "epoch": 1.5840336134453783, "grad_norm": 20.541716513837855, "learning_rate": 5.396865218049995e-06, "loss": 1.8611130714416504, "step": 754 }, { "epoch": 1.5861344537815127, "grad_norm": 8.615776619323707, "learning_rate": 5.38467851543326e-06, "loss": 1.5740795135498047, "step": 755 }, { "epoch": 1.5882352941176472, "grad_norm": 11.309502746225869, "learning_rate": 5.3724895135400015e-06, "loss": 2.713351249694824, "step": 756 }, { "epoch": 1.5903361344537816, "grad_norm": 10.253542895381939, "learning_rate": 5.360298285225564e-06, "loss": 1.3163414001464844, "step": 757 }, { "epoch": 1.592436974789916, "grad_norm": 9.938347580214066, "learning_rate": 5.3481049033586e-06, "loss": 1.2127149105072021, "step": 758 }, { "epoch": 1.5945378151260505, "grad_norm": 8.342953472089572, "learning_rate": 5.335909440820635e-06, "loss": 1.7897974252700806, "step": 759 }, { "epoch": 1.596638655462185, "grad_norm": 15.069858504573101, "learning_rate": 5.323711970505627e-06, "loss": 1.1387288570404053, "step": 760 }, { "epoch": 1.5987394957983194, "grad_norm": 11.085133444710992, "learning_rate": 5.311512565319542e-06, "loss": 1.3364837169647217, "step": 761 }, { "epoch": 1.6008403361344539, "grad_norm": 32.930598343392575, "learning_rate": 5.299311298179904e-06, "loss": 1.5517654418945312, "step": 762 }, { "epoch": 1.6029411764705883, "grad_norm": 8.444149682468657, "learning_rate": 5.287108242015371e-06, "loss": 1.3167724609375, "step": 763 }, { "epoch": 1.6050420168067228, "grad_norm": 11.755149351980176, "learning_rate": 5.27490346976529e-06, "loss": 0.9891781210899353, "step": 764 }, { "epoch": 1.6071428571428572, "grad_norm": 7.559294905407309, "learning_rate": 5.2626970543792685e-06, "loss": 1.4272327423095703, "step": 765 }, { "epoch": 1.6092436974789917, "grad_norm": 15.95519407006552, "learning_rate": 5.250489068816734e-06, "loss": 1.5374692678451538, "step": 766 }, { "epoch": 1.611344537815126, "grad_norm": 11.13469591726469, "learning_rate": 5.238279586046499e-06, "loss": 2.130378484725952, "step": 767 }, { "epoch": 1.6134453781512605, "grad_norm": 7.655252597687492, "learning_rate": 5.226068679046327e-06, "loss": 1.0989816188812256, "step": 768 }, { "epoch": 1.615546218487395, "grad_norm": 13.788475789937872, "learning_rate": 5.21385642080249e-06, "loss": 1.4945666790008545, "step": 769 }, { "epoch": 1.6176470588235294, "grad_norm": 10.474320077857882, "learning_rate": 5.201642884309341e-06, "loss": 2.0057296752929688, "step": 770 }, { "epoch": 1.6197478991596639, "grad_norm": 15.324254986466453, "learning_rate": 5.189428142568872e-06, "loss": 1.1791839599609375, "step": 771 }, { "epoch": 1.6218487394957983, "grad_norm": 9.947837053439045, "learning_rate": 5.177212268590277e-06, "loss": 1.836449384689331, "step": 772 }, { "epoch": 1.6239495798319328, "grad_norm": 13.279110268267903, "learning_rate": 5.16499533538952e-06, "loss": 1.711057424545288, "step": 773 }, { "epoch": 1.6260504201680672, "grad_norm": 8.331904700424623, "learning_rate": 5.152777415988894e-06, "loss": 1.5274529457092285, "step": 774 }, { "epoch": 1.6281512605042017, "grad_norm": 8.55624828912363, "learning_rate": 5.140558583416591e-06, "loss": 1.687756061553955, "step": 775 }, { "epoch": 1.6302521008403361, "grad_norm": 13.622302699665928, "learning_rate": 5.128338910706254e-06, "loss": 1.857285976409912, "step": 776 }, { "epoch": 1.6323529411764706, "grad_norm": 10.381639920961357, "learning_rate": 5.1161184708965525e-06, "loss": 2.2893779277801514, "step": 777 }, { "epoch": 1.634453781512605, "grad_norm": 13.136015822018155, "learning_rate": 5.103897337030742e-06, "loss": 1.299177646636963, "step": 778 }, { "epoch": 1.6365546218487395, "grad_norm": 12.717300364122215, "learning_rate": 5.091675582156224e-06, "loss": 1.156067132949829, "step": 779 }, { "epoch": 1.638655462184874, "grad_norm": 12.064277910399447, "learning_rate": 5.07945327932411e-06, "loss": 1.6888867616653442, "step": 780 }, { "epoch": 1.6407563025210083, "grad_norm": 18.58505757321674, "learning_rate": 5.067230501588792e-06, "loss": 2.480485677719116, "step": 781 }, { "epoch": 1.6428571428571428, "grad_norm": 10.797297449293279, "learning_rate": 5.055007322007497e-06, "loss": 1.1827846765518188, "step": 782 }, { "epoch": 1.6449579831932772, "grad_norm": 8.451797960661173, "learning_rate": 5.0427838136398545e-06, "loss": 0.9974920749664307, "step": 783 }, { "epoch": 1.6470588235294117, "grad_norm": 9.35430238275204, "learning_rate": 5.0305600495474586e-06, "loss": 1.3341560363769531, "step": 784 }, { "epoch": 1.6491596638655461, "grad_norm": 12.853381185509344, "learning_rate": 5.018336102793433e-06, "loss": 1.7801398038864136, "step": 785 }, { "epoch": 1.6512605042016806, "grad_norm": 9.421435792652487, "learning_rate": 5.006112046441993e-06, "loss": 1.5409959554672241, "step": 786 }, { "epoch": 1.653361344537815, "grad_norm": 17.569411033601973, "learning_rate": 4.993887953558008e-06, "loss": 1.7089118957519531, "step": 787 }, { "epoch": 1.6554621848739495, "grad_norm": 7.577368880962854, "learning_rate": 4.981663897206568e-06, "loss": 1.5989807844161987, "step": 788 }, { "epoch": 1.657563025210084, "grad_norm": 8.077886545894733, "learning_rate": 4.969439950452543e-06, "loss": 1.4693567752838135, "step": 789 }, { "epoch": 1.6596638655462184, "grad_norm": 9.785283960809164, "learning_rate": 4.957216186360147e-06, "loss": 1.7319889068603516, "step": 790 }, { "epoch": 1.6617647058823528, "grad_norm": 9.333867287657988, "learning_rate": 4.944992677992505e-06, "loss": 1.38368558883667, "step": 791 }, { "epoch": 1.6638655462184873, "grad_norm": 12.124257442308217, "learning_rate": 4.932769498411209e-06, "loss": 1.9276368618011475, "step": 792 }, { "epoch": 1.6659663865546217, "grad_norm": 11.917503250417354, "learning_rate": 4.9205467206758914e-06, "loss": 1.5189965963363647, "step": 793 }, { "epoch": 1.6680672268907561, "grad_norm": 10.697092148525952, "learning_rate": 4.908324417843779e-06, "loss": 1.8080897331237793, "step": 794 }, { "epoch": 1.6701680672268906, "grad_norm": 13.667659355410398, "learning_rate": 4.896102662969259e-06, "loss": 1.3283686637878418, "step": 795 }, { "epoch": 1.6722689075630253, "grad_norm": 10.250252779637346, "learning_rate": 4.883881529103448e-06, "loss": 1.7254778146743774, "step": 796 }, { "epoch": 1.6743697478991597, "grad_norm": 23.09605024251299, "learning_rate": 4.8716610892937486e-06, "loss": 1.882294774055481, "step": 797 }, { "epoch": 1.6764705882352942, "grad_norm": 9.75260822435192, "learning_rate": 4.859441416583412e-06, "loss": 1.955444574356079, "step": 798 }, { "epoch": 1.6785714285714286, "grad_norm": 9.214097989478232, "learning_rate": 4.847222584011107e-06, "loss": 1.3442355394363403, "step": 799 }, { "epoch": 1.680672268907563, "grad_norm": 7.0621430973608295, "learning_rate": 4.8350046646104815e-06, "loss": 1.3532618284225464, "step": 800 }, { "epoch": 1.6827731092436975, "grad_norm": 9.43032823639271, "learning_rate": 4.8227877314097245e-06, "loss": 1.8527226448059082, "step": 801 }, { "epoch": 1.684873949579832, "grad_norm": 42.238546118436666, "learning_rate": 4.81057185743113e-06, "loss": 2.1312098503112793, "step": 802 }, { "epoch": 1.6869747899159664, "grad_norm": 19.238236915867475, "learning_rate": 4.798357115690661e-06, "loss": 1.3131635189056396, "step": 803 }, { "epoch": 1.6890756302521008, "grad_norm": 11.839412971623531, "learning_rate": 4.7861435791975124e-06, "loss": 1.057523488998413, "step": 804 }, { "epoch": 1.6911764705882353, "grad_norm": 16.330203992434377, "learning_rate": 4.7739313209536755e-06, "loss": 1.510682225227356, "step": 805 }, { "epoch": 1.6932773109243697, "grad_norm": 9.476255130895225, "learning_rate": 4.761720413953503e-06, "loss": 2.0877933502197266, "step": 806 }, { "epoch": 1.6953781512605042, "grad_norm": 18.00296013944893, "learning_rate": 4.7495109311832665e-06, "loss": 0.9936963319778442, "step": 807 }, { "epoch": 1.6974789915966386, "grad_norm": 12.213588808955969, "learning_rate": 4.737302945620732e-06, "loss": 1.4148988723754883, "step": 808 }, { "epoch": 1.699579831932773, "grad_norm": 16.652878791868638, "learning_rate": 4.72509653023471e-06, "loss": 1.5457355976104736, "step": 809 }, { "epoch": 1.7016806722689075, "grad_norm": 12.137844142300786, "learning_rate": 4.712891757984629e-06, "loss": 1.1069682836532593, "step": 810 }, { "epoch": 1.7037815126050422, "grad_norm": 10.383790506526612, "learning_rate": 4.700688701820096e-06, "loss": 2.270923376083374, "step": 811 }, { "epoch": 1.7058823529411766, "grad_norm": 9.870143573450948, "learning_rate": 4.688487434680459e-06, "loss": 2.1212430000305176, "step": 812 }, { "epoch": 1.707983193277311, "grad_norm": 7.378583815892385, "learning_rate": 4.6762880294943734e-06, "loss": 1.340724229812622, "step": 813 }, { "epoch": 1.7100840336134455, "grad_norm": 10.60633944294548, "learning_rate": 4.664090559179367e-06, "loss": 1.1250860691070557, "step": 814 }, { "epoch": 1.71218487394958, "grad_norm": 10.395256226604776, "learning_rate": 4.651895096641402e-06, "loss": 1.3906278610229492, "step": 815 }, { "epoch": 1.7142857142857144, "grad_norm": 14.563103162972197, "learning_rate": 4.639701714774439e-06, "loss": 1.5373984575271606, "step": 816 }, { "epoch": 1.7163865546218489, "grad_norm": 11.845585553697056, "learning_rate": 4.627510486459999e-06, "loss": 1.1511554718017578, "step": 817 }, { "epoch": 1.7184873949579833, "grad_norm": 12.016172591706953, "learning_rate": 4.615321484566741e-06, "loss": 1.0511482954025269, "step": 818 }, { "epoch": 1.7205882352941178, "grad_norm": 24.328922920047308, "learning_rate": 4.603134781950007e-06, "loss": 1.6539651155471802, "step": 819 }, { "epoch": 1.7226890756302522, "grad_norm": 10.354684370723726, "learning_rate": 4.590950451451397e-06, "loss": 1.7340842485427856, "step": 820 }, { "epoch": 1.7247899159663866, "grad_norm": 9.128876197606015, "learning_rate": 4.578768565898337e-06, "loss": 1.9771497249603271, "step": 821 }, { "epoch": 1.726890756302521, "grad_norm": 13.12308018791223, "learning_rate": 4.566589198103635e-06, "loss": 1.6702903509140015, "step": 822 }, { "epoch": 1.7289915966386555, "grad_norm": 14.032448855066312, "learning_rate": 4.554412420865052e-06, "loss": 1.2594914436340332, "step": 823 }, { "epoch": 1.73109243697479, "grad_norm": 11.631886990165667, "learning_rate": 4.542238306964863e-06, "loss": 1.2319787740707397, "step": 824 }, { "epoch": 1.7331932773109244, "grad_norm": 12.012430999144566, "learning_rate": 4.530066929169427e-06, "loss": 1.631975769996643, "step": 825 }, { "epoch": 1.7352941176470589, "grad_norm": 14.157695450219515, "learning_rate": 4.5178983602287476e-06, "loss": 1.4831879138946533, "step": 826 }, { "epoch": 1.7373949579831933, "grad_norm": 12.493623072525319, "learning_rate": 4.505732672876037e-06, "loss": 1.225109338760376, "step": 827 }, { "epoch": 1.7394957983193278, "grad_norm": 30.459517703838213, "learning_rate": 4.493569939827288e-06, "loss": 1.6191500425338745, "step": 828 }, { "epoch": 1.7415966386554622, "grad_norm": 10.531191971687601, "learning_rate": 4.48141023378083e-06, "loss": 1.562519907951355, "step": 829 }, { "epoch": 1.7436974789915967, "grad_norm": 20.955204277046732, "learning_rate": 4.4692536274169055e-06, "loss": 1.5889461040496826, "step": 830 }, { "epoch": 1.745798319327731, "grad_norm": 16.275459231962, "learning_rate": 4.457100193397226e-06, "loss": 0.8582566976547241, "step": 831 }, { "epoch": 1.7478991596638656, "grad_norm": 8.59181477774897, "learning_rate": 4.444950004364542e-06, "loss": 1.2409437894821167, "step": 832 }, { "epoch": 1.75, "grad_norm": 11.280859647806443, "learning_rate": 4.432803132942208e-06, "loss": 1.2449380159378052, "step": 833 }, { "epoch": 1.7521008403361344, "grad_norm": 13.088404793322951, "learning_rate": 4.420659651733751e-06, "loss": 1.2676522731781006, "step": 834 }, { "epoch": 1.754201680672269, "grad_norm": 10.60776961423833, "learning_rate": 4.40851963332243e-06, "loss": 1.5941420793533325, "step": 835 }, { "epoch": 1.7563025210084033, "grad_norm": 17.131655688023486, "learning_rate": 4.396383150270811e-06, "loss": 1.1451562643051147, "step": 836 }, { "epoch": 1.7584033613445378, "grad_norm": 16.683478300158875, "learning_rate": 4.384250275120325e-06, "loss": 1.4305951595306396, "step": 837 }, { "epoch": 1.7605042016806722, "grad_norm": 23.521629142849456, "learning_rate": 4.372121080390841e-06, "loss": 1.9824583530426025, "step": 838 }, { "epoch": 1.7626050420168067, "grad_norm": 12.354999677804352, "learning_rate": 4.359995638580226e-06, "loss": 1.2548645734786987, "step": 839 }, { "epoch": 1.7647058823529411, "grad_norm": 11.031648245105625, "learning_rate": 4.34787402216392e-06, "loss": 1.8208611011505127, "step": 840 }, { "epoch": 1.7668067226890756, "grad_norm": 12.87350659067892, "learning_rate": 4.335756303594493e-06, "loss": 1.3555166721343994, "step": 841 }, { "epoch": 1.76890756302521, "grad_norm": 11.399790863860508, "learning_rate": 4.323642555301222e-06, "loss": 0.9843342900276184, "step": 842 }, { "epoch": 1.7710084033613445, "grad_norm": 9.734229808167726, "learning_rate": 4.311532849689649e-06, "loss": 1.5203514099121094, "step": 843 }, { "epoch": 1.773109243697479, "grad_norm": 12.62619531995832, "learning_rate": 4.299427259141155e-06, "loss": 2.226682662963867, "step": 844 }, { "epoch": 1.7752100840336134, "grad_norm": 9.352423304681453, "learning_rate": 4.2873258560125244e-06, "loss": 1.4532074928283691, "step": 845 }, { "epoch": 1.7773109243697478, "grad_norm": 13.614475982132188, "learning_rate": 4.275228712635511e-06, "loss": 0.939800500869751, "step": 846 }, { "epoch": 1.7794117647058822, "grad_norm": 7.898241318034454, "learning_rate": 4.263135901316406e-06, "loss": 0.9362924098968506, "step": 847 }, { "epoch": 1.7815126050420167, "grad_norm": 12.878302238594825, "learning_rate": 4.251047494335616e-06, "loss": 1.4257563352584839, "step": 848 }, { "epoch": 1.7836134453781511, "grad_norm": 11.607398907584903, "learning_rate": 4.238963563947212e-06, "loss": 1.393942952156067, "step": 849 }, { "epoch": 1.7857142857142856, "grad_norm": 9.414377919796664, "learning_rate": 4.226884182378513e-06, "loss": 1.588603138923645, "step": 850 }, { "epoch": 1.78781512605042, "grad_norm": 9.24807113557597, "learning_rate": 4.2148094218296485e-06, "loss": 1.198427677154541, "step": 851 }, { "epoch": 1.7899159663865545, "grad_norm": 9.93269092312877, "learning_rate": 4.202739354473127e-06, "loss": 1.0912418365478516, "step": 852 }, { "epoch": 1.792016806722689, "grad_norm": 9.934910320433355, "learning_rate": 4.190674052453405e-06, "loss": 1.8104877471923828, "step": 853 }, { "epoch": 1.7941176470588234, "grad_norm": 9.124793784485341, "learning_rate": 4.178613587886455e-06, "loss": 1.337807297706604, "step": 854 }, { "epoch": 1.7962184873949578, "grad_norm": 12.887138564407095, "learning_rate": 4.166558032859339e-06, "loss": 1.0441133975982666, "step": 855 }, { "epoch": 1.7983193277310925, "grad_norm": 13.183784075535518, "learning_rate": 4.154507459429769e-06, "loss": 1.8002381324768066, "step": 856 }, { "epoch": 1.800420168067227, "grad_norm": 17.636030537350624, "learning_rate": 4.142461939625685e-06, "loss": 1.7534747123718262, "step": 857 }, { "epoch": 1.8025210084033614, "grad_norm": 13.439089280649503, "learning_rate": 4.13042154544482e-06, "loss": 2.5967888832092285, "step": 858 }, { "epoch": 1.8046218487394958, "grad_norm": 11.531721271683752, "learning_rate": 4.1183863488542686e-06, "loss": 0.9714012145996094, "step": 859 }, { "epoch": 1.8067226890756303, "grad_norm": 39.93239963926744, "learning_rate": 4.106356421790062e-06, "loss": 2.0358502864837646, "step": 860 }, { "epoch": 1.8088235294117647, "grad_norm": 13.143824988546921, "learning_rate": 4.094331836156732e-06, "loss": 1.2078362703323364, "step": 861 }, { "epoch": 1.8109243697478992, "grad_norm": 17.216726387192157, "learning_rate": 4.082312663826886e-06, "loss": 1.3551952838897705, "step": 862 }, { "epoch": 1.8130252100840336, "grad_norm": 9.960021372460178, "learning_rate": 4.070298976640772e-06, "loss": 1.7473708391189575, "step": 863 }, { "epoch": 1.815126050420168, "grad_norm": 8.856051876807816, "learning_rate": 4.058290846405856e-06, "loss": 1.1888244152069092, "step": 864 }, { "epoch": 1.8172268907563025, "grad_norm": 16.353016649836196, "learning_rate": 4.046288344896388e-06, "loss": 1.7867594957351685, "step": 865 }, { "epoch": 1.819327731092437, "grad_norm": 12.062167246659023, "learning_rate": 4.034291543852973e-06, "loss": 1.2903845310211182, "step": 866 }, { "epoch": 1.8214285714285714, "grad_norm": 12.560697459985716, "learning_rate": 4.022300514982146e-06, "loss": 1.4051203727722168, "step": 867 }, { "epoch": 1.8235294117647058, "grad_norm": 7.497031379547495, "learning_rate": 4.010315329955941e-06, "loss": 1.7378381490707397, "step": 868 }, { "epoch": 1.8256302521008403, "grad_norm": 13.858534174862163, "learning_rate": 3.998336060411459e-06, "loss": 1.1623207330703735, "step": 869 }, { "epoch": 1.8277310924369747, "grad_norm": 16.260048868681064, "learning_rate": 3.986362777950448e-06, "loss": 1.9922326803207397, "step": 870 }, { "epoch": 1.8298319327731094, "grad_norm": 10.526911105706054, "learning_rate": 3.9743955541388645e-06, "loss": 1.8183355331420898, "step": 871 }, { "epoch": 1.8319327731092439, "grad_norm": 30.127353686918507, "learning_rate": 3.962434460506459e-06, "loss": 1.20865797996521, "step": 872 }, { "epoch": 1.8340336134453783, "grad_norm": 9.693811930511602, "learning_rate": 3.950479568546336e-06, "loss": 1.2787063121795654, "step": 873 }, { "epoch": 1.8361344537815127, "grad_norm": 18.05960154864148, "learning_rate": 3.938530949714533e-06, "loss": 2.1469886302948, "step": 874 }, { "epoch": 1.8382352941176472, "grad_norm": 18.30460348258425, "learning_rate": 3.926588675429591e-06, "loss": 2.5014071464538574, "step": 875 }, { "epoch": 1.8403361344537816, "grad_norm": 9.407437407393076, "learning_rate": 3.914652817072132e-06, "loss": 1.2857444286346436, "step": 876 }, { "epoch": 1.842436974789916, "grad_norm": 16.37890925961833, "learning_rate": 3.902723445984425e-06, "loss": 0.8846265077590942, "step": 877 }, { "epoch": 1.8445378151260505, "grad_norm": 23.642202819656244, "learning_rate": 3.890800633469968e-06, "loss": 3.6164169311523438, "step": 878 }, { "epoch": 1.846638655462185, "grad_norm": 13.713411145435602, "learning_rate": 3.878884450793053e-06, "loss": 1.778512954711914, "step": 879 }, { "epoch": 1.8487394957983194, "grad_norm": 14.191930149580415, "learning_rate": 3.866974969178348e-06, "loss": 1.2984943389892578, "step": 880 }, { "epoch": 1.8508403361344539, "grad_norm": 11.358190736464952, "learning_rate": 3.855072259810465e-06, "loss": 1.233088493347168, "step": 881 }, { "epoch": 1.8529411764705883, "grad_norm": 12.19590848916144, "learning_rate": 3.8431763938335415e-06, "loss": 1.0973716974258423, "step": 882 }, { "epoch": 1.8550420168067228, "grad_norm": 8.299847504153597, "learning_rate": 3.831287442350806e-06, "loss": 1.9479036331176758, "step": 883 }, { "epoch": 1.8571428571428572, "grad_norm": 25.32246065502413, "learning_rate": 3.819405476424164e-06, "loss": 2.3243212699890137, "step": 884 }, { "epoch": 1.8592436974789917, "grad_norm": 16.82297657925073, "learning_rate": 3.8075305670737605e-06, "loss": 2.167454242706299, "step": 885 }, { "epoch": 1.861344537815126, "grad_norm": 21.753642919267325, "learning_rate": 3.795662785277568e-06, "loss": 1.3896931409835815, "step": 886 }, { "epoch": 1.8634453781512605, "grad_norm": 11.74483761622084, "learning_rate": 3.783802201970953e-06, "loss": 1.7062684297561646, "step": 887 }, { "epoch": 1.865546218487395, "grad_norm": 10.437798829559927, "learning_rate": 3.7719488880462596e-06, "loss": 1.996096134185791, "step": 888 }, { "epoch": 1.8676470588235294, "grad_norm": 12.757708819402238, "learning_rate": 3.7601029143523767e-06, "loss": 0.9396399259567261, "step": 889 }, { "epoch": 1.8697478991596639, "grad_norm": 9.851656503119592, "learning_rate": 3.748264351694324e-06, "loss": 1.384545922279358, "step": 890 }, { "epoch": 1.8718487394957983, "grad_norm": 9.806551408884758, "learning_rate": 3.7364332708328232e-06, "loss": 1.298504114151001, "step": 891 }, { "epoch": 1.8739495798319328, "grad_norm": 10.623737292924032, "learning_rate": 3.7246097424838746e-06, "loss": 1.395151138305664, "step": 892 }, { "epoch": 1.8760504201680672, "grad_norm": 10.061693679195699, "learning_rate": 3.712793837318338e-06, "loss": 0.9280238747596741, "step": 893 }, { "epoch": 1.8781512605042017, "grad_norm": 15.132177107654861, "learning_rate": 3.7009856259615074e-06, "loss": 1.3795464038848877, "step": 894 }, { "epoch": 1.8802521008403361, "grad_norm": 6.163863926370169, "learning_rate": 3.689185178992689e-06, "loss": 1.3113572597503662, "step": 895 }, { "epoch": 1.8823529411764706, "grad_norm": 9.130793358710777, "learning_rate": 3.677392566944783e-06, "loss": 1.580859661102295, "step": 896 }, { "epoch": 1.884453781512605, "grad_norm": 9.259911678328434, "learning_rate": 3.665607860303857e-06, "loss": 1.5074641704559326, "step": 897 }, { "epoch": 1.8865546218487395, "grad_norm": 11.62240320198324, "learning_rate": 3.653831129508727e-06, "loss": 1.438436508178711, "step": 898 }, { "epoch": 1.888655462184874, "grad_norm": 21.40632893627489, "learning_rate": 3.642062444950537e-06, "loss": 2.4116339683532715, "step": 899 }, { "epoch": 1.8907563025210083, "grad_norm": 11.75786970161833, "learning_rate": 3.630301876972337e-06, "loss": 1.5296099185943604, "step": 900 }, { "epoch": 1.8928571428571428, "grad_norm": 13.176259265738059, "learning_rate": 3.618549495868662e-06, "loss": 1.7645788192749023, "step": 901 }, { "epoch": 1.8949579831932772, "grad_norm": 8.946035833702966, "learning_rate": 3.606805371885117e-06, "loss": 1.528565526008606, "step": 902 }, { "epoch": 1.8970588235294117, "grad_norm": 9.910748332846175, "learning_rate": 3.5950695752179487e-06, "loss": 1.5352060794830322, "step": 903 }, { "epoch": 1.8991596638655461, "grad_norm": 8.872120526454859, "learning_rate": 3.5833421760136323e-06, "loss": 1.2181806564331055, "step": 904 }, { "epoch": 1.9012605042016806, "grad_norm": 8.52566041071061, "learning_rate": 3.5716232443684486e-06, "loss": 1.2715753316879272, "step": 905 }, { "epoch": 1.903361344537815, "grad_norm": 21.774545687178733, "learning_rate": 3.559912850328069e-06, "loss": 1.4792617559432983, "step": 906 }, { "epoch": 1.9054621848739495, "grad_norm": 19.592792336690916, "learning_rate": 3.5482110638871325e-06, "loss": 1.072256326675415, "step": 907 }, { "epoch": 1.907563025210084, "grad_norm": 13.065810555435853, "learning_rate": 3.5365179549888306e-06, "loss": 1.5988600254058838, "step": 908 }, { "epoch": 1.9096638655462184, "grad_norm": 21.891373834171244, "learning_rate": 3.524833593524487e-06, "loss": 2.473078489303589, "step": 909 }, { "epoch": 1.9117647058823528, "grad_norm": 10.756654205561983, "learning_rate": 3.513158049333141e-06, "loss": 1.8987966775894165, "step": 910 }, { "epoch": 1.9138655462184873, "grad_norm": 11.56568429501611, "learning_rate": 3.50149139220113e-06, "loss": 1.6164718866348267, "step": 911 }, { "epoch": 1.9159663865546217, "grad_norm": 11.650144499924947, "learning_rate": 3.4898336918616726e-06, "loss": 1.3376764059066772, "step": 912 }, { "epoch": 1.9180672268907561, "grad_norm": 10.167435576576402, "learning_rate": 3.47818501799445e-06, "loss": 1.7546143531799316, "step": 913 }, { "epoch": 1.9201680672268906, "grad_norm": 16.044159587879186, "learning_rate": 3.4665454402251937e-06, "loss": 1.4916424751281738, "step": 914 }, { "epoch": 1.9222689075630253, "grad_norm": 9.485222637010883, "learning_rate": 3.4549150281252635e-06, "loss": 1.6852712631225586, "step": 915 }, { "epoch": 1.9243697478991597, "grad_norm": 17.238783509738973, "learning_rate": 3.443293851211237e-06, "loss": 1.7150108814239502, "step": 916 }, { "epoch": 1.9264705882352942, "grad_norm": 13.404050851317407, "learning_rate": 3.4316819789444893e-06, "loss": 1.7211201190948486, "step": 917 }, { "epoch": 1.9285714285714286, "grad_norm": 12.556393984422412, "learning_rate": 3.4200794807307834e-06, "loss": 0.7268713712692261, "step": 918 }, { "epoch": 1.930672268907563, "grad_norm": 18.049874392140797, "learning_rate": 3.40848642591985e-06, "loss": 1.4612197875976562, "step": 919 }, { "epoch": 1.9327731092436975, "grad_norm": 15.899805779516837, "learning_rate": 3.3969028838049765e-06, "loss": 2.3325533866882324, "step": 920 }, { "epoch": 1.934873949579832, "grad_norm": 12.244325200092801, "learning_rate": 3.3853289236225917e-06, "loss": 1.3644397258758545, "step": 921 }, { "epoch": 1.9369747899159664, "grad_norm": 12.06507650622523, "learning_rate": 3.37376461455185e-06, "loss": 1.405503273010254, "step": 922 }, { "epoch": 1.9390756302521008, "grad_norm": 16.537370916012062, "learning_rate": 3.362210025714222e-06, "loss": 1.1844987869262695, "step": 923 }, { "epoch": 1.9411764705882353, "grad_norm": 10.411642432611085, "learning_rate": 3.350665226173078e-06, "loss": 2.294912815093994, "step": 924 }, { "epoch": 1.9432773109243697, "grad_norm": 7.385187019711569, "learning_rate": 3.339130284933276e-06, "loss": 1.3060452938079834, "step": 925 }, { "epoch": 1.9453781512605042, "grad_norm": 11.928015027901997, "learning_rate": 3.327605270940751e-06, "loss": 1.5017865896224976, "step": 926 }, { "epoch": 1.9474789915966386, "grad_norm": 23.754968850349012, "learning_rate": 3.316090253082101e-06, "loss": 1.9816479682922363, "step": 927 }, { "epoch": 1.949579831932773, "grad_norm": 15.3357371335637, "learning_rate": 3.304585300184173e-06, "loss": 1.4715440273284912, "step": 928 }, { "epoch": 1.9516806722689075, "grad_norm": 12.706896195884193, "learning_rate": 3.293090481013661e-06, "loss": 1.321998953819275, "step": 929 }, { "epoch": 1.9537815126050422, "grad_norm": 8.254920710769508, "learning_rate": 3.28160586427668e-06, "loss": 1.1600078344345093, "step": 930 }, { "epoch": 1.9558823529411766, "grad_norm": 12.971411985932859, "learning_rate": 3.2701315186183692e-06, "loss": 1.5981496572494507, "step": 931 }, { "epoch": 1.957983193277311, "grad_norm": 9.707338087138305, "learning_rate": 3.258667512622475e-06, "loss": 1.9018357992172241, "step": 932 }, { "epoch": 1.9600840336134455, "grad_norm": 19.34885988489593, "learning_rate": 3.2472139148109416e-06, "loss": 1.175397515296936, "step": 933 }, { "epoch": 1.96218487394958, "grad_norm": 14.470424387669999, "learning_rate": 3.2357707936435013e-06, "loss": 0.5444597005844116, "step": 934 }, { "epoch": 1.9642857142857144, "grad_norm": 5.567855687640852, "learning_rate": 3.224338217517269e-06, "loss": 1.3773345947265625, "step": 935 }, { "epoch": 1.9663865546218489, "grad_norm": 14.622402074305704, "learning_rate": 3.212916254766326e-06, "loss": 2.028517007827759, "step": 936 }, { "epoch": 1.9684873949579833, "grad_norm": 9.79269410710096, "learning_rate": 3.20150497366132e-06, "loss": 0.9947667121887207, "step": 937 }, { "epoch": 1.9705882352941178, "grad_norm": 27.334085978635635, "learning_rate": 3.190104442409052e-06, "loss": 1.7532271146774292, "step": 938 }, { "epoch": 1.9726890756302522, "grad_norm": 12.841320289359894, "learning_rate": 3.1787147291520675e-06, "loss": 2.114809036254883, "step": 939 }, { "epoch": 1.9747899159663866, "grad_norm": 11.483734843753465, "learning_rate": 3.1673359019682538e-06, "loss": 2.2796754837036133, "step": 940 }, { "epoch": 1.976890756302521, "grad_norm": 11.440585157823008, "learning_rate": 3.1559680288704297e-06, "loss": 1.172208547592163, "step": 941 }, { "epoch": 1.9789915966386555, "grad_norm": 17.223673892561905, "learning_rate": 3.1446111778059405e-06, "loss": 1.1454124450683594, "step": 942 }, { "epoch": 1.98109243697479, "grad_norm": 9.23862037561014, "learning_rate": 3.1332654166562494e-06, "loss": 1.6078896522521973, "step": 943 }, { "epoch": 1.9831932773109244, "grad_norm": 8.824526781008897, "learning_rate": 3.1219308132365365e-06, "loss": 1.2369673252105713, "step": 944 }, { "epoch": 1.9852941176470589, "grad_norm": 10.697868664174909, "learning_rate": 3.110607435295289e-06, "loss": 1.075582504272461, "step": 945 }, { "epoch": 1.9873949579831933, "grad_norm": 10.407621014847015, "learning_rate": 3.099295350513898e-06, "loss": 0.9495413899421692, "step": 946 }, { "epoch": 1.9894957983193278, "grad_norm": 12.92768742395414, "learning_rate": 3.087994626506254e-06, "loss": 1.3577098846435547, "step": 947 }, { "epoch": 1.9915966386554622, "grad_norm": 15.824553960297969, "learning_rate": 3.0767053308183416e-06, "loss": 1.229673981666565, "step": 948 }, { "epoch": 1.9936974789915967, "grad_norm": 10.163559398603317, "learning_rate": 3.0654275309278382e-06, "loss": 1.3727761507034302, "step": 949 }, { "epoch": 1.995798319327731, "grad_norm": 24.18617513536731, "learning_rate": 3.0541612942437095e-06, "loss": 1.2849650382995605, "step": 950 }, { "epoch": 1.9978991596638656, "grad_norm": 11.306358671227525, "learning_rate": 3.0429066881058036e-06, "loss": 1.644538164138794, "step": 951 }, { "epoch": 2.0, "grad_norm": 8.518728725517601, "learning_rate": 3.031663779784454e-06, "loss": 1.627841591835022, "step": 952 }, { "epoch": 2.0021008403361344, "grad_norm": 7.931423247551538, "learning_rate": 3.020432636480074e-06, "loss": 0.2781870365142822, "step": 953 }, { "epoch": 2.004201680672269, "grad_norm": 10.005905501706337, "learning_rate": 3.0092133253227563e-06, "loss": 0.4595562815666199, "step": 954 }, { "epoch": 2.0063025210084033, "grad_norm": 11.609856477894333, "learning_rate": 2.9980059133718687e-06, "loss": 0.5985803604125977, "step": 955 }, { "epoch": 2.008403361344538, "grad_norm": 9.586217141888886, "learning_rate": 2.986810467615659e-06, "loss": 0.41152679920196533, "step": 956 }, { "epoch": 2.0105042016806722, "grad_norm": 7.487148279080336, "learning_rate": 2.9756270549708497e-06, "loss": 0.2737478017807007, "step": 957 }, { "epoch": 2.0126050420168067, "grad_norm": 7.91452475392683, "learning_rate": 2.9644557422822406e-06, "loss": 0.3578256368637085, "step": 958 }, { "epoch": 2.014705882352941, "grad_norm": 8.457279529419324, "learning_rate": 2.9532965963223076e-06, "loss": 0.3519413471221924, "step": 959 }, { "epoch": 2.0168067226890756, "grad_norm": 12.98619129259147, "learning_rate": 2.9421496837908036e-06, "loss": 0.5450835227966309, "step": 960 }, { "epoch": 2.01890756302521, "grad_norm": 10.10649833997759, "learning_rate": 2.9310150713143637e-06, "loss": 0.7118933796882629, "step": 961 }, { "epoch": 2.0210084033613445, "grad_norm": 14.582945225729798, "learning_rate": 2.9198928254461e-06, "loss": 0.6735545992851257, "step": 962 }, { "epoch": 2.023109243697479, "grad_norm": 13.612285721248329, "learning_rate": 2.908783012665209e-06, "loss": 0.6521182060241699, "step": 963 }, { "epoch": 2.0252100840336134, "grad_norm": 9.78372244316324, "learning_rate": 2.8976856993765766e-06, "loss": 0.476604163646698, "step": 964 }, { "epoch": 2.027310924369748, "grad_norm": 12.44882078007661, "learning_rate": 2.8866009519103705e-06, "loss": 0.46952176094055176, "step": 965 }, { "epoch": 2.0294117647058822, "grad_norm": 9.57528793268096, "learning_rate": 2.875528836521658e-06, "loss": 0.4453829526901245, "step": 966 }, { "epoch": 2.0315126050420167, "grad_norm": 10.439598731466846, "learning_rate": 2.864469419389997e-06, "loss": 0.2944750189781189, "step": 967 }, { "epoch": 2.033613445378151, "grad_norm": 11.844927445149192, "learning_rate": 2.8534227666190484e-06, "loss": 0.8550271391868591, "step": 968 }, { "epoch": 2.0357142857142856, "grad_norm": 13.15280870383674, "learning_rate": 2.8423889442361797e-06, "loss": 0.38192200660705566, "step": 969 }, { "epoch": 2.03781512605042, "grad_norm": 8.57118977839964, "learning_rate": 2.831368018192071e-06, "loss": 0.7316254377365112, "step": 970 }, { "epoch": 2.0399159663865545, "grad_norm": 11.697813667138346, "learning_rate": 2.8203600543603116e-06, "loss": 0.7615312337875366, "step": 971 }, { "epoch": 2.042016806722689, "grad_norm": 15.849948895683397, "learning_rate": 2.809365118537024e-06, "loss": 0.8274880647659302, "step": 972 }, { "epoch": 2.0441176470588234, "grad_norm": 10.693659421700279, "learning_rate": 2.7983832764404517e-06, "loss": 0.31469643115997314, "step": 973 }, { "epoch": 2.046218487394958, "grad_norm": 11.44650988759124, "learning_rate": 2.787414593710583e-06, "loss": 0.20855772495269775, "step": 974 }, { "epoch": 2.0483193277310923, "grad_norm": 15.81504400596588, "learning_rate": 2.7764591359087415e-06, "loss": 0.6759412884712219, "step": 975 }, { "epoch": 2.0504201680672267, "grad_norm": 10.204964393759596, "learning_rate": 2.7655169685172146e-06, "loss": 0.4555593430995941, "step": 976 }, { "epoch": 2.052521008403361, "grad_norm": 11.261244778956014, "learning_rate": 2.7545881569388404e-06, "loss": 0.22477459907531738, "step": 977 }, { "epoch": 2.0546218487394956, "grad_norm": 12.246362257874592, "learning_rate": 2.7436727664966368e-06, "loss": 0.47387319803237915, "step": 978 }, { "epoch": 2.05672268907563, "grad_norm": 8.866095217298547, "learning_rate": 2.7327708624333936e-06, "loss": 0.46857523918151855, "step": 979 }, { "epoch": 2.0588235294117645, "grad_norm": 13.695082205363835, "learning_rate": 2.7218825099112966e-06, "loss": 0.4427967071533203, "step": 980 }, { "epoch": 2.060924369747899, "grad_norm": 14.011986042608353, "learning_rate": 2.7110077740115315e-06, "loss": 1.3617768287658691, "step": 981 }, { "epoch": 2.0630252100840334, "grad_norm": 11.10530101134504, "learning_rate": 2.7001467197338905e-06, "loss": 0.8060270547866821, "step": 982 }, { "epoch": 2.0651260504201683, "grad_norm": 11.494347595831918, "learning_rate": 2.6892994119963965e-06, "loss": 0.29366880655288696, "step": 983 }, { "epoch": 2.0672268907563027, "grad_norm": 11.085826364505666, "learning_rate": 2.678465915634899e-06, "loss": 0.40074852108955383, "step": 984 }, { "epoch": 2.069327731092437, "grad_norm": 12.536178143522665, "learning_rate": 2.667646295402704e-06, "loss": 0.4710817337036133, "step": 985 }, { "epoch": 2.0714285714285716, "grad_norm": 10.305745046176337, "learning_rate": 2.656840615970169e-06, "loss": 0.37437137961387634, "step": 986 }, { "epoch": 2.073529411764706, "grad_norm": 19.03565667772653, "learning_rate": 2.646048941924333e-06, "loss": 0.9739346504211426, "step": 987 }, { "epoch": 2.0756302521008405, "grad_norm": 14.96080509908609, "learning_rate": 2.635271337768517e-06, "loss": 0.6326197981834412, "step": 988 }, { "epoch": 2.077731092436975, "grad_norm": 7.840013094660732, "learning_rate": 2.6245078679219503e-06, "loss": 0.15397483110427856, "step": 989 }, { "epoch": 2.0798319327731094, "grad_norm": 14.418579873307118, "learning_rate": 2.613758596719373e-06, "loss": 0.5905511379241943, "step": 990 }, { "epoch": 2.081932773109244, "grad_norm": 11.365880088991135, "learning_rate": 2.603023588410662e-06, "loss": 0.588984489440918, "step": 991 }, { "epoch": 2.0840336134453783, "grad_norm": 8.467121557746795, "learning_rate": 2.5923029071604443e-06, "loss": 0.33690521121025085, "step": 992 }, { "epoch": 2.0861344537815127, "grad_norm": 12.160131630042047, "learning_rate": 2.5815966170477065e-06, "loss": 0.23294681310653687, "step": 993 }, { "epoch": 2.088235294117647, "grad_norm": 10.365330226343618, "learning_rate": 2.5709047820654236e-06, "loss": 0.4404110908508301, "step": 994 }, { "epoch": 2.0903361344537816, "grad_norm": 11.818988939924239, "learning_rate": 2.5602274661201643e-06, "loss": 0.37340593338012695, "step": 995 }, { "epoch": 2.092436974789916, "grad_norm": 8.208877260345274, "learning_rate": 2.549564733031722e-06, "loss": 0.3671455979347229, "step": 996 }, { "epoch": 2.0945378151260505, "grad_norm": 7.093067310603152, "learning_rate": 2.538916646532718e-06, "loss": 0.6218878030776978, "step": 997 }, { "epoch": 2.096638655462185, "grad_norm": 18.77124675473162, "learning_rate": 2.528283270268238e-06, "loss": 1.2778301239013672, "step": 998 }, { "epoch": 2.0987394957983194, "grad_norm": 11.063294944827689, "learning_rate": 2.517664667795434e-06, "loss": 0.6543454527854919, "step": 999 }, { "epoch": 2.100840336134454, "grad_norm": 14.644880561077354, "learning_rate": 2.5070609025831605e-06, "loss": 0.42762574553489685, "step": 1000 }, { "epoch": 2.1029411764705883, "grad_norm": 9.207415458189004, "learning_rate": 2.49647203801158e-06, "loss": 0.40861833095550537, "step": 1001 }, { "epoch": 2.1050420168067228, "grad_norm": 13.2306061568643, "learning_rate": 2.4858981373718006e-06, "loss": 0.6941218376159668, "step": 1002 }, { "epoch": 2.107142857142857, "grad_norm": 7.498871262137285, "learning_rate": 2.47533926386548e-06, "loss": 0.1615523397922516, "step": 1003 }, { "epoch": 2.1092436974789917, "grad_norm": 10.133323758514624, "learning_rate": 2.4647954806044633e-06, "loss": 0.30699750781059265, "step": 1004 }, { "epoch": 2.111344537815126, "grad_norm": 7.757159399213717, "learning_rate": 2.454266850610398e-06, "loss": 0.27435654401779175, "step": 1005 }, { "epoch": 2.1134453781512605, "grad_norm": 12.86368889886839, "learning_rate": 2.443753436814354e-06, "loss": 0.6352673172950745, "step": 1006 }, { "epoch": 2.115546218487395, "grad_norm": 10.035026243076201, "learning_rate": 2.433255302056458e-06, "loss": 0.3478729724884033, "step": 1007 }, { "epoch": 2.1176470588235294, "grad_norm": 24.70500681984219, "learning_rate": 2.4227725090855063e-06, "loss": 0.3971726894378662, "step": 1008 }, { "epoch": 2.119747899159664, "grad_norm": 10.231694956778009, "learning_rate": 2.412305120558599e-06, "loss": 0.9241357445716858, "step": 1009 }, { "epoch": 2.1218487394957983, "grad_norm": 10.754074840458836, "learning_rate": 2.40185319904076e-06, "loss": 0.2883678674697876, "step": 1010 }, { "epoch": 2.1239495798319328, "grad_norm": 11.61442466478921, "learning_rate": 2.391416807004568e-06, "loss": 0.39812758564949036, "step": 1011 }, { "epoch": 2.1260504201680672, "grad_norm": 12.063207487307261, "learning_rate": 2.3809960068297732e-06, "loss": 0.6487483978271484, "step": 1012 }, { "epoch": 2.1281512605042017, "grad_norm": 10.36458033229305, "learning_rate": 2.370590860802938e-06, "loss": 0.44781216979026794, "step": 1013 }, { "epoch": 2.130252100840336, "grad_norm": 12.86609548868944, "learning_rate": 2.3602014311170524e-06, "loss": 0.3241298496723175, "step": 1014 }, { "epoch": 2.1323529411764706, "grad_norm": 15.003708633549396, "learning_rate": 2.3498277798711725e-06, "loss": 0.4608106315135956, "step": 1015 }, { "epoch": 2.134453781512605, "grad_norm": 12.622777494736392, "learning_rate": 2.3394699690700395e-06, "loss": 0.5967488884925842, "step": 1016 }, { "epoch": 2.1365546218487395, "grad_norm": 12.188684362144896, "learning_rate": 2.3291280606237186e-06, "loss": 0.4074782729148865, "step": 1017 }, { "epoch": 2.138655462184874, "grad_norm": 8.220918786617895, "learning_rate": 2.3188021163472206e-06, "loss": 0.354820191860199, "step": 1018 }, { "epoch": 2.1407563025210083, "grad_norm": 11.553371341140592, "learning_rate": 2.308492197960141e-06, "loss": 0.23287059366703033, "step": 1019 }, { "epoch": 2.142857142857143, "grad_norm": 14.317771847578689, "learning_rate": 2.2981983670862796e-06, "loss": 1.0973201990127563, "step": 1020 }, { "epoch": 2.1449579831932772, "grad_norm": 15.607772284187238, "learning_rate": 2.2879206852532854e-06, "loss": 0.4452645778656006, "step": 1021 }, { "epoch": 2.1470588235294117, "grad_norm": 7.00439488336282, "learning_rate": 2.2776592138922806e-06, "loss": 0.273881733417511, "step": 1022 }, { "epoch": 2.149159663865546, "grad_norm": 7.187420218160357, "learning_rate": 2.2674140143374904e-06, "loss": 0.20633578300476074, "step": 1023 }, { "epoch": 2.1512605042016806, "grad_norm": 13.420920542298727, "learning_rate": 2.2571851478258903e-06, "loss": 0.38969674706459045, "step": 1024 }, { "epoch": 2.153361344537815, "grad_norm": 10.472493973829605, "learning_rate": 2.2469726754968207e-06, "loss": 0.26989856362342834, "step": 1025 }, { "epoch": 2.1554621848739495, "grad_norm": 7.949292792396312, "learning_rate": 2.236776658391641e-06, "loss": 0.5260115265846252, "step": 1026 }, { "epoch": 2.157563025210084, "grad_norm": 10.053195823647455, "learning_rate": 2.2265971574533474e-06, "loss": 0.2469472587108612, "step": 1027 }, { "epoch": 2.1596638655462184, "grad_norm": 10.2298360545254, "learning_rate": 2.2164342335262244e-06, "loss": 0.201723113656044, "step": 1028 }, { "epoch": 2.161764705882353, "grad_norm": 8.12616695246335, "learning_rate": 2.2062879473554654e-06, "loss": 0.6355183124542236, "step": 1029 }, { "epoch": 2.1638655462184873, "grad_norm": 8.264174037140188, "learning_rate": 2.1961583595868253e-06, "loss": 0.32272863388061523, "step": 1030 }, { "epoch": 2.1659663865546217, "grad_norm": 7.57735519623744, "learning_rate": 2.186045530766244e-06, "loss": 0.24386917054653168, "step": 1031 }, { "epoch": 2.168067226890756, "grad_norm": 14.627933253501514, "learning_rate": 2.1759495213394965e-06, "loss": 0.482686847448349, "step": 1032 }, { "epoch": 2.1701680672268906, "grad_norm": 11.019581985915595, "learning_rate": 2.165870391651819e-06, "loss": 0.5142661333084106, "step": 1033 }, { "epoch": 2.172268907563025, "grad_norm": 5.2143700025854605, "learning_rate": 2.155808201947563e-06, "loss": 0.21703539788722992, "step": 1034 }, { "epoch": 2.1743697478991595, "grad_norm": 8.891172137251035, "learning_rate": 2.145763012369824e-06, "loss": 0.4068147540092468, "step": 1035 }, { "epoch": 2.176470588235294, "grad_norm": 8.535335024751161, "learning_rate": 2.1357348829600816e-06, "loss": 0.5949288606643677, "step": 1036 }, { "epoch": 2.1785714285714284, "grad_norm": 9.215827064680754, "learning_rate": 2.125723873657852e-06, "loss": 0.44353166222572327, "step": 1037 }, { "epoch": 2.180672268907563, "grad_norm": 10.942670044267897, "learning_rate": 2.115730044300313e-06, "loss": 0.4212431013584137, "step": 1038 }, { "epoch": 2.1827731092436973, "grad_norm": 9.670977047855832, "learning_rate": 2.105753454621966e-06, "loss": 0.37279778718948364, "step": 1039 }, { "epoch": 2.184873949579832, "grad_norm": 15.451697195363522, "learning_rate": 2.095794164254259e-06, "loss": 0.6137001514434814, "step": 1040 }, { "epoch": 2.1869747899159666, "grad_norm": 9.221135375907481, "learning_rate": 2.0858522327252467e-06, "loss": 0.20706136524677277, "step": 1041 }, { "epoch": 2.189075630252101, "grad_norm": 7.176753088694497, "learning_rate": 2.0759277194592208e-06, "loss": 0.3732944130897522, "step": 1042 }, { "epoch": 2.1911764705882355, "grad_norm": 6.567221722351546, "learning_rate": 2.06602068377637e-06, "loss": 0.11849310249090195, "step": 1043 }, { "epoch": 2.19327731092437, "grad_norm": 8.709699534320997, "learning_rate": 2.0561311848924082e-06, "loss": 0.35089147090911865, "step": 1044 }, { "epoch": 2.1953781512605044, "grad_norm": 6.892712538403868, "learning_rate": 2.0462592819182377e-06, "loss": 0.3482816219329834, "step": 1045 }, { "epoch": 2.197478991596639, "grad_norm": 11.886398314518281, "learning_rate": 2.0364050338595792e-06, "loss": 0.9048193097114563, "step": 1046 }, { "epoch": 2.1995798319327733, "grad_norm": 9.38373013746351, "learning_rate": 2.0265684996166345e-06, "loss": 0.34331268072128296, "step": 1047 }, { "epoch": 2.2016806722689077, "grad_norm": 13.372941805785942, "learning_rate": 2.0167497379837254e-06, "loss": 0.35536718368530273, "step": 1048 }, { "epoch": 2.203781512605042, "grad_norm": 7.556671458015662, "learning_rate": 2.0069488076489445e-06, "loss": 0.20954403281211853, "step": 1049 }, { "epoch": 2.2058823529411766, "grad_norm": 8.35211924521852, "learning_rate": 1.997165767193801e-06, "loss": 0.5290908813476562, "step": 1050 }, { "epoch": 2.207983193277311, "grad_norm": 10.477600594311985, "learning_rate": 1.9874006750928783e-06, "loss": 0.44289880990982056, "step": 1051 }, { "epoch": 2.2100840336134455, "grad_norm": 8.191084415042441, "learning_rate": 1.97765358971348e-06, "loss": 0.48035284876823425, "step": 1052 }, { "epoch": 2.21218487394958, "grad_norm": 14.892166225942573, "learning_rate": 1.967924569315275e-06, "loss": 0.2514810562133789, "step": 1053 }, { "epoch": 2.2142857142857144, "grad_norm": 9.73259092640212, "learning_rate": 1.958213672049964e-06, "loss": 0.9599279165267944, "step": 1054 }, { "epoch": 2.216386554621849, "grad_norm": 10.01655023470503, "learning_rate": 1.9485209559609148e-06, "loss": 0.30860060453414917, "step": 1055 }, { "epoch": 2.2184873949579833, "grad_norm": 21.410996670654146, "learning_rate": 1.9388464789828316e-06, "loss": 0.7747633457183838, "step": 1056 }, { "epoch": 2.2205882352941178, "grad_norm": 14.431755517939498, "learning_rate": 1.9291902989413935e-06, "loss": 0.3529064655303955, "step": 1057 }, { "epoch": 2.222689075630252, "grad_norm": 19.684041196466477, "learning_rate": 1.9195524735529237e-06, "loss": 1.0967960357666016, "step": 1058 }, { "epoch": 2.2247899159663866, "grad_norm": 9.812143417300405, "learning_rate": 1.909933060424029e-06, "loss": 0.700248122215271, "step": 1059 }, { "epoch": 2.226890756302521, "grad_norm": 10.765309787627796, "learning_rate": 1.9003321170512728e-06, "loss": 0.9177491068840027, "step": 1060 }, { "epoch": 2.2289915966386555, "grad_norm": 12.811209076397098, "learning_rate": 1.890749700820813e-06, "loss": 0.543596625328064, "step": 1061 }, { "epoch": 2.23109243697479, "grad_norm": 17.31969106411562, "learning_rate": 1.8811858690080764e-06, "loss": 0.7324357032775879, "step": 1062 }, { "epoch": 2.2331932773109244, "grad_norm": 9.418477503451474, "learning_rate": 1.8716406787774e-06, "loss": 0.4075426459312439, "step": 1063 }, { "epoch": 2.235294117647059, "grad_norm": 7.9458980097838605, "learning_rate": 1.862114187181705e-06, "loss": 0.39563894271850586, "step": 1064 }, { "epoch": 2.2373949579831933, "grad_norm": 8.299260491259234, "learning_rate": 1.8526064511621455e-06, "loss": 0.37604600191116333, "step": 1065 }, { "epoch": 2.2394957983193278, "grad_norm": 10.435458479716717, "learning_rate": 1.843117527547768e-06, "loss": 0.6682062745094299, "step": 1066 }, { "epoch": 2.241596638655462, "grad_norm": 8.776734857977067, "learning_rate": 1.8336474730551807e-06, "loss": 0.19220635294914246, "step": 1067 }, { "epoch": 2.2436974789915967, "grad_norm": 14.521651377727974, "learning_rate": 1.8241963442882005e-06, "loss": 0.27735865116119385, "step": 1068 }, { "epoch": 2.245798319327731, "grad_norm": 8.651493755796526, "learning_rate": 1.8147641977375313e-06, "loss": 0.41572022438049316, "step": 1069 }, { "epoch": 2.2478991596638656, "grad_norm": 7.20135853576087, "learning_rate": 1.8053510897804105e-06, "loss": 0.25049227476119995, "step": 1070 }, { "epoch": 2.25, "grad_norm": 8.485627286621954, "learning_rate": 1.7959570766802847e-06, "loss": 0.17869159579277039, "step": 1071 }, { "epoch": 2.2521008403361344, "grad_norm": 8.831664553556859, "learning_rate": 1.786582214586462e-06, "loss": 0.2621746361255646, "step": 1072 }, { "epoch": 2.254201680672269, "grad_norm": 13.640791806331189, "learning_rate": 1.77722655953379e-06, "loss": 0.33446362614631653, "step": 1073 }, { "epoch": 2.2563025210084033, "grad_norm": 5.657158630793571, "learning_rate": 1.7678901674423044e-06, "loss": 0.17267954349517822, "step": 1074 }, { "epoch": 2.258403361344538, "grad_norm": 11.516922535812704, "learning_rate": 1.7585730941169105e-06, "loss": 0.5281901955604553, "step": 1075 }, { "epoch": 2.2605042016806722, "grad_norm": 11.298403957574713, "learning_rate": 1.7492753952470415e-06, "loss": 0.2754780352115631, "step": 1076 }, { "epoch": 2.2626050420168067, "grad_norm": 10.413722402153681, "learning_rate": 1.739997126406322e-06, "loss": 0.3246016502380371, "step": 1077 }, { "epoch": 2.264705882352941, "grad_norm": 14.097971965363062, "learning_rate": 1.7307383430522474e-06, "loss": 0.6660511493682861, "step": 1078 }, { "epoch": 2.2668067226890756, "grad_norm": 22.503701517732946, "learning_rate": 1.7214991005258386e-06, "loss": 1.2165361642837524, "step": 1079 }, { "epoch": 2.26890756302521, "grad_norm": 8.328219817576464, "learning_rate": 1.7122794540513265e-06, "loss": 0.18396508693695068, "step": 1080 }, { "epoch": 2.2710084033613445, "grad_norm": 7.768308930354123, "learning_rate": 1.703079458735805e-06, "loss": 0.42018991708755493, "step": 1081 }, { "epoch": 2.273109243697479, "grad_norm": 9.610477928803583, "learning_rate": 1.6938991695689184e-06, "loss": 0.38192903995513916, "step": 1082 }, { "epoch": 2.2752100840336134, "grad_norm": 9.57071965935329, "learning_rate": 1.684738641422517e-06, "loss": 0.4953494966030121, "step": 1083 }, { "epoch": 2.277310924369748, "grad_norm": 11.62580762547179, "learning_rate": 1.6755979290503437e-06, "loss": 0.5324037075042725, "step": 1084 }, { "epoch": 2.2794117647058822, "grad_norm": 9.119930665905265, "learning_rate": 1.666477087087694e-06, "loss": 0.6618460416793823, "step": 1085 }, { "epoch": 2.2815126050420167, "grad_norm": 12.668770516893803, "learning_rate": 1.6573761700511004e-06, "loss": 0.29154300689697266, "step": 1086 }, { "epoch": 2.283613445378151, "grad_norm": 10.126878534173718, "learning_rate": 1.6482952323379958e-06, "loss": 0.39994263648986816, "step": 1087 }, { "epoch": 2.2857142857142856, "grad_norm": 8.084921146733947, "learning_rate": 1.639234328226399e-06, "loss": 0.2049681693315506, "step": 1088 }, { "epoch": 2.28781512605042, "grad_norm": 9.167757841002748, "learning_rate": 1.6301935118745826e-06, "loss": 0.35848674178123474, "step": 1089 }, { "epoch": 2.2899159663865545, "grad_norm": 12.543365522318467, "learning_rate": 1.621172837320754e-06, "loss": 0.4794918894767761, "step": 1090 }, { "epoch": 2.292016806722689, "grad_norm": 6.873717233986044, "learning_rate": 1.6121723584827259e-06, "loss": 0.3671627342700958, "step": 1091 }, { "epoch": 2.2941176470588234, "grad_norm": 9.315544619619539, "learning_rate": 1.6031921291576048e-06, "loss": 0.25063830614089966, "step": 1092 }, { "epoch": 2.296218487394958, "grad_norm": 11.618408926786485, "learning_rate": 1.5942322030214547e-06, "loss": 0.7581193447113037, "step": 1093 }, { "epoch": 2.2983193277310923, "grad_norm": 7.9613247000723595, "learning_rate": 1.5852926336289926e-06, "loss": 0.4217086434364319, "step": 1094 }, { "epoch": 2.3004201680672267, "grad_norm": 10.341036096752598, "learning_rate": 1.5763734744132587e-06, "loss": 0.5018645524978638, "step": 1095 }, { "epoch": 2.302521008403361, "grad_norm": 14.166467122386207, "learning_rate": 1.5674747786852935e-06, "loss": 0.5745636224746704, "step": 1096 }, { "epoch": 2.3046218487394956, "grad_norm": 9.77165887856765, "learning_rate": 1.5585965996338314e-06, "loss": 0.9145222902297974, "step": 1097 }, { "epoch": 2.30672268907563, "grad_norm": 15.937224453039251, "learning_rate": 1.5497389903249705e-06, "loss": 0.4312666058540344, "step": 1098 }, { "epoch": 2.3088235294117645, "grad_norm": 8.945920679970577, "learning_rate": 1.5409020037018652e-06, "loss": 0.4121660590171814, "step": 1099 }, { "epoch": 2.310924369747899, "grad_norm": 10.839281933281265, "learning_rate": 1.5320856925843997e-06, "loss": 0.8646482825279236, "step": 1100 }, { "epoch": 2.3130252100840334, "grad_norm": 12.806561724880765, "learning_rate": 1.5232901096688847e-06, "loss": 0.784586489200592, "step": 1101 }, { "epoch": 2.315126050420168, "grad_norm": 10.817682905964707, "learning_rate": 1.5145153075277286e-06, "loss": 0.9424635171890259, "step": 1102 }, { "epoch": 2.3172268907563023, "grad_norm": 8.922023653272449, "learning_rate": 1.505761338609137e-06, "loss": 0.28385645151138306, "step": 1103 }, { "epoch": 2.3193277310924367, "grad_norm": 15.30593506620364, "learning_rate": 1.4970282552367854e-06, "loss": 0.6689031720161438, "step": 1104 }, { "epoch": 2.3214285714285716, "grad_norm": 10.05546946420467, "learning_rate": 1.4883161096095189e-06, "loss": 0.691364586353302, "step": 1105 }, { "epoch": 2.323529411764706, "grad_norm": 13.976863852979069, "learning_rate": 1.4796249538010354e-06, "loss": 0.23520073294639587, "step": 1106 }, { "epoch": 2.3256302521008405, "grad_norm": 9.578643377397341, "learning_rate": 1.4709548397595674e-06, "loss": 0.4271107316017151, "step": 1107 }, { "epoch": 2.327731092436975, "grad_norm": 16.17388877757899, "learning_rate": 1.4623058193075852e-06, "loss": 0.9280604720115662, "step": 1108 }, { "epoch": 2.3298319327731094, "grad_norm": 13.041308775276805, "learning_rate": 1.453677944141474e-06, "loss": 0.33376407623291016, "step": 1109 }, { "epoch": 2.331932773109244, "grad_norm": 13.186142451412863, "learning_rate": 1.4450712658312356e-06, "loss": 0.7442219853401184, "step": 1110 }, { "epoch": 2.3340336134453783, "grad_norm": 10.969810510823187, "learning_rate": 1.43648583582017e-06, "loss": 1.27920663356781, "step": 1111 }, { "epoch": 2.3361344537815127, "grad_norm": 22.653518753891586, "learning_rate": 1.4279217054245793e-06, "loss": 0.6456579566001892, "step": 1112 }, { "epoch": 2.338235294117647, "grad_norm": 13.638307761366974, "learning_rate": 1.4193789258334485e-06, "loss": 1.1350394487380981, "step": 1113 }, { "epoch": 2.3403361344537816, "grad_norm": 10.59397199917471, "learning_rate": 1.4108575481081522e-06, "loss": 0.5290108919143677, "step": 1114 }, { "epoch": 2.342436974789916, "grad_norm": 9.100247445169298, "learning_rate": 1.4023576231821362e-06, "loss": 0.2833002209663391, "step": 1115 }, { "epoch": 2.3445378151260505, "grad_norm": 18.49442431345445, "learning_rate": 1.3938792018606278e-06, "loss": 0.37826409935951233, "step": 1116 }, { "epoch": 2.346638655462185, "grad_norm": 12.477810112402349, "learning_rate": 1.3854223348203171e-06, "loss": 0.3945717215538025, "step": 1117 }, { "epoch": 2.3487394957983194, "grad_norm": 8.789544191123422, "learning_rate": 1.376987072609065e-06, "loss": 0.31352269649505615, "step": 1118 }, { "epoch": 2.350840336134454, "grad_norm": 14.219313270123468, "learning_rate": 1.368573465645599e-06, "loss": 0.8024647235870361, "step": 1119 }, { "epoch": 2.3529411764705883, "grad_norm": 10.007349065084831, "learning_rate": 1.360181564219204e-06, "loss": 0.7791054248809814, "step": 1120 }, { "epoch": 2.3550420168067228, "grad_norm": 12.213364584526106, "learning_rate": 1.351811418489436e-06, "loss": 0.37381619215011597, "step": 1121 }, { "epoch": 2.357142857142857, "grad_norm": 10.713765677783302, "learning_rate": 1.3434630784858067e-06, "loss": 0.3184419870376587, "step": 1122 }, { "epoch": 2.3592436974789917, "grad_norm": 11.470256693930569, "learning_rate": 1.335136594107498e-06, "loss": 0.3431350886821747, "step": 1123 }, { "epoch": 2.361344537815126, "grad_norm": 9.119684880351647, "learning_rate": 1.3268320151230518e-06, "loss": 0.4296434819698334, "step": 1124 }, { "epoch": 2.3634453781512605, "grad_norm": 10.866853294417046, "learning_rate": 1.3185493911700854e-06, "loss": 0.48791950941085815, "step": 1125 }, { "epoch": 2.365546218487395, "grad_norm": 9.540925370722046, "learning_rate": 1.3102887717549812e-06, "loss": 0.29711413383483887, "step": 1126 }, { "epoch": 2.3676470588235294, "grad_norm": 11.627989144711366, "learning_rate": 1.302050206252602e-06, "loss": 0.39902636408805847, "step": 1127 }, { "epoch": 2.369747899159664, "grad_norm": 6.365770038684127, "learning_rate": 1.2938337439059868e-06, "loss": 0.2864948511123657, "step": 1128 }, { "epoch": 2.3718487394957983, "grad_norm": 12.606248234313094, "learning_rate": 1.2856394338260691e-06, "loss": 0.42151930928230286, "step": 1129 }, { "epoch": 2.3739495798319328, "grad_norm": 8.544922775672411, "learning_rate": 1.2774673249913656e-06, "loss": 0.330949604511261, "step": 1130 }, { "epoch": 2.3760504201680672, "grad_norm": 12.681787759512487, "learning_rate": 1.2693174662477003e-06, "loss": 0.832221508026123, "step": 1131 }, { "epoch": 2.3781512605042017, "grad_norm": 16.15687539830067, "learning_rate": 1.2611899063079002e-06, "loss": 0.3243201971054077, "step": 1132 }, { "epoch": 2.380252100840336, "grad_norm": 11.42137338593432, "learning_rate": 1.253084693751514e-06, "loss": 0.4209938049316406, "step": 1133 }, { "epoch": 2.3823529411764706, "grad_norm": 10.49566833203582, "learning_rate": 1.245001877024512e-06, "loss": 0.1905173659324646, "step": 1134 }, { "epoch": 2.384453781512605, "grad_norm": 9.325292405896798, "learning_rate": 1.2369415044390055e-06, "loss": 0.31655293703079224, "step": 1135 }, { "epoch": 2.3865546218487395, "grad_norm": 12.150405014710023, "learning_rate": 1.228903624172954e-06, "loss": 0.2780379354953766, "step": 1136 }, { "epoch": 2.388655462184874, "grad_norm": 7.132176058282011, "learning_rate": 1.220888284269874e-06, "loss": 0.5738459825515747, "step": 1137 }, { "epoch": 2.3907563025210083, "grad_norm": 9.199984669814489, "learning_rate": 1.2128955326385595e-06, "loss": 0.4594503343105316, "step": 1138 }, { "epoch": 2.392857142857143, "grad_norm": 246.0490199481034, "learning_rate": 1.2049254170527857e-06, "loss": 1.6502771377563477, "step": 1139 }, { "epoch": 2.3949579831932772, "grad_norm": 10.645480745934366, "learning_rate": 1.196977985151036e-06, "loss": 0.7063793540000916, "step": 1140 }, { "epoch": 2.3970588235294117, "grad_norm": 12.466682957005606, "learning_rate": 1.1890532844362035e-06, "loss": 0.4885460138320923, "step": 1141 }, { "epoch": 2.399159663865546, "grad_norm": 8.222098406246245, "learning_rate": 1.1811513622753196e-06, "loss": 0.29537534713745117, "step": 1142 }, { "epoch": 2.4012605042016806, "grad_norm": 10.08444013945275, "learning_rate": 1.1732722658992597e-06, "loss": 0.6734664440155029, "step": 1143 }, { "epoch": 2.403361344537815, "grad_norm": 12.85839873964936, "learning_rate": 1.1654160424024718e-06, "loss": 0.39790263772010803, "step": 1144 }, { "epoch": 2.4054621848739495, "grad_norm": 19.32478545248631, "learning_rate": 1.1575827387426846e-06, "loss": 0.2750331163406372, "step": 1145 }, { "epoch": 2.407563025210084, "grad_norm": 9.972826363554564, "learning_rate": 1.149772401740637e-06, "loss": 0.44170406460762024, "step": 1146 }, { "epoch": 2.4096638655462184, "grad_norm": 7.314481031395291, "learning_rate": 1.1419850780797864e-06, "loss": 0.19013899564743042, "step": 1147 }, { "epoch": 2.411764705882353, "grad_norm": 7.432016042351664, "learning_rate": 1.1342208143060423e-06, "loss": 0.4140137732028961, "step": 1148 }, { "epoch": 2.4138655462184873, "grad_norm": 11.414471382112064, "learning_rate": 1.1264796568274811e-06, "loss": 0.4861386716365814, "step": 1149 }, { "epoch": 2.4159663865546217, "grad_norm": 13.684580354320987, "learning_rate": 1.118761651914065e-06, "loss": 0.3487178683280945, "step": 1150 }, { "epoch": 2.418067226890756, "grad_norm": 8.714659720362214, "learning_rate": 1.1110668456973761e-06, "loss": 0.6119335889816284, "step": 1151 }, { "epoch": 2.4201680672268906, "grad_norm": 10.585809423186294, "learning_rate": 1.10339528417033e-06, "loss": 0.24830467998981476, "step": 1152 }, { "epoch": 2.422268907563025, "grad_norm": 6.6530689382799375, "learning_rate": 1.0957470131869102e-06, "loss": 0.20413950085639954, "step": 1153 }, { "epoch": 2.4243697478991595, "grad_norm": 11.585229233250407, "learning_rate": 1.088122078461884e-06, "loss": 0.7759865522384644, "step": 1154 }, { "epoch": 2.426470588235294, "grad_norm": 14.183804098321202, "learning_rate": 1.0805205255705403e-06, "loss": 0.9713194370269775, "step": 1155 }, { "epoch": 2.4285714285714284, "grad_norm": 11.392423008755229, "learning_rate": 1.0729423999484062e-06, "loss": 0.3234805464744568, "step": 1156 }, { "epoch": 2.4306722689075633, "grad_norm": 7.504753087219636, "learning_rate": 1.0653877468909857e-06, "loss": 0.2364063262939453, "step": 1157 }, { "epoch": 2.4327731092436977, "grad_norm": 9.823099282463206, "learning_rate": 1.0578566115534794e-06, "loss": 0.4705219268798828, "step": 1158 }, { "epoch": 2.434873949579832, "grad_norm": 9.451122750213175, "learning_rate": 1.0503490389505244e-06, "loss": 0.26277682185173035, "step": 1159 }, { "epoch": 2.4369747899159666, "grad_norm": 9.336273451144258, "learning_rate": 1.0428650739559138e-06, "loss": 0.13882672786712646, "step": 1160 }, { "epoch": 2.439075630252101, "grad_norm": 7.52407325404656, "learning_rate": 1.0354047613023404e-06, "loss": 0.5188834071159363, "step": 1161 }, { "epoch": 2.4411764705882355, "grad_norm": 7.853041816369125, "learning_rate": 1.0279681455811219e-06, "loss": 0.24887529015541077, "step": 1162 }, { "epoch": 2.44327731092437, "grad_norm": 10.755975449643415, "learning_rate": 1.0205552712419343e-06, "loss": 0.28220975399017334, "step": 1163 }, { "epoch": 2.4453781512605044, "grad_norm": 10.623188982430918, "learning_rate": 1.013166182592551e-06, "loss": 0.24789491295814514, "step": 1164 }, { "epoch": 2.447478991596639, "grad_norm": 10.265380274150749, "learning_rate": 1.0058009237985721e-06, "loss": 0.7892224788665771, "step": 1165 }, { "epoch": 2.4495798319327733, "grad_norm": 11.896169947706998, "learning_rate": 9.98459538883167e-07, "loss": 0.26245754957199097, "step": 1166 }, { "epoch": 2.4516806722689077, "grad_norm": 11.049089295820975, "learning_rate": 9.911420717268023e-07, "loss": 0.27979156374931335, "step": 1167 }, { "epoch": 2.453781512605042, "grad_norm": 13.265137697114756, "learning_rate": 9.838485660669906e-07, "loss": 0.7934341430664062, "step": 1168 }, { "epoch": 2.4558823529411766, "grad_norm": 10.018047542365926, "learning_rate": 9.765790654980195e-07, "loss": 0.45289355516433716, "step": 1169 }, { "epoch": 2.457983193277311, "grad_norm": 13.567130161558774, "learning_rate": 9.693336134706988e-07, "loss": 0.992337703704834, "step": 1170 }, { "epoch": 2.4600840336134455, "grad_norm": 9.387778498410693, "learning_rate": 9.621122532920908e-07, "loss": 0.29417842626571655, "step": 1171 }, { "epoch": 2.46218487394958, "grad_norm": 9.114348977338564, "learning_rate": 9.549150281252633e-07, "loss": 0.5845852494239807, "step": 1172 }, { "epoch": 2.4642857142857144, "grad_norm": 9.288921226395173, "learning_rate": 9.477419809890215e-07, "loss": 0.22582799196243286, "step": 1173 }, { "epoch": 2.466386554621849, "grad_norm": 13.268912659944744, "learning_rate": 9.405931547576591e-07, "loss": 0.26232588291168213, "step": 1174 }, { "epoch": 2.4684873949579833, "grad_norm": 9.072509999987034, "learning_rate": 9.334685921606946e-07, "loss": 0.9084593057632446, "step": 1175 }, { "epoch": 2.4705882352941178, "grad_norm": 11.285010838093699, "learning_rate": 9.26368335782622e-07, "loss": 0.8386296629905701, "step": 1176 }, { "epoch": 2.472689075630252, "grad_norm": 11.02721308505799, "learning_rate": 9.192924280626514e-07, "loss": 1.0152020454406738, "step": 1177 }, { "epoch": 2.4747899159663866, "grad_norm": 12.924069581096365, "learning_rate": 9.122409112944591e-07, "loss": 0.42396751046180725, "step": 1178 }, { "epoch": 2.476890756302521, "grad_norm": 13.79023717049261, "learning_rate": 9.052138276259348e-07, "loss": 0.3439130485057831, "step": 1179 }, { "epoch": 2.4789915966386555, "grad_norm": 8.00992187627695, "learning_rate": 8.982112190589237e-07, "loss": 0.21849340200424194, "step": 1180 }, { "epoch": 2.48109243697479, "grad_norm": 17.238112347826142, "learning_rate": 8.912331274489855e-07, "loss": 1.047693133354187, "step": 1181 }, { "epoch": 2.4831932773109244, "grad_norm": 10.977700782429032, "learning_rate": 8.842795945051335e-07, "loss": 0.4458342492580414, "step": 1182 }, { "epoch": 2.485294117647059, "grad_norm": 6.2173703827542735, "learning_rate": 8.773506617895944e-07, "loss": 0.26556795835494995, "step": 1183 }, { "epoch": 2.4873949579831933, "grad_norm": 14.598955541616366, "learning_rate": 8.704463707175526e-07, "loss": 0.8663069605827332, "step": 1184 }, { "epoch": 2.4894957983193278, "grad_norm": 9.599096123627477, "learning_rate": 8.6356676255691e-07, "loss": 0.7863715291023254, "step": 1185 }, { "epoch": 2.491596638655462, "grad_norm": 7.756695128139413, "learning_rate": 8.567118784280309e-07, "loss": 0.2747763395309448, "step": 1186 }, { "epoch": 2.4936974789915967, "grad_norm": 15.296434795066423, "learning_rate": 8.498817593035053e-07, "loss": 0.22008158266544342, "step": 1187 }, { "epoch": 2.495798319327731, "grad_norm": 42.195018093662426, "learning_rate": 8.430764460078938e-07, "loss": 0.7790160179138184, "step": 1188 }, { "epoch": 2.4978991596638656, "grad_norm": 14.930505610933327, "learning_rate": 8.362959792174941e-07, "loss": 0.3692745864391327, "step": 1189 }, { "epoch": 2.5, "grad_norm": 15.547844843931736, "learning_rate": 8.295403994600921e-07, "loss": 0.5012900829315186, "step": 1190 }, { "epoch": 2.5021008403361344, "grad_norm": 13.218952734739705, "learning_rate": 8.228097471147167e-07, "loss": 0.4049416780471802, "step": 1191 }, { "epoch": 2.504201680672269, "grad_norm": 11.42318009744243, "learning_rate": 8.161040624114075e-07, "loss": 0.14171475172042847, "step": 1192 }, { "epoch": 2.5063025210084033, "grad_norm": 8.26466575159723, "learning_rate": 8.094233854309647e-07, "loss": 0.32759952545166016, "step": 1193 }, { "epoch": 2.508403361344538, "grad_norm": 13.359323997562882, "learning_rate": 8.027677561047176e-07, "loss": 0.5382500886917114, "step": 1194 }, { "epoch": 2.5105042016806722, "grad_norm": 9.050399443504134, "learning_rate": 7.961372142142776e-07, "loss": 0.4815264940261841, "step": 1195 }, { "epoch": 2.5126050420168067, "grad_norm": 8.2857361498368, "learning_rate": 7.89531799391311e-07, "loss": 0.28123000264167786, "step": 1196 }, { "epoch": 2.514705882352941, "grad_norm": 14.813927596451204, "learning_rate": 7.829515511172897e-07, "loss": 0.5116557478904724, "step": 1197 }, { "epoch": 2.5168067226890756, "grad_norm": 11.672590724543431, "learning_rate": 7.763965087232678e-07, "loss": 0.4502016603946686, "step": 1198 }, { "epoch": 2.51890756302521, "grad_norm": 8.420294235923025, "learning_rate": 7.698667113896346e-07, "loss": 0.34997278451919556, "step": 1199 }, { "epoch": 2.5210084033613445, "grad_norm": 4.856432012218632, "learning_rate": 7.633621981458916e-07, "loss": 0.15743517875671387, "step": 1200 }, { "epoch": 2.523109243697479, "grad_norm": 10.116924629282346, "learning_rate": 7.568830078704092e-07, "loss": 0.4513791799545288, "step": 1201 }, { "epoch": 2.5252100840336134, "grad_norm": 10.448219454906289, "learning_rate": 7.504291792902024e-07, "loss": 0.5203551054000854, "step": 1202 }, { "epoch": 2.527310924369748, "grad_norm": 7.4896542285298, "learning_rate": 7.440007509806946e-07, "loss": 0.5805743932723999, "step": 1203 }, { "epoch": 2.5294117647058822, "grad_norm": 6.637043733478462, "learning_rate": 7.375977613654861e-07, "loss": 0.21151217818260193, "step": 1204 }, { "epoch": 2.5315126050420167, "grad_norm": 11.756396934264371, "learning_rate": 7.312202487161318e-07, "loss": 0.4486454725265503, "step": 1205 }, { "epoch": 2.533613445378151, "grad_norm": 13.216341095384697, "learning_rate": 7.248682511519006e-07, "loss": 0.8350504040718079, "step": 1206 }, { "epoch": 2.5357142857142856, "grad_norm": 14.368316188442714, "learning_rate": 7.18541806639561e-07, "loss": 0.37657079100608826, "step": 1207 }, { "epoch": 2.53781512605042, "grad_norm": 10.572863577964558, "learning_rate": 7.122409529931412e-07, "loss": 0.5544061660766602, "step": 1208 }, { "epoch": 2.5399159663865545, "grad_norm": 13.009489309703797, "learning_rate": 7.059657278737136e-07, "loss": 0.8755850791931152, "step": 1209 }, { "epoch": 2.542016806722689, "grad_norm": 10.419835233671352, "learning_rate": 6.997161687891635e-07, "loss": 0.6084367036819458, "step": 1210 }, { "epoch": 2.5441176470588234, "grad_norm": 7.424045502482636, "learning_rate": 6.934923130939692e-07, "loss": 0.3528558015823364, "step": 1211 }, { "epoch": 2.546218487394958, "grad_norm": 22.05326914016899, "learning_rate": 6.872941979889708e-07, "loss": 0.3760122060775757, "step": 1212 }, { "epoch": 2.5483193277310923, "grad_norm": 8.437103819513496, "learning_rate": 6.811218605211606e-07, "loss": 0.3798169195652008, "step": 1213 }, { "epoch": 2.5504201680672267, "grad_norm": 15.105682353848836, "learning_rate": 6.749753375834467e-07, "loss": 0.20516347885131836, "step": 1214 }, { "epoch": 2.552521008403361, "grad_norm": 15.328640967464176, "learning_rate": 6.688546659144479e-07, "loss": 0.39129936695098877, "step": 1215 }, { "epoch": 2.5546218487394956, "grad_norm": 18.19539322746799, "learning_rate": 6.627598820982595e-07, "loss": 0.5815962553024292, "step": 1216 }, { "epoch": 2.55672268907563, "grad_norm": 10.358040499956887, "learning_rate": 6.566910225642475e-07, "loss": 0.2462518960237503, "step": 1217 }, { "epoch": 2.5588235294117645, "grad_norm": 23.882665351929745, "learning_rate": 6.50648123586819e-07, "loss": 0.7295534610748291, "step": 1218 }, { "epoch": 2.560924369747899, "grad_norm": 11.419325337575849, "learning_rate": 6.446312212852162e-07, "loss": 0.4088057577610016, "step": 1219 }, { "epoch": 2.5630252100840334, "grad_norm": 18.506668669014132, "learning_rate": 6.386403516232948e-07, "loss": 0.6498621106147766, "step": 1220 }, { "epoch": 2.565126050420168, "grad_norm": 12.707186533109224, "learning_rate": 6.326755504093063e-07, "loss": 0.3554389476776123, "step": 1221 }, { "epoch": 2.5672268907563023, "grad_norm": 12.462287833643975, "learning_rate": 6.267368532956919e-07, "loss": 1.3259708881378174, "step": 1222 }, { "epoch": 2.5693277310924367, "grad_norm": 8.69258882253335, "learning_rate": 6.208242957788613e-07, "loss": 0.4336357116699219, "step": 1223 }, { "epoch": 2.571428571428571, "grad_norm": 10.998188149878677, "learning_rate": 6.14937913198988e-07, "loss": 0.6199144124984741, "step": 1224 }, { "epoch": 2.5735294117647056, "grad_norm": 20.134120954604086, "learning_rate": 6.090777407397902e-07, "loss": 1.075969934463501, "step": 1225 }, { "epoch": 2.57563025210084, "grad_norm": 9.646318558023589, "learning_rate": 6.032438134283286e-07, "loss": 0.5996450185775757, "step": 1226 }, { "epoch": 2.5777310924369745, "grad_norm": 10.633484513814087, "learning_rate": 5.974361661347889e-07, "loss": 0.37859058380126953, "step": 1227 }, { "epoch": 2.5798319327731094, "grad_norm": 18.160538186398977, "learning_rate": 5.916548335722822e-07, "loss": 0.3595309853553772, "step": 1228 }, { "epoch": 2.581932773109244, "grad_norm": 12.156397479975382, "learning_rate": 5.858998502966273e-07, "loss": 0.31986016035079956, "step": 1229 }, { "epoch": 2.5840336134453783, "grad_norm": 12.635254524437713, "learning_rate": 5.801712507061563e-07, "loss": 0.3975721597671509, "step": 1230 }, { "epoch": 2.5861344537815127, "grad_norm": 10.900463191925608, "learning_rate": 5.74469069041495e-07, "loss": 0.6717185974121094, "step": 1231 }, { "epoch": 2.588235294117647, "grad_norm": 10.60292331277609, "learning_rate": 5.687933393853718e-07, "loss": 0.6171470880508423, "step": 1232 }, { "epoch": 2.5903361344537816, "grad_norm": 11.817453815932138, "learning_rate": 5.631440956624057e-07, "loss": 0.47931092977523804, "step": 1233 }, { "epoch": 2.592436974789916, "grad_norm": 14.65524897977516, "learning_rate": 5.575213716389039e-07, "loss": 0.44013679027557373, "step": 1234 }, { "epoch": 2.5945378151260505, "grad_norm": 14.640686063418055, "learning_rate": 5.519252009226639e-07, "loss": 0.515785276889801, "step": 1235 }, { "epoch": 2.596638655462185, "grad_norm": 9.51325634200356, "learning_rate": 5.463556169627687e-07, "loss": 0.3664918541908264, "step": 1236 }, { "epoch": 2.5987394957983194, "grad_norm": 9.591516923545466, "learning_rate": 5.408126530493918e-07, "loss": 0.3711666762828827, "step": 1237 }, { "epoch": 2.600840336134454, "grad_norm": 7.071071092917334, "learning_rate": 5.352963423135893e-07, "loss": 0.12698325514793396, "step": 1238 }, { "epoch": 2.6029411764705883, "grad_norm": 8.042424735857201, "learning_rate": 5.298067177271144e-07, "loss": 0.3730424642562866, "step": 1239 }, { "epoch": 2.6050420168067228, "grad_norm": 9.069780325522164, "learning_rate": 5.243438121022077e-07, "loss": 0.6243601441383362, "step": 1240 }, { "epoch": 2.607142857142857, "grad_norm": 7.246887997362519, "learning_rate": 5.18907658091411e-07, "loss": 0.18001016974449158, "step": 1241 }, { "epoch": 2.6092436974789917, "grad_norm": 15.652638965395807, "learning_rate": 5.134982881873646e-07, "loss": 0.6635949611663818, "step": 1242 }, { "epoch": 2.611344537815126, "grad_norm": 9.642543803196963, "learning_rate": 5.081157347226201e-07, "loss": 0.4666215777397156, "step": 1243 }, { "epoch": 2.6134453781512605, "grad_norm": 9.416633968819704, "learning_rate": 5.027600298694397e-07, "loss": 0.1682681143283844, "step": 1244 }, { "epoch": 2.615546218487395, "grad_norm": 14.036854769880513, "learning_rate": 4.974312056396113e-07, "loss": 0.5077744722366333, "step": 1245 }, { "epoch": 2.6176470588235294, "grad_norm": 11.722652840072532, "learning_rate": 4.92129293884252e-07, "loss": 0.44359397888183594, "step": 1246 }, { "epoch": 2.619747899159664, "grad_norm": 15.585836072486865, "learning_rate": 4.868543262936176e-07, "loss": 1.2246967554092407, "step": 1247 }, { "epoch": 2.6218487394957983, "grad_norm": 10.770044484279795, "learning_rate": 4.816063343969196e-07, "loss": 0.32194000482559204, "step": 1248 }, { "epoch": 2.6239495798319328, "grad_norm": 11.639608924375384, "learning_rate": 4.763853495621251e-07, "loss": 0.5496278405189514, "step": 1249 }, { "epoch": 2.6260504201680672, "grad_norm": 6.671350027648182, "learning_rate": 4.7119140299578424e-07, "loss": 0.21257492899894714, "step": 1250 }, { "epoch": 2.6281512605042017, "grad_norm": 12.713785072488509, "learning_rate": 4.660245257428297e-07, "loss": 0.3104386329650879, "step": 1251 }, { "epoch": 2.630252100840336, "grad_norm": 14.031766333020213, "learning_rate": 4.6088474868640045e-07, "loss": 0.8334522843360901, "step": 1252 }, { "epoch": 2.6323529411764706, "grad_norm": 9.251230979164895, "learning_rate": 4.557721025476508e-07, "loss": 0.29882583022117615, "step": 1253 }, { "epoch": 2.634453781512605, "grad_norm": 10.2580288266136, "learning_rate": 4.5068661788557345e-07, "loss": 0.3209346830844879, "step": 1254 }, { "epoch": 2.6365546218487395, "grad_norm": 5.604118390936418, "learning_rate": 4.4562832509680963e-07, "loss": 0.15333116054534912, "step": 1255 }, { "epoch": 2.638655462184874, "grad_norm": 19.802213443985696, "learning_rate": 4.4059725441547464e-07, "loss": 0.48582714796066284, "step": 1256 }, { "epoch": 2.6407563025210083, "grad_norm": 11.514218359185726, "learning_rate": 4.355934359129699e-07, "loss": 0.4873425364494324, "step": 1257 }, { "epoch": 2.642857142857143, "grad_norm": 11.373550533887446, "learning_rate": 4.3061689949780995e-07, "loss": 0.2611161768436432, "step": 1258 }, { "epoch": 2.6449579831932772, "grad_norm": 13.616066692598451, "learning_rate": 4.2566767491543706e-07, "loss": 0.27621158957481384, "step": 1259 }, { "epoch": 2.6470588235294117, "grad_norm": 13.034515066864026, "learning_rate": 4.2074579174805173e-07, "loss": 0.849486231803894, "step": 1260 }, { "epoch": 2.649159663865546, "grad_norm": 11.86057949603211, "learning_rate": 4.1585127941442536e-07, "loss": 0.7652707099914551, "step": 1261 }, { "epoch": 2.6512605042016806, "grad_norm": 9.803056978877574, "learning_rate": 4.1098416716973457e-07, "loss": 0.27856025099754333, "step": 1262 }, { "epoch": 2.653361344537815, "grad_norm": 10.956379977903175, "learning_rate": 4.0614448410538077e-07, "loss": 0.3749684691429138, "step": 1263 }, { "epoch": 2.6554621848739495, "grad_norm": 12.001506859449199, "learning_rate": 4.01332259148815e-07, "loss": 0.6064971685409546, "step": 1264 }, { "epoch": 2.657563025210084, "grad_norm": 8.750382381092477, "learning_rate": 3.965475210633718e-07, "loss": 0.31089282035827637, "step": 1265 }, { "epoch": 2.6596638655462184, "grad_norm": 11.062101410973414, "learning_rate": 3.917902984480881e-07, "loss": 0.3686492443084717, "step": 1266 }, { "epoch": 2.661764705882353, "grad_norm": 9.181597675394137, "learning_rate": 3.870606197375415e-07, "loss": 0.5900052785873413, "step": 1267 }, { "epoch": 2.6638655462184873, "grad_norm": 11.229435985209061, "learning_rate": 3.823585132016711e-07, "loss": 0.23156413435935974, "step": 1268 }, { "epoch": 2.6659663865546217, "grad_norm": 14.580552525176778, "learning_rate": 3.776840069456189e-07, "loss": 1.1965575218200684, "step": 1269 }, { "epoch": 2.668067226890756, "grad_norm": 11.440843191964541, "learning_rate": 3.730371289095508e-07, "loss": 0.5137308835983276, "step": 1270 }, { "epoch": 2.6701680672268906, "grad_norm": 10.19938284065176, "learning_rate": 3.6841790686849897e-07, "loss": 0.2563337981700897, "step": 1271 }, { "epoch": 2.6722689075630255, "grad_norm": 12.902940912955524, "learning_rate": 3.6382636843218967e-07, "loss": 0.5659809708595276, "step": 1272 }, { "epoch": 2.67436974789916, "grad_norm": 10.012067877403453, "learning_rate": 3.592625410448813e-07, "loss": 0.4689119756221771, "step": 1273 }, { "epoch": 2.6764705882352944, "grad_norm": 7.152049482781003, "learning_rate": 3.5472645198520064e-07, "loss": 0.623033881187439, "step": 1274 }, { "epoch": 2.678571428571429, "grad_norm": 12.87568935637631, "learning_rate": 3.502181283659756e-07, "loss": 0.5805165767669678, "step": 1275 }, { "epoch": 2.6806722689075633, "grad_norm": 9.41187997958309, "learning_rate": 3.4573759713407927e-07, "loss": 0.5375624299049377, "step": 1276 }, { "epoch": 2.6827731092436977, "grad_norm": 9.91288200334237, "learning_rate": 3.4128488507026327e-07, "loss": 0.3185434341430664, "step": 1277 }, { "epoch": 2.684873949579832, "grad_norm": 15.97191633077991, "learning_rate": 3.3686001878900365e-07, "loss": 2.561387538909912, "step": 1278 }, { "epoch": 2.6869747899159666, "grad_norm": 15.172775416815085, "learning_rate": 3.324630247383337e-07, "loss": 0.5536858439445496, "step": 1279 }, { "epoch": 2.689075630252101, "grad_norm": 8.67689739732767, "learning_rate": 3.2809392919969483e-07, "loss": 0.18657177686691284, "step": 1280 }, { "epoch": 2.6911764705882355, "grad_norm": 12.844508734340609, "learning_rate": 3.2375275828777253e-07, "loss": 0.9441865086555481, "step": 1281 }, { "epoch": 2.69327731092437, "grad_norm": 9.066706121878353, "learning_rate": 3.194395379503451e-07, "loss": 0.5320143103599548, "step": 1282 }, { "epoch": 2.6953781512605044, "grad_norm": 8.980483143209002, "learning_rate": 3.151542939681235e-07, "loss": 0.5943700075149536, "step": 1283 }, { "epoch": 2.697478991596639, "grad_norm": 54.073743939162, "learning_rate": 3.108970519546034e-07, "loss": 1.0508530139923096, "step": 1284 }, { "epoch": 2.6995798319327733, "grad_norm": 10.409633106680213, "learning_rate": 3.066678373559062e-07, "loss": 0.3096291124820709, "step": 1285 }, { "epoch": 2.7016806722689077, "grad_norm": 17.871716634928575, "learning_rate": 3.0246667545063057e-07, "loss": 1.133009910583496, "step": 1286 }, { "epoch": 2.703781512605042, "grad_norm": 10.727929813588565, "learning_rate": 2.9829359134970206e-07, "loss": 0.3362637162208557, "step": 1287 }, { "epoch": 2.7058823529411766, "grad_norm": 10.027811395629024, "learning_rate": 2.9414860999621764e-07, "loss": 0.9418044090270996, "step": 1288 }, { "epoch": 2.707983193277311, "grad_norm": 8.072200583551933, "learning_rate": 2.9003175616530264e-07, "loss": 0.2674849033355713, "step": 1289 }, { "epoch": 2.7100840336134455, "grad_norm": 7.662563052553184, "learning_rate": 2.8594305446396245e-07, "loss": 0.39476725459098816, "step": 1290 }, { "epoch": 2.71218487394958, "grad_norm": 7.931717377933664, "learning_rate": 2.818825293309274e-07, "loss": 0.5461002588272095, "step": 1291 }, { "epoch": 2.7142857142857144, "grad_norm": 9.069033415947747, "learning_rate": 2.7785020503651783e-07, "loss": 0.36206185817718506, "step": 1292 }, { "epoch": 2.716386554621849, "grad_norm": 11.269854164923549, "learning_rate": 2.7384610568249313e-07, "loss": 0.33151179552078247, "step": 1293 }, { "epoch": 2.7184873949579833, "grad_norm": 10.182183999098427, "learning_rate": 2.698702552019045e-07, "loss": 0.3465487062931061, "step": 1294 }, { "epoch": 2.7205882352941178, "grad_norm": 12.712025830447253, "learning_rate": 2.659226773589607e-07, "loss": 0.22317005693912506, "step": 1295 }, { "epoch": 2.722689075630252, "grad_norm": 11.83879887595397, "learning_rate": 2.620033957488777e-07, "loss": 0.34791454672813416, "step": 1296 }, { "epoch": 2.7247899159663866, "grad_norm": 12.416917803129223, "learning_rate": 2.581124337977425e-07, "loss": 0.4211697578430176, "step": 1297 }, { "epoch": 2.726890756302521, "grad_norm": 15.495534828622619, "learning_rate": 2.542498147623701e-07, "loss": 0.4095291495323181, "step": 1298 }, { "epoch": 2.7289915966386555, "grad_norm": 6.6678037455089925, "learning_rate": 2.50415561730169e-07, "loss": 0.2518484592437744, "step": 1299 }, { "epoch": 2.73109243697479, "grad_norm": 8.694983560441388, "learning_rate": 2.4660969761899576e-07, "loss": 0.21484610438346863, "step": 1300 }, { "epoch": 2.7331932773109244, "grad_norm": 11.820763178851392, "learning_rate": 2.428322451770276e-07, "loss": 0.39412614703178406, "step": 1301 }, { "epoch": 2.735294117647059, "grad_norm": 11.877133639126868, "learning_rate": 2.3908322698261597e-07, "loss": 0.34464430809020996, "step": 1302 }, { "epoch": 2.7373949579831933, "grad_norm": 10.16702078484984, "learning_rate": 2.3536266544416043e-07, "loss": 0.5757449865341187, "step": 1303 }, { "epoch": 2.7394957983193278, "grad_norm": 12.93026525257059, "learning_rate": 2.3167058279997156e-07, "loss": 0.7968210577964783, "step": 1304 }, { "epoch": 2.741596638655462, "grad_norm": 10.635434378996248, "learning_rate": 2.2800700111813456e-07, "loss": 0.40927547216415405, "step": 1305 }, { "epoch": 2.7436974789915967, "grad_norm": 9.037444336220418, "learning_rate": 2.2437194229638415e-07, "loss": 0.23368996381759644, "step": 1306 }, { "epoch": 2.745798319327731, "grad_norm": 128.54979302169804, "learning_rate": 2.2076542806196588e-07, "loss": 0.7368482351303101, "step": 1307 }, { "epoch": 2.7478991596638656, "grad_norm": 8.202648198989193, "learning_rate": 2.17187479971514e-07, "loss": 0.29558128118515015, "step": 1308 }, { "epoch": 2.75, "grad_norm": 6.885187685182277, "learning_rate": 2.136381194109166e-07, "loss": 0.2764503061771393, "step": 1309 }, { "epoch": 2.7521008403361344, "grad_norm": 8.730199073100707, "learning_rate": 2.1011736759519286e-07, "loss": 0.3793492615222931, "step": 1310 }, { "epoch": 2.754201680672269, "grad_norm": 8.631276466119623, "learning_rate": 2.0662524556835982e-07, "loss": 0.5927262902259827, "step": 1311 }, { "epoch": 2.7563025210084033, "grad_norm": 12.625981527108426, "learning_rate": 2.0316177420331375e-07, "loss": 0.4284164607524872, "step": 1312 }, { "epoch": 2.758403361344538, "grad_norm": 9.329175719292097, "learning_rate": 1.997269742016994e-07, "loss": 0.4722291827201843, "step": 1313 }, { "epoch": 2.7605042016806722, "grad_norm": 17.532238777546283, "learning_rate": 1.9632086609379041e-07, "loss": 0.6627257466316223, "step": 1314 }, { "epoch": 2.7626050420168067, "grad_norm": 9.92918276948977, "learning_rate": 1.929434702383648e-07, "loss": 0.42083340883255005, "step": 1315 }, { "epoch": 2.764705882352941, "grad_norm": 9.589305807880846, "learning_rate": 1.895948068225828e-07, "loss": 0.39910781383514404, "step": 1316 }, { "epoch": 2.7668067226890756, "grad_norm": 11.560419759358716, "learning_rate": 1.862748958618682e-07, "loss": 0.2765321731567383, "step": 1317 }, { "epoch": 2.76890756302521, "grad_norm": 6.28366130869059, "learning_rate": 1.8298375719978501e-07, "loss": 0.08827929198741913, "step": 1318 }, { "epoch": 2.7710084033613445, "grad_norm": 10.943149316905583, "learning_rate": 1.797214105079248e-07, "loss": 0.5753570795059204, "step": 1319 }, { "epoch": 2.773109243697479, "grad_norm": 13.46621194548743, "learning_rate": 1.7648787528578127e-07, "loss": 0.7518602013587952, "step": 1320 }, { "epoch": 2.7752100840336134, "grad_norm": 9.047171390898557, "learning_rate": 1.732831708606425e-07, "loss": 0.6446128487586975, "step": 1321 }, { "epoch": 2.777310924369748, "grad_norm": 13.738460244304907, "learning_rate": 1.7010731638746668e-07, "loss": 0.4714201092720032, "step": 1322 }, { "epoch": 2.7794117647058822, "grad_norm": 9.086358928536246, "learning_rate": 1.669603308487755e-07, "loss": 0.23203890025615692, "step": 1323 }, { "epoch": 2.7815126050420167, "grad_norm": 25.798347784352785, "learning_rate": 1.6384223305453417e-07, "loss": 0.5102007389068604, "step": 1324 }, { "epoch": 2.783613445378151, "grad_norm": 11.210651055014003, "learning_rate": 1.6075304164204385e-07, "loss": 0.45608770847320557, "step": 1325 }, { "epoch": 2.7857142857142856, "grad_norm": 7.6934549954682465, "learning_rate": 1.5769277507582725e-07, "loss": 0.5190253257751465, "step": 1326 }, { "epoch": 2.78781512605042, "grad_norm": 13.485529024983622, "learning_rate": 1.5466145164751977e-07, "loss": 0.5670579075813293, "step": 1327 }, { "epoch": 2.7899159663865545, "grad_norm": 8.323766066498216, "learning_rate": 1.5165908947575914e-07, "loss": 0.4676046073436737, "step": 1328 }, { "epoch": 2.792016806722689, "grad_norm": 11.23683105022603, "learning_rate": 1.4868570650607816e-07, "loss": 0.2914016544818878, "step": 1329 }, { "epoch": 2.7941176470588234, "grad_norm": 17.258281880666775, "learning_rate": 1.4574132051079658e-07, "loss": 1.312021017074585, "step": 1330 }, { "epoch": 2.796218487394958, "grad_norm": 9.249285303088671, "learning_rate": 1.4282594908891666e-07, "loss": 0.3117330074310303, "step": 1331 }, { "epoch": 2.7983193277310923, "grad_norm": 9.999090012754882, "learning_rate": 1.3993960966601328e-07, "loss": 0.2705899775028229, "step": 1332 }, { "epoch": 2.8004201680672267, "grad_norm": 9.383011281190877, "learning_rate": 1.3708231949413676e-07, "loss": 0.2621600031852722, "step": 1333 }, { "epoch": 2.802521008403361, "grad_norm": 11.854334740139995, "learning_rate": 1.342540956517041e-07, "loss": 0.40849626064300537, "step": 1334 }, { "epoch": 2.8046218487394956, "grad_norm": 10.05169136975745, "learning_rate": 1.3145495504339856e-07, "loss": 0.2958400845527649, "step": 1335 }, { "epoch": 2.80672268907563, "grad_norm": 13.23608329431821, "learning_rate": 1.2868491440007015e-07, "loss": 0.7148715257644653, "step": 1336 }, { "epoch": 2.8088235294117645, "grad_norm": 11.059610717001991, "learning_rate": 1.2594399027863302e-07, "loss": 0.5344212055206299, "step": 1337 }, { "epoch": 2.810924369747899, "grad_norm": 9.850144807315097, "learning_rate": 1.232321990619695e-07, "loss": 0.3390062749385834, "step": 1338 }, { "epoch": 2.8130252100840334, "grad_norm": 12.782218028007712, "learning_rate": 1.205495569588283e-07, "loss": 0.6602462530136108, "step": 1339 }, { "epoch": 2.815126050420168, "grad_norm": 9.16202056000073, "learning_rate": 1.1789608000373209e-07, "loss": 0.2165951430797577, "step": 1340 }, { "epoch": 2.8172268907563023, "grad_norm": 9.639282432785762, "learning_rate": 1.1527178405687845e-07, "loss": 0.33785703778266907, "step": 1341 }, { "epoch": 2.8193277310924367, "grad_norm": 16.018225078825093, "learning_rate": 1.1267668480404559e-07, "loss": 0.49403730034828186, "step": 1342 }, { "epoch": 2.821428571428571, "grad_norm": 13.934169182843426, "learning_rate": 1.1011079775649969e-07, "loss": 0.5875406265258789, "step": 1343 }, { "epoch": 2.8235294117647056, "grad_norm": 10.314949862812936, "learning_rate": 1.0757413825090212e-07, "loss": 0.4375740885734558, "step": 1344 }, { "epoch": 2.82563025210084, "grad_norm": 11.258449104141572, "learning_rate": 1.0506672144921515e-07, "loss": 0.6797425746917725, "step": 1345 }, { "epoch": 2.8277310924369745, "grad_norm": 10.94330965699598, "learning_rate": 1.0258856233861524e-07, "loss": 0.36085596680641174, "step": 1346 }, { "epoch": 2.8298319327731094, "grad_norm": 9.174555872255727, "learning_rate": 1.0013967573140216e-07, "loss": 0.43387356400489807, "step": 1347 }, { "epoch": 2.831932773109244, "grad_norm": 11.299994940843328, "learning_rate": 9.77200762649072e-08, "loss": 0.44897180795669556, "step": 1348 }, { "epoch": 2.8340336134453783, "grad_norm": 10.295685178570979, "learning_rate": 9.532977840141123e-08, "loss": 0.22422294318675995, "step": 1349 }, { "epoch": 2.8361344537815127, "grad_norm": 8.127889054633478, "learning_rate": 9.29687964280529e-08, "loss": 0.642038106918335, "step": 1350 }, { "epoch": 2.838235294117647, "grad_norm": 11.9364629190832, "learning_rate": 9.063714445674776e-08, "loss": 0.8069763779640198, "step": 1351 }, { "epoch": 2.8403361344537816, "grad_norm": 11.63460546823257, "learning_rate": 8.833483642410101e-08, "loss": 0.36828362941741943, "step": 1352 }, { "epoch": 2.842436974789916, "grad_norm": 10.042654306225293, "learning_rate": 8.606188609132593e-08, "loss": 0.3019287586212158, "step": 1353 }, { "epoch": 2.8445378151260505, "grad_norm": 10.265644800483537, "learning_rate": 8.381830704415839e-08, "loss": 0.8440870046615601, "step": 1354 }, { "epoch": 2.846638655462185, "grad_norm": 11.67889502498505, "learning_rate": 8.160411269278079e-08, "loss": 2.0406436920166016, "step": 1355 }, { "epoch": 2.8487394957983194, "grad_norm": 11.659752416837614, "learning_rate": 7.941931627173827e-08, "loss": 0.23328936100006104, "step": 1356 }, { "epoch": 2.850840336134454, "grad_norm": 14.52369655098527, "learning_rate": 7.726393083985929e-08, "loss": 0.552147626876831, "step": 1357 }, { "epoch": 2.8529411764705883, "grad_norm": 8.467901286703713, "learning_rate": 7.513796928018069e-08, "loss": 0.38458627462387085, "step": 1358 }, { "epoch": 2.8550420168067228, "grad_norm": 7.6124021321848, "learning_rate": 7.30414442998667e-08, "loss": 0.3594217300415039, "step": 1359 }, { "epoch": 2.857142857142857, "grad_norm": 12.272607778978339, "learning_rate": 7.097436843013783e-08, "loss": 0.5628789067268372, "step": 1360 }, { "epoch": 2.8592436974789917, "grad_norm": 9.345724512814346, "learning_rate": 6.893675402618982e-08, "loss": 0.7206631898880005, "step": 1361 }, { "epoch": 2.861344537815126, "grad_norm": 13.88429967852116, "learning_rate": 6.692861326712652e-08, "loss": 0.8038681745529175, "step": 1362 }, { "epoch": 2.8634453781512605, "grad_norm": 15.55581945591023, "learning_rate": 6.494995815588101e-08, "loss": 0.7214268445968628, "step": 1363 }, { "epoch": 2.865546218487395, "grad_norm": 10.920690128080313, "learning_rate": 6.300080051914792e-08, "loss": 0.3757812976837158, "step": 1364 }, { "epoch": 2.8676470588235294, "grad_norm": 9.02181459032139, "learning_rate": 6.108115200731069e-08, "loss": 0.7154731154441833, "step": 1365 }, { "epoch": 2.869747899159664, "grad_norm": 12.825891467379778, "learning_rate": 5.9191024094374384e-08, "loss": 0.6805951595306396, "step": 1366 }, { "epoch": 2.8718487394957983, "grad_norm": 10.058469083040828, "learning_rate": 5.7330428077893575e-08, "loss": 0.41078895330429077, "step": 1367 }, { "epoch": 2.8739495798319328, "grad_norm": 8.36551151872813, "learning_rate": 5.5499375078906793e-08, "loss": 0.35648801922798157, "step": 1368 }, { "epoch": 2.8760504201680672, "grad_norm": 18.110033778975207, "learning_rate": 5.369787604186993e-08, "loss": 0.3897348642349243, "step": 1369 }, { "epoch": 2.8781512605042017, "grad_norm": 8.750407331993259, "learning_rate": 5.192594173459242e-08, "loss": 0.613540530204773, "step": 1370 }, { "epoch": 2.880252100840336, "grad_norm": 8.528139704622195, "learning_rate": 5.018358274816892e-08, "loss": 0.4445531964302063, "step": 1371 }, { "epoch": 2.8823529411764706, "grad_norm": 11.859807560110708, "learning_rate": 4.847080949691996e-08, "loss": 0.5488522052764893, "step": 1372 }, { "epoch": 2.884453781512605, "grad_norm": 10.23407853457865, "learning_rate": 4.6787632218326385e-08, "loss": 0.5596367716789246, "step": 1373 }, { "epoch": 2.8865546218487395, "grad_norm": 11.296256406092558, "learning_rate": 4.513406097297224e-08, "loss": 0.38018864393234253, "step": 1374 }, { "epoch": 2.888655462184874, "grad_norm": 12.156451974202069, "learning_rate": 4.351010564447977e-08, "loss": 0.661139726638794, "step": 1375 }, { "epoch": 2.8907563025210083, "grad_norm": 7.3170273092091485, "learning_rate": 4.1915775939454506e-08, "loss": 0.2944487929344177, "step": 1376 }, { "epoch": 2.892857142857143, "grad_norm": 9.331145503425732, "learning_rate": 4.035108138742416e-08, "loss": 0.23486556112766266, "step": 1377 }, { "epoch": 2.8949579831932772, "grad_norm": 11.863919468865829, "learning_rate": 3.881603134078482e-08, "loss": 0.3030620813369751, "step": 1378 }, { "epoch": 2.8970588235294117, "grad_norm": 12.573022210864796, "learning_rate": 3.731063497474152e-08, "loss": 0.3213701546192169, "step": 1379 }, { "epoch": 2.899159663865546, "grad_norm": 6.674326596763006, "learning_rate": 3.583490128725553e-08, "loss": 0.22970488667488098, "step": 1380 }, { "epoch": 2.9012605042016806, "grad_norm": 13.577742623896349, "learning_rate": 3.4388839098992154e-08, "loss": 0.32231050729751587, "step": 1381 }, { "epoch": 2.903361344537815, "grad_norm": 16.538926889757885, "learning_rate": 3.2972457053262466e-08, "loss": 0.9544304609298706, "step": 1382 }, { "epoch": 2.9054621848739495, "grad_norm": 20.22528777991979, "learning_rate": 3.158576361597887e-08, "loss": 0.7788558006286621, "step": 1383 }, { "epoch": 2.907563025210084, "grad_norm": 16.130951952781945, "learning_rate": 3.022876707559796e-08, "loss": 0.2601931393146515, "step": 1384 }, { "epoch": 2.9096638655462184, "grad_norm": 13.778242873434662, "learning_rate": 2.890147554307665e-08, "loss": 0.5957424640655518, "step": 1385 }, { "epoch": 2.911764705882353, "grad_norm": 10.295105075509534, "learning_rate": 2.7603896951817755e-08, "loss": 0.2927376925945282, "step": 1386 }, { "epoch": 2.9138655462184873, "grad_norm": 18.743122797679717, "learning_rate": 2.633603905762838e-08, "loss": 0.5990405082702637, "step": 1387 }, { "epoch": 2.9159663865546217, "grad_norm": 14.887502358752755, "learning_rate": 2.5097909438669964e-08, "loss": 0.4513130784034729, "step": 1388 }, { "epoch": 2.918067226890756, "grad_norm": 9.564277783357335, "learning_rate": 2.3889515495413297e-08, "loss": 0.6215352416038513, "step": 1389 }, { "epoch": 2.9201680672268906, "grad_norm": 5.003008688132311, "learning_rate": 2.2710864450596336e-08, "loss": 0.33804643154144287, "step": 1390 }, { "epoch": 2.9222689075630255, "grad_norm": 9.060022958520825, "learning_rate": 2.1561963349178704e-08, "loss": 0.4266011416912079, "step": 1391 }, { "epoch": 2.92436974789916, "grad_norm": 9.49932040181115, "learning_rate": 2.0442819058300588e-08, "loss": 0.3738781809806824, "step": 1392 }, { "epoch": 2.9264705882352944, "grad_norm": 11.474699381578137, "learning_rate": 1.935343826724112e-08, "loss": 0.26019287109375, "step": 1393 }, { "epoch": 2.928571428571429, "grad_norm": 9.426307307224148, "learning_rate": 1.8293827487380623e-08, "loss": 0.3799281120300293, "step": 1394 }, { "epoch": 2.9306722689075633, "grad_norm": 12.711822585165105, "learning_rate": 1.726399305215787e-08, "loss": 0.25459083914756775, "step": 1395 }, { "epoch": 2.9327731092436977, "grad_norm": 12.88439286989085, "learning_rate": 1.626394111703622e-08, "loss": 0.4746205806732178, "step": 1396 }, { "epoch": 2.934873949579832, "grad_norm": 15.570716719123634, "learning_rate": 1.5293677659463104e-08, "loss": 0.4622001647949219, "step": 1397 }, { "epoch": 2.9369747899159666, "grad_norm": 6.94964938645385, "learning_rate": 1.4353208478837256e-08, "loss": 0.18047931790351868, "step": 1398 }, { "epoch": 2.939075630252101, "grad_norm": 10.000416990177895, "learning_rate": 1.3442539196472647e-08, "loss": 0.37007540464401245, "step": 1399 }, { "epoch": 2.9411764705882355, "grad_norm": 8.931832554567432, "learning_rate": 1.2561675255564621e-08, "loss": 0.7158060073852539, "step": 1400 }, { "epoch": 2.94327731092437, "grad_norm": 11.017696540531707, "learning_rate": 1.1710621921159904e-08, "loss": 0.9123420119285583, "step": 1401 }, { "epoch": 2.9453781512605044, "grad_norm": 13.459702577958145, "learning_rate": 1.0889384280119985e-08, "loss": 1.1057486534118652, "step": 1402 }, { "epoch": 2.947478991596639, "grad_norm": 11.44996683672279, "learning_rate": 1.009796724109613e-08, "loss": 0.36926376819610596, "step": 1403 }, { "epoch": 2.9495798319327733, "grad_norm": 8.32543080681241, "learning_rate": 9.336375534497732e-09, "loss": 0.5240511298179626, "step": 1404 }, { "epoch": 2.9516806722689077, "grad_norm": 16.41897161685657, "learning_rate": 8.60461371246235e-09, "loss": 1.0361064672470093, "step": 1405 }, { "epoch": 2.953781512605042, "grad_norm": 15.488903163881536, "learning_rate": 7.902686148831273e-09, "loss": 0.7314852476119995, "step": 1406 }, { "epoch": 2.9558823529411766, "grad_norm": 10.609488526695282, "learning_rate": 7.230597039123433e-09, "loss": 0.5929103493690491, "step": 1407 }, { "epoch": 2.957983193277311, "grad_norm": 9.545516911394982, "learning_rate": 6.588350400507093e-09, "loss": 0.24979953467845917, "step": 1408 }, { "epoch": 2.9600840336134455, "grad_norm": 16.112825448357878, "learning_rate": 5.975950071779313e-09, "loss": 0.810958206653595, "step": 1409 }, { "epoch": 2.96218487394958, "grad_norm": 8.6763799711489, "learning_rate": 5.393399713341518e-09, "loss": 0.4567590355873108, "step": 1410 }, { "epoch": 2.9642857142857144, "grad_norm": 10.068385761335634, "learning_rate": 4.8407028071773e-09, "loss": 0.34989726543426514, "step": 1411 }, { "epoch": 2.966386554621849, "grad_norm": 11.30284408928835, "learning_rate": 4.317862656831873e-09, "loss": 0.3826170563697815, "step": 1412 }, { "epoch": 2.9684873949579833, "grad_norm": 13.404023548287954, "learning_rate": 3.8248823873932026e-09, "loss": 0.25103145837783813, "step": 1413 }, { "epoch": 2.9705882352941178, "grad_norm": 10.423177049027613, "learning_rate": 3.361764945473134e-09, "loss": 0.33963871002197266, "step": 1414 }, { "epoch": 2.972689075630252, "grad_norm": 9.701343246515489, "learning_rate": 2.928513099187402e-09, "loss": 0.5596168637275696, "step": 1415 }, { "epoch": 2.9747899159663866, "grad_norm": 8.852327800983687, "learning_rate": 2.52512943814176e-09, "loss": 0.3114224374294281, "step": 1416 }, { "epoch": 2.976890756302521, "grad_norm": 8.400624424787871, "learning_rate": 2.151616373417542e-09, "loss": 0.5350728631019592, "step": 1417 }, { "epoch": 2.9789915966386555, "grad_norm": 10.794481012917993, "learning_rate": 1.8079761375522365e-09, "loss": 0.6644730567932129, "step": 1418 }, { "epoch": 2.98109243697479, "grad_norm": 9.282496929164791, "learning_rate": 1.4942107845317132e-09, "loss": 0.2426847219467163, "step": 1419 }, { "epoch": 2.9831932773109244, "grad_norm": 9.113139352861424, "learning_rate": 1.210322189774682e-09, "loss": 0.2127893567085266, "step": 1420 }, { "epoch": 2.985294117647059, "grad_norm": 10.249299245135052, "learning_rate": 9.563120501221434e-10, "loss": 0.31507742404937744, "step": 1421 }, { "epoch": 2.9873949579831933, "grad_norm": 10.643798027898825, "learning_rate": 7.321818838279537e-10, "loss": 0.49292629957199097, "step": 1422 }, { "epoch": 2.9894957983193278, "grad_norm": 7.762285292055822, "learning_rate": 5.379330305488317e-10, "loss": 0.25357064604759216, "step": 1423 }, { "epoch": 2.991596638655462, "grad_norm": 7.423869479037056, "learning_rate": 3.735666513371428e-10, "loss": 0.4229947328567505, "step": 1424 }, { "epoch": 2.9936974789915967, "grad_norm": 10.794639890750766, "learning_rate": 2.3908372863368223e-10, "loss": 0.5679960250854492, "step": 1425 }, { "epoch": 2.995798319327731, "grad_norm": 9.814210260546373, "learning_rate": 1.344850662604591e-10, "loss": 0.3406621515750885, "step": 1426 }, { "epoch": 2.9978991596638656, "grad_norm": 7.52747077028302, "learning_rate": 5.977128941903055e-11, "loss": 0.3986052870750427, "step": 1427 }, { "epoch": 3.0, "grad_norm": 8.379236977666347, "learning_rate": 1.494284468384066e-11, "loss": 0.49183082580566406, "step": 1428 }, { "epoch": 3.0, "step": 1428, "total_flos": 3902317486080.0, "train_loss": 1.6620939874066776, "train_runtime": 1853.0016, "train_samples_per_second": 3.081, "train_steps_per_second": 0.771 } ], "logging_steps": 1, "max_steps": 1428, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3902317486080.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }