{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 5712, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005252100840336134, "grad_norm": 34.372692050718165, "learning_rate": 0.0, "loss": 4.102292060852051, "step": 1 }, { "epoch": 0.0010504201680672268, "grad_norm": 36.51762099089081, "learning_rate": 1.7482517482517483e-08, "loss": 4.162827491760254, "step": 2 }, { "epoch": 0.0015756302521008404, "grad_norm": 39.35112829697694, "learning_rate": 3.4965034965034967e-08, "loss": 4.137117385864258, "step": 3 }, { "epoch": 0.0021008403361344537, "grad_norm": 31.085406362496506, "learning_rate": 5.244755244755245e-08, "loss": 4.171526908874512, "step": 4 }, { "epoch": 0.0026260504201680674, "grad_norm": 36.90007791074726, "learning_rate": 6.993006993006993e-08, "loss": 4.477399826049805, "step": 5 }, { "epoch": 0.0031512605042016808, "grad_norm": 33.27901393600758, "learning_rate": 8.741258741258742e-08, "loss": 4.03668212890625, "step": 6 }, { "epoch": 0.003676470588235294, "grad_norm": 38.00002780928001, "learning_rate": 1.048951048951049e-07, "loss": 4.246670722961426, "step": 7 }, { "epoch": 0.004201680672268907, "grad_norm": 36.001513542837934, "learning_rate": 1.223776223776224e-07, "loss": 3.781754732131958, "step": 8 }, { "epoch": 0.004726890756302521, "grad_norm": 39.13792126575477, "learning_rate": 1.3986013986013987e-07, "loss": 4.13449764251709, "step": 9 }, { "epoch": 0.005252100840336135, "grad_norm": 34.251627318099416, "learning_rate": 1.5734265734265737e-07, "loss": 4.231945037841797, "step": 10 }, { "epoch": 0.005777310924369748, "grad_norm": 38.693008481778556, "learning_rate": 1.7482517482517484e-07, "loss": 4.490153789520264, "step": 11 }, { "epoch": 0.0063025210084033615, "grad_norm": 28.03685486294642, "learning_rate": 1.9230769230769234e-07, "loss": 3.567911148071289, "step": 12 }, { "epoch": 0.006827731092436975, "grad_norm": 32.79480844249344, "learning_rate": 2.097902097902098e-07, "loss": 3.733537435531616, "step": 13 }, { "epoch": 0.007352941176470588, "grad_norm": 47.860697756169074, "learning_rate": 2.2727272727272729e-07, "loss": 4.282741546630859, "step": 14 }, { "epoch": 0.007878151260504201, "grad_norm": 48.34040544579808, "learning_rate": 2.447552447552448e-07, "loss": 4.783939361572266, "step": 15 }, { "epoch": 0.008403361344537815, "grad_norm": 27.610574129516216, "learning_rate": 2.622377622377623e-07, "loss": 3.809292793273926, "step": 16 }, { "epoch": 0.008928571428571428, "grad_norm": 38.61921628311389, "learning_rate": 2.7972027972027973e-07, "loss": 4.540763854980469, "step": 17 }, { "epoch": 0.009453781512605041, "grad_norm": 38.36264754368808, "learning_rate": 2.9720279720279723e-07, "loss": 4.1531805992126465, "step": 18 }, { "epoch": 0.009978991596638655, "grad_norm": 31.561418642945256, "learning_rate": 3.1468531468531473e-07, "loss": 3.653745174407959, "step": 19 }, { "epoch": 0.01050420168067227, "grad_norm": 33.74062557341127, "learning_rate": 3.321678321678322e-07, "loss": 4.493264198303223, "step": 20 }, { "epoch": 0.011029411764705883, "grad_norm": 32.8043036933105, "learning_rate": 3.496503496503497e-07, "loss": 3.8892321586608887, "step": 21 }, { "epoch": 0.011554621848739496, "grad_norm": 40.810783301273766, "learning_rate": 3.6713286713286713e-07, "loss": 4.073638439178467, "step": 22 }, { "epoch": 0.01207983193277311, "grad_norm": 38.157265350961374, "learning_rate": 3.846153846153847e-07, "loss": 4.504125118255615, "step": 23 }, { "epoch": 0.012605042016806723, "grad_norm": 35.474059489035355, "learning_rate": 4.020979020979021e-07, "loss": 4.312219619750977, "step": 24 }, { "epoch": 0.013130252100840336, "grad_norm": 29.174644468083308, "learning_rate": 4.195804195804196e-07, "loss": 3.979979991912842, "step": 25 }, { "epoch": 0.01365546218487395, "grad_norm": 31.827136755689573, "learning_rate": 4.3706293706293707e-07, "loss": 4.110677242279053, "step": 26 }, { "epoch": 0.014180672268907563, "grad_norm": 38.844533989686525, "learning_rate": 4.5454545454545457e-07, "loss": 4.600620269775391, "step": 27 }, { "epoch": 0.014705882352941176, "grad_norm": 33.36776770479228, "learning_rate": 4.72027972027972e-07, "loss": 4.107484817504883, "step": 28 }, { "epoch": 0.01523109243697479, "grad_norm": 33.2123109875993, "learning_rate": 4.895104895104896e-07, "loss": 3.9041717052459717, "step": 29 }, { "epoch": 0.015756302521008403, "grad_norm": 31.29738620396506, "learning_rate": 5.06993006993007e-07, "loss": 4.2493743896484375, "step": 30 }, { "epoch": 0.016281512605042018, "grad_norm": 37.28210473967995, "learning_rate": 5.244755244755246e-07, "loss": 4.2800374031066895, "step": 31 }, { "epoch": 0.01680672268907563, "grad_norm": 30.510043067679604, "learning_rate": 5.41958041958042e-07, "loss": 4.002791881561279, "step": 32 }, { "epoch": 0.017331932773109245, "grad_norm": 26.80401075149564, "learning_rate": 5.594405594405595e-07, "loss": 3.7842516899108887, "step": 33 }, { "epoch": 0.017857142857142856, "grad_norm": 25.100534103543946, "learning_rate": 5.76923076923077e-07, "loss": 2.3941800594329834, "step": 34 }, { "epoch": 0.01838235294117647, "grad_norm": 31.241036694937726, "learning_rate": 5.944055944055945e-07, "loss": 4.037428379058838, "step": 35 }, { "epoch": 0.018907563025210083, "grad_norm": 25.681059449130732, "learning_rate": 6.118881118881119e-07, "loss": 3.7350282669067383, "step": 36 }, { "epoch": 0.019432773109243698, "grad_norm": 22.999454428628805, "learning_rate": 6.293706293706295e-07, "loss": 3.6307826042175293, "step": 37 }, { "epoch": 0.01995798319327731, "grad_norm": 25.210696458344774, "learning_rate": 6.468531468531469e-07, "loss": 3.746466636657715, "step": 38 }, { "epoch": 0.020483193277310924, "grad_norm": 30.80804783213676, "learning_rate": 6.643356643356644e-07, "loss": 3.945478677749634, "step": 39 }, { "epoch": 0.02100840336134454, "grad_norm": 23.155886619235893, "learning_rate": 6.818181818181818e-07, "loss": 3.9150636196136475, "step": 40 }, { "epoch": 0.02153361344537815, "grad_norm": 23.936510414650403, "learning_rate": 6.993006993006994e-07, "loss": 3.6661462783813477, "step": 41 }, { "epoch": 0.022058823529411766, "grad_norm": 38.55267469232255, "learning_rate": 7.167832167832168e-07, "loss": 4.267165184020996, "step": 42 }, { "epoch": 0.022584033613445378, "grad_norm": 18.85328597089255, "learning_rate": 7.342657342657343e-07, "loss": 3.355626106262207, "step": 43 }, { "epoch": 0.023109243697478993, "grad_norm": 19.375880502190483, "learning_rate": 7.517482517482517e-07, "loss": 3.7092792987823486, "step": 44 }, { "epoch": 0.023634453781512604, "grad_norm": 15.164586828853214, "learning_rate": 7.692307692307694e-07, "loss": 3.6890788078308105, "step": 45 }, { "epoch": 0.02415966386554622, "grad_norm": 15.346695121215316, "learning_rate": 7.867132867132868e-07, "loss": 2.885910987854004, "step": 46 }, { "epoch": 0.02468487394957983, "grad_norm": 15.946481065675485, "learning_rate": 8.041958041958043e-07, "loss": 3.008267879486084, "step": 47 }, { "epoch": 0.025210084033613446, "grad_norm": 21.26001098082826, "learning_rate": 8.216783216783217e-07, "loss": 3.832730770111084, "step": 48 }, { "epoch": 0.025735294117647058, "grad_norm": 14.434196417210957, "learning_rate": 8.391608391608393e-07, "loss": 3.1919186115264893, "step": 49 }, { "epoch": 0.026260504201680673, "grad_norm": 14.09245979625255, "learning_rate": 8.566433566433567e-07, "loss": 3.3092079162597656, "step": 50 }, { "epoch": 0.026785714285714284, "grad_norm": 14.105978255824217, "learning_rate": 8.741258741258741e-07, "loss": 3.7159504890441895, "step": 51 }, { "epoch": 0.0273109243697479, "grad_norm": 11.33389583742992, "learning_rate": 8.916083916083917e-07, "loss": 2.8575963973999023, "step": 52 }, { "epoch": 0.027836134453781514, "grad_norm": 13.7592294205824, "learning_rate": 9.090909090909091e-07, "loss": 3.6637725830078125, "step": 53 }, { "epoch": 0.028361344537815126, "grad_norm": 21.452492952094683, "learning_rate": 9.265734265734266e-07, "loss": 3.2712621688842773, "step": 54 }, { "epoch": 0.02888655462184874, "grad_norm": 18.171724171552793, "learning_rate": 9.44055944055944e-07, "loss": 4.053844451904297, "step": 55 }, { "epoch": 0.029411764705882353, "grad_norm": 12.70889446822849, "learning_rate": 9.615384615384617e-07, "loss": 3.6038215160369873, "step": 56 }, { "epoch": 0.029936974789915968, "grad_norm": 18.06293758360709, "learning_rate": 9.790209790209791e-07, "loss": 3.310293674468994, "step": 57 }, { "epoch": 0.03046218487394958, "grad_norm": 16.298729106431914, "learning_rate": 9.965034965034966e-07, "loss": 3.351804256439209, "step": 58 }, { "epoch": 0.030987394957983194, "grad_norm": 11.66177063101149, "learning_rate": 1.013986013986014e-06, "loss": 3.5310189723968506, "step": 59 }, { "epoch": 0.031512605042016806, "grad_norm": 16.79127622060395, "learning_rate": 1.0314685314685317e-06, "loss": 3.427992343902588, "step": 60 }, { "epoch": 0.03203781512605042, "grad_norm": 10.889976358660403, "learning_rate": 1.0489510489510491e-06, "loss": 3.3590340614318848, "step": 61 }, { "epoch": 0.032563025210084036, "grad_norm": 22.24062386613456, "learning_rate": 1.0664335664335666e-06, "loss": 3.339024305343628, "step": 62 }, { "epoch": 0.03308823529411765, "grad_norm": 14.853723489288882, "learning_rate": 1.083916083916084e-06, "loss": 3.7138495445251465, "step": 63 }, { "epoch": 0.03361344537815126, "grad_norm": 20.294398852176837, "learning_rate": 1.1013986013986015e-06, "loss": 4.287203311920166, "step": 64 }, { "epoch": 0.03413865546218487, "grad_norm": 15.44050443054915, "learning_rate": 1.118881118881119e-06, "loss": 3.9542369842529297, "step": 65 }, { "epoch": 0.03466386554621849, "grad_norm": 43.24480760624098, "learning_rate": 1.1363636363636364e-06, "loss": 4.417222023010254, "step": 66 }, { "epoch": 0.0351890756302521, "grad_norm": 16.74594067652709, "learning_rate": 1.153846153846154e-06, "loss": 3.835775375366211, "step": 67 }, { "epoch": 0.03571428571428571, "grad_norm": 16.264180303923414, "learning_rate": 1.1713286713286715e-06, "loss": 3.7545199394226074, "step": 68 }, { "epoch": 0.03623949579831933, "grad_norm": 12.139972439257933, "learning_rate": 1.188811188811189e-06, "loss": 4.071127891540527, "step": 69 }, { "epoch": 0.03676470588235294, "grad_norm": 24.150412848493595, "learning_rate": 1.2062937062937064e-06, "loss": 3.519155502319336, "step": 70 }, { "epoch": 0.037289915966386554, "grad_norm": 16.08963114427864, "learning_rate": 1.2237762237762238e-06, "loss": 3.74234938621521, "step": 71 }, { "epoch": 0.037815126050420166, "grad_norm": 20.34658014070812, "learning_rate": 1.2412587412587413e-06, "loss": 3.2490570545196533, "step": 72 }, { "epoch": 0.038340336134453784, "grad_norm": 11.333461679214565, "learning_rate": 1.258741258741259e-06, "loss": 3.378715991973877, "step": 73 }, { "epoch": 0.038865546218487396, "grad_norm": 15.421380175445977, "learning_rate": 1.2762237762237764e-06, "loss": 3.179351806640625, "step": 74 }, { "epoch": 0.03939075630252101, "grad_norm": 12.364609041568757, "learning_rate": 1.2937062937062938e-06, "loss": 3.696169853210449, "step": 75 }, { "epoch": 0.03991596638655462, "grad_norm": 14.797487208066967, "learning_rate": 1.3111888111888113e-06, "loss": 3.8278417587280273, "step": 76 }, { "epoch": 0.04044117647058824, "grad_norm": 19.575504410027442, "learning_rate": 1.3286713286713287e-06, "loss": 3.6845762729644775, "step": 77 }, { "epoch": 0.04096638655462185, "grad_norm": 9.795650741073091, "learning_rate": 1.3461538461538462e-06, "loss": 3.3823916912078857, "step": 78 }, { "epoch": 0.04149159663865546, "grad_norm": 10.62079090075366, "learning_rate": 1.3636363636363636e-06, "loss": 3.334550380706787, "step": 79 }, { "epoch": 0.04201680672268908, "grad_norm": 8.883324972999093, "learning_rate": 1.381118881118881e-06, "loss": 2.9630346298217773, "step": 80 }, { "epoch": 0.04254201680672269, "grad_norm": 16.176590194969382, "learning_rate": 1.3986013986013987e-06, "loss": 2.7338333129882812, "step": 81 }, { "epoch": 0.0430672268907563, "grad_norm": 20.83406849219658, "learning_rate": 1.4160839160839162e-06, "loss": 3.1813783645629883, "step": 82 }, { "epoch": 0.043592436974789914, "grad_norm": 10.425495266080052, "learning_rate": 1.4335664335664336e-06, "loss": 3.501983642578125, "step": 83 }, { "epoch": 0.04411764705882353, "grad_norm": 33.92403196912798, "learning_rate": 1.451048951048951e-06, "loss": 4.1713409423828125, "step": 84 }, { "epoch": 0.044642857142857144, "grad_norm": 14.58308011283638, "learning_rate": 1.4685314685314685e-06, "loss": 3.1751015186309814, "step": 85 }, { "epoch": 0.045168067226890755, "grad_norm": 9.735714288847259, "learning_rate": 1.486013986013986e-06, "loss": 2.9015283584594727, "step": 86 }, { "epoch": 0.04569327731092437, "grad_norm": 12.752318960038618, "learning_rate": 1.5034965034965034e-06, "loss": 3.1568045616149902, "step": 87 }, { "epoch": 0.046218487394957986, "grad_norm": 10.24072405792317, "learning_rate": 1.5209790209790213e-06, "loss": 3.683905601501465, "step": 88 }, { "epoch": 0.0467436974789916, "grad_norm": 16.137425682786226, "learning_rate": 1.5384615384615387e-06, "loss": 3.0892813205718994, "step": 89 }, { "epoch": 0.04726890756302521, "grad_norm": 8.408487827632596, "learning_rate": 1.5559440559440562e-06, "loss": 3.147287130355835, "step": 90 }, { "epoch": 0.04779411764705882, "grad_norm": 21.475842549976687, "learning_rate": 1.5734265734265736e-06, "loss": 3.7005114555358887, "step": 91 }, { "epoch": 0.04831932773109244, "grad_norm": 11.167540754791897, "learning_rate": 1.590909090909091e-06, "loss": 3.4915692806243896, "step": 92 }, { "epoch": 0.04884453781512605, "grad_norm": 13.65410221246126, "learning_rate": 1.6083916083916085e-06, "loss": 2.9103403091430664, "step": 93 }, { "epoch": 0.04936974789915966, "grad_norm": 15.532940581970639, "learning_rate": 1.625874125874126e-06, "loss": 2.892765522003174, "step": 94 }, { "epoch": 0.04989495798319328, "grad_norm": 9.5126247250127, "learning_rate": 1.6433566433566434e-06, "loss": 2.752000093460083, "step": 95 }, { "epoch": 0.05042016806722689, "grad_norm": 15.05522579035742, "learning_rate": 1.660839160839161e-06, "loss": 3.762812852859497, "step": 96 }, { "epoch": 0.050945378151260504, "grad_norm": 9.850059079966185, "learning_rate": 1.6783216783216785e-06, "loss": 3.203524589538574, "step": 97 }, { "epoch": 0.051470588235294115, "grad_norm": 11.003796272875793, "learning_rate": 1.695804195804196e-06, "loss": 2.939239025115967, "step": 98 }, { "epoch": 0.051995798319327734, "grad_norm": 7.308308991898506, "learning_rate": 1.7132867132867134e-06, "loss": 3.039363145828247, "step": 99 }, { "epoch": 0.052521008403361345, "grad_norm": 18.340776852855072, "learning_rate": 1.7307692307692308e-06, "loss": 3.8210678100585938, "step": 100 }, { "epoch": 0.05304621848739496, "grad_norm": 12.528552794960676, "learning_rate": 1.7482517482517483e-06, "loss": 2.9607348442077637, "step": 101 }, { "epoch": 0.05357142857142857, "grad_norm": 11.610455034799767, "learning_rate": 1.7657342657342657e-06, "loss": 3.047253370285034, "step": 102 }, { "epoch": 0.05409663865546219, "grad_norm": 8.650454729688116, "learning_rate": 1.7832167832167834e-06, "loss": 3.390791177749634, "step": 103 }, { "epoch": 0.0546218487394958, "grad_norm": 11.358288501544784, "learning_rate": 1.8006993006993008e-06, "loss": 2.8329169750213623, "step": 104 }, { "epoch": 0.05514705882352941, "grad_norm": 12.077596878633972, "learning_rate": 1.8181818181818183e-06, "loss": 3.383918285369873, "step": 105 }, { "epoch": 0.05567226890756303, "grad_norm": 16.712929429294753, "learning_rate": 1.8356643356643357e-06, "loss": 5.218741416931152, "step": 106 }, { "epoch": 0.05619747899159664, "grad_norm": 12.765028044884678, "learning_rate": 1.8531468531468532e-06, "loss": 3.01009202003479, "step": 107 }, { "epoch": 0.05672268907563025, "grad_norm": 7.58903376172291, "learning_rate": 1.8706293706293706e-06, "loss": 2.75844144821167, "step": 108 }, { "epoch": 0.05724789915966386, "grad_norm": 8.522158351054237, "learning_rate": 1.888111888111888e-06, "loss": 2.7749361991882324, "step": 109 }, { "epoch": 0.05777310924369748, "grad_norm": 8.608247143023526, "learning_rate": 1.9055944055944055e-06, "loss": 3.086836814880371, "step": 110 }, { "epoch": 0.058298319327731093, "grad_norm": 8.843532629640318, "learning_rate": 1.9230769230769234e-06, "loss": 3.258808135986328, "step": 111 }, { "epoch": 0.058823529411764705, "grad_norm": 14.132519139896296, "learning_rate": 1.9405594405594406e-06, "loss": 2.722972869873047, "step": 112 }, { "epoch": 0.05934873949579832, "grad_norm": 11.283796170078592, "learning_rate": 1.9580419580419583e-06, "loss": 3.538330316543579, "step": 113 }, { "epoch": 0.059873949579831935, "grad_norm": 8.882121571795718, "learning_rate": 1.9755244755244755e-06, "loss": 2.9473233222961426, "step": 114 }, { "epoch": 0.06039915966386555, "grad_norm": 10.461716733325476, "learning_rate": 1.993006993006993e-06, "loss": 3.2069764137268066, "step": 115 }, { "epoch": 0.06092436974789916, "grad_norm": 10.79135880503189, "learning_rate": 2.0104895104895104e-06, "loss": 2.776052951812744, "step": 116 }, { "epoch": 0.06144957983193277, "grad_norm": 12.852647194585002, "learning_rate": 2.027972027972028e-06, "loss": 3.164353370666504, "step": 117 }, { "epoch": 0.06197478991596639, "grad_norm": 20.595577508728553, "learning_rate": 2.0454545454545457e-06, "loss": 3.7819983959198, "step": 118 }, { "epoch": 0.0625, "grad_norm": 7.416532992444785, "learning_rate": 2.0629370629370634e-06, "loss": 3.207777500152588, "step": 119 }, { "epoch": 0.06302521008403361, "grad_norm": 8.288644250013293, "learning_rate": 2.0804195804195806e-06, "loss": 3.3697268962860107, "step": 120 }, { "epoch": 0.06355042016806722, "grad_norm": 11.053204236520791, "learning_rate": 2.0979020979020983e-06, "loss": 3.0271708965301514, "step": 121 }, { "epoch": 0.06407563025210083, "grad_norm": 10.881721316994767, "learning_rate": 2.1153846153846155e-06, "loss": 2.805438756942749, "step": 122 }, { "epoch": 0.06460084033613446, "grad_norm": 11.540998180504324, "learning_rate": 2.132867132867133e-06, "loss": 2.9184906482696533, "step": 123 }, { "epoch": 0.06512605042016807, "grad_norm": 9.557384208664955, "learning_rate": 2.1503496503496504e-06, "loss": 3.7422034740448, "step": 124 }, { "epoch": 0.06565126050420168, "grad_norm": 11.178002440365203, "learning_rate": 2.167832167832168e-06, "loss": 2.6965441703796387, "step": 125 }, { "epoch": 0.0661764705882353, "grad_norm": 7.619594789595347, "learning_rate": 2.1853146853146857e-06, "loss": 3.0583324432373047, "step": 126 }, { "epoch": 0.0667016806722689, "grad_norm": 9.387918036172614, "learning_rate": 2.202797202797203e-06, "loss": 2.6428847312927246, "step": 127 }, { "epoch": 0.06722689075630252, "grad_norm": 11.323075383472942, "learning_rate": 2.2202797202797206e-06, "loss": 3.23486328125, "step": 128 }, { "epoch": 0.06775210084033613, "grad_norm": 13.402078543796202, "learning_rate": 2.237762237762238e-06, "loss": 3.3365354537963867, "step": 129 }, { "epoch": 0.06827731092436974, "grad_norm": 11.308850890428076, "learning_rate": 2.2552447552447555e-06, "loss": 3.187561273574829, "step": 130 }, { "epoch": 0.06880252100840337, "grad_norm": 8.737315488014152, "learning_rate": 2.2727272727272728e-06, "loss": 2.4140090942382812, "step": 131 }, { "epoch": 0.06932773109243698, "grad_norm": 12.100833741328035, "learning_rate": 2.2902097902097904e-06, "loss": 3.1806931495666504, "step": 132 }, { "epoch": 0.06985294117647059, "grad_norm": 8.511670123183169, "learning_rate": 2.307692307692308e-06, "loss": 3.1618127822875977, "step": 133 }, { "epoch": 0.0703781512605042, "grad_norm": 16.89380862864223, "learning_rate": 2.3251748251748253e-06, "loss": 3.4971280097961426, "step": 134 }, { "epoch": 0.07090336134453781, "grad_norm": 9.189097737179944, "learning_rate": 2.342657342657343e-06, "loss": 2.7373313903808594, "step": 135 }, { "epoch": 0.07142857142857142, "grad_norm": 8.941802174535024, "learning_rate": 2.36013986013986e-06, "loss": 3.1305856704711914, "step": 136 }, { "epoch": 0.07195378151260504, "grad_norm": 6.820542963713525, "learning_rate": 2.377622377622378e-06, "loss": 2.563201904296875, "step": 137 }, { "epoch": 0.07247899159663866, "grad_norm": 11.611160703686382, "learning_rate": 2.395104895104895e-06, "loss": 2.9833860397338867, "step": 138 }, { "epoch": 0.07300420168067227, "grad_norm": 11.973193711874616, "learning_rate": 2.4125874125874128e-06, "loss": 3.0472545623779297, "step": 139 }, { "epoch": 0.07352941176470588, "grad_norm": 13.2367114956955, "learning_rate": 2.43006993006993e-06, "loss": 3.030299425125122, "step": 140 }, { "epoch": 0.0740546218487395, "grad_norm": 10.665734481419761, "learning_rate": 2.4475524475524477e-06, "loss": 3.1181631088256836, "step": 141 }, { "epoch": 0.07457983193277311, "grad_norm": 10.299897816995667, "learning_rate": 2.4650349650349653e-06, "loss": 3.3858418464660645, "step": 142 }, { "epoch": 0.07510504201680672, "grad_norm": 14.658268440587843, "learning_rate": 2.4825174825174825e-06, "loss": 3.0084147453308105, "step": 143 }, { "epoch": 0.07563025210084033, "grad_norm": 8.416572561906216, "learning_rate": 2.5e-06, "loss": 2.6434106826782227, "step": 144 }, { "epoch": 0.07615546218487394, "grad_norm": 12.773713450167529, "learning_rate": 2.517482517482518e-06, "loss": 3.660475730895996, "step": 145 }, { "epoch": 0.07668067226890757, "grad_norm": 13.643829409806205, "learning_rate": 2.534965034965035e-06, "loss": 2.801238775253296, "step": 146 }, { "epoch": 0.07720588235294118, "grad_norm": 9.936924513187812, "learning_rate": 2.5524475524475528e-06, "loss": 3.1104047298431396, "step": 147 }, { "epoch": 0.07773109243697479, "grad_norm": 9.006787964351338, "learning_rate": 2.56993006993007e-06, "loss": 2.8557915687561035, "step": 148 }, { "epoch": 0.0782563025210084, "grad_norm": 8.546555604210928, "learning_rate": 2.5874125874125877e-06, "loss": 2.5419888496398926, "step": 149 }, { "epoch": 0.07878151260504201, "grad_norm": 9.70164848054522, "learning_rate": 2.604895104895105e-06, "loss": 3.414214849472046, "step": 150 }, { "epoch": 0.07930672268907563, "grad_norm": 15.884940591453903, "learning_rate": 2.6223776223776225e-06, "loss": 2.6538589000701904, "step": 151 }, { "epoch": 0.07983193277310924, "grad_norm": 14.316305610479533, "learning_rate": 2.63986013986014e-06, "loss": 2.4650402069091797, "step": 152 }, { "epoch": 0.08035714285714286, "grad_norm": 11.143203112170086, "learning_rate": 2.6573426573426574e-06, "loss": 3.405933380126953, "step": 153 }, { "epoch": 0.08088235294117647, "grad_norm": 9.826512593789431, "learning_rate": 2.674825174825175e-06, "loss": 2.706834077835083, "step": 154 }, { "epoch": 0.08140756302521009, "grad_norm": 15.185136292845078, "learning_rate": 2.6923076923076923e-06, "loss": 3.490084409713745, "step": 155 }, { "epoch": 0.0819327731092437, "grad_norm": 9.493774180363053, "learning_rate": 2.70979020979021e-06, "loss": 2.6900107860565186, "step": 156 }, { "epoch": 0.08245798319327731, "grad_norm": 14.266301028366609, "learning_rate": 2.7272727272727272e-06, "loss": 2.7169108390808105, "step": 157 }, { "epoch": 0.08298319327731092, "grad_norm": 12.370813316919985, "learning_rate": 2.744755244755245e-06, "loss": 3.0527358055114746, "step": 158 }, { "epoch": 0.08350840336134453, "grad_norm": 11.574919073948696, "learning_rate": 2.762237762237762e-06, "loss": 2.889268636703491, "step": 159 }, { "epoch": 0.08403361344537816, "grad_norm": 8.362332310841456, "learning_rate": 2.7797202797202798e-06, "loss": 2.820096015930176, "step": 160 }, { "epoch": 0.08455882352941177, "grad_norm": 10.952826531084153, "learning_rate": 2.7972027972027974e-06, "loss": 2.741314172744751, "step": 161 }, { "epoch": 0.08508403361344538, "grad_norm": 12.96774516203996, "learning_rate": 2.8146853146853147e-06, "loss": 3.665605068206787, "step": 162 }, { "epoch": 0.08560924369747899, "grad_norm": 14.061227256256071, "learning_rate": 2.8321678321678323e-06, "loss": 3.036461353302002, "step": 163 }, { "epoch": 0.0861344537815126, "grad_norm": 12.386763761192109, "learning_rate": 2.8496503496503496e-06, "loss": 2.961890459060669, "step": 164 }, { "epoch": 0.08665966386554622, "grad_norm": 12.069018255605632, "learning_rate": 2.8671328671328672e-06, "loss": 2.5157463550567627, "step": 165 }, { "epoch": 0.08718487394957983, "grad_norm": 16.020284110145372, "learning_rate": 2.8846153846153845e-06, "loss": 2.6061654090881348, "step": 166 }, { "epoch": 0.08771008403361344, "grad_norm": 10.081405378228968, "learning_rate": 2.902097902097902e-06, "loss": 2.827906608581543, "step": 167 }, { "epoch": 0.08823529411764706, "grad_norm": 20.078073394145157, "learning_rate": 2.91958041958042e-06, "loss": 2.846405506134033, "step": 168 }, { "epoch": 0.08876050420168068, "grad_norm": 8.930183892960082, "learning_rate": 2.937062937062937e-06, "loss": 3.1270575523376465, "step": 169 }, { "epoch": 0.08928571428571429, "grad_norm": 8.646868809078281, "learning_rate": 2.954545454545455e-06, "loss": 2.7245287895202637, "step": 170 }, { "epoch": 0.0898109243697479, "grad_norm": 9.023992096062173, "learning_rate": 2.972027972027972e-06, "loss": 2.8285470008850098, "step": 171 }, { "epoch": 0.09033613445378151, "grad_norm": 9.408629636600987, "learning_rate": 2.98951048951049e-06, "loss": 3.208327054977417, "step": 172 }, { "epoch": 0.09086134453781512, "grad_norm": 15.435922685471567, "learning_rate": 3.006993006993007e-06, "loss": 3.037814140319824, "step": 173 }, { "epoch": 0.09138655462184873, "grad_norm": 11.863479816829765, "learning_rate": 3.024475524475525e-06, "loss": 3.15146541595459, "step": 174 }, { "epoch": 0.09191176470588236, "grad_norm": 19.641745609151776, "learning_rate": 3.0419580419580425e-06, "loss": 3.0058999061584473, "step": 175 }, { "epoch": 0.09243697478991597, "grad_norm": 9.524444165308065, "learning_rate": 3.0594405594405598e-06, "loss": 3.323092460632324, "step": 176 }, { "epoch": 0.09296218487394958, "grad_norm": 18.77864617215371, "learning_rate": 3.0769230769230774e-06, "loss": 2.5695178508758545, "step": 177 }, { "epoch": 0.0934873949579832, "grad_norm": 9.150670272159463, "learning_rate": 3.0944055944055947e-06, "loss": 2.732858419418335, "step": 178 }, { "epoch": 0.0940126050420168, "grad_norm": 15.584170556028567, "learning_rate": 3.1118881118881123e-06, "loss": 3.000278949737549, "step": 179 }, { "epoch": 0.09453781512605042, "grad_norm": 7.764882511162812, "learning_rate": 3.1293706293706296e-06, "loss": 2.537888288497925, "step": 180 }, { "epoch": 0.09506302521008403, "grad_norm": 12.640917564361425, "learning_rate": 3.1468531468531472e-06, "loss": 3.4139657020568848, "step": 181 }, { "epoch": 0.09558823529411764, "grad_norm": 13.736638173520092, "learning_rate": 3.164335664335665e-06, "loss": 3.2975075244903564, "step": 182 }, { "epoch": 0.09611344537815127, "grad_norm": 11.146515420942752, "learning_rate": 3.181818181818182e-06, "loss": 2.9420247077941895, "step": 183 }, { "epoch": 0.09663865546218488, "grad_norm": 16.077168756793455, "learning_rate": 3.1993006993006998e-06, "loss": 3.0430757999420166, "step": 184 }, { "epoch": 0.09716386554621849, "grad_norm": 7.818414764781988, "learning_rate": 3.216783216783217e-06, "loss": 2.572394371032715, "step": 185 }, { "epoch": 0.0976890756302521, "grad_norm": 11.284887261483664, "learning_rate": 3.2342657342657347e-06, "loss": 3.490865468978882, "step": 186 }, { "epoch": 0.09821428571428571, "grad_norm": 10.56509804328153, "learning_rate": 3.251748251748252e-06, "loss": 3.073840618133545, "step": 187 }, { "epoch": 0.09873949579831932, "grad_norm": 13.842893330036237, "learning_rate": 3.2692307692307696e-06, "loss": 2.848374843597412, "step": 188 }, { "epoch": 0.09926470588235294, "grad_norm": 12.97923818274776, "learning_rate": 3.286713286713287e-06, "loss": 2.7974178791046143, "step": 189 }, { "epoch": 0.09978991596638656, "grad_norm": 10.920365289443275, "learning_rate": 3.3041958041958045e-06, "loss": 3.00117826461792, "step": 190 }, { "epoch": 0.10031512605042017, "grad_norm": 8.240494221367149, "learning_rate": 3.321678321678322e-06, "loss": 3.0037038326263428, "step": 191 }, { "epoch": 0.10084033613445378, "grad_norm": 12.682655438957338, "learning_rate": 3.3391608391608394e-06, "loss": 2.929532527923584, "step": 192 }, { "epoch": 0.1013655462184874, "grad_norm": 13.824729489943875, "learning_rate": 3.356643356643357e-06, "loss": 3.097743272781372, "step": 193 }, { "epoch": 0.10189075630252101, "grad_norm": 6.984366164106969, "learning_rate": 3.3741258741258742e-06, "loss": 2.898709535598755, "step": 194 }, { "epoch": 0.10241596638655462, "grad_norm": 15.648955746370264, "learning_rate": 3.391608391608392e-06, "loss": 2.750286340713501, "step": 195 }, { "epoch": 0.10294117647058823, "grad_norm": 11.296236998793132, "learning_rate": 3.409090909090909e-06, "loss": 2.8797812461853027, "step": 196 }, { "epoch": 0.10346638655462184, "grad_norm": 12.78603394088546, "learning_rate": 3.426573426573427e-06, "loss": 2.301283121109009, "step": 197 }, { "epoch": 0.10399159663865547, "grad_norm": 10.171492942594694, "learning_rate": 3.4440559440559445e-06, "loss": 3.184098243713379, "step": 198 }, { "epoch": 0.10451680672268908, "grad_norm": 13.60666168480492, "learning_rate": 3.4615384615384617e-06, "loss": 2.9111146926879883, "step": 199 }, { "epoch": 0.10504201680672269, "grad_norm": 7.7514528457617695, "learning_rate": 3.4790209790209793e-06, "loss": 3.051267623901367, "step": 200 }, { "epoch": 0.1055672268907563, "grad_norm": 7.5968277415578145, "learning_rate": 3.4965034965034966e-06, "loss": 2.8756015300750732, "step": 201 }, { "epoch": 0.10609243697478991, "grad_norm": 7.276738165713097, "learning_rate": 3.5139860139860142e-06, "loss": 2.7797441482543945, "step": 202 }, { "epoch": 0.10661764705882353, "grad_norm": 15.493691798944, "learning_rate": 3.5314685314685315e-06, "loss": 3.124054431915283, "step": 203 }, { "epoch": 0.10714285714285714, "grad_norm": 21.992254056135568, "learning_rate": 3.548951048951049e-06, "loss": 2.4626293182373047, "step": 204 }, { "epoch": 0.10766806722689076, "grad_norm": 22.921388511775255, "learning_rate": 3.566433566433567e-06, "loss": 2.624375343322754, "step": 205 }, { "epoch": 0.10819327731092437, "grad_norm": 13.418579314372957, "learning_rate": 3.583916083916084e-06, "loss": 3.0480997562408447, "step": 206 }, { "epoch": 0.10871848739495799, "grad_norm": 18.140784399104454, "learning_rate": 3.6013986013986017e-06, "loss": 3.1774277687072754, "step": 207 }, { "epoch": 0.1092436974789916, "grad_norm": 23.10467055540296, "learning_rate": 3.618881118881119e-06, "loss": 2.5532002449035645, "step": 208 }, { "epoch": 0.10976890756302521, "grad_norm": 18.788373579277287, "learning_rate": 3.6363636363636366e-06, "loss": 2.434422731399536, "step": 209 }, { "epoch": 0.11029411764705882, "grad_norm": 11.075037647644848, "learning_rate": 3.653846153846154e-06, "loss": 3.3075475692749023, "step": 210 }, { "epoch": 0.11081932773109243, "grad_norm": 9.709952091780451, "learning_rate": 3.6713286713286715e-06, "loss": 3.0162198543548584, "step": 211 }, { "epoch": 0.11134453781512606, "grad_norm": 14.839852984942395, "learning_rate": 3.6888111888111896e-06, "loss": 2.989046096801758, "step": 212 }, { "epoch": 0.11186974789915967, "grad_norm": 9.615367564730676, "learning_rate": 3.7062937062937064e-06, "loss": 2.36433744430542, "step": 213 }, { "epoch": 0.11239495798319328, "grad_norm": 11.609479452472392, "learning_rate": 3.7237762237762245e-06, "loss": 2.972330093383789, "step": 214 }, { "epoch": 0.11292016806722689, "grad_norm": 14.618563522858688, "learning_rate": 3.7412587412587413e-06, "loss": 3.2336065769195557, "step": 215 }, { "epoch": 0.1134453781512605, "grad_norm": 11.569333653417587, "learning_rate": 3.7587412587412593e-06, "loss": 3.465144634246826, "step": 216 }, { "epoch": 0.11397058823529412, "grad_norm": 8.415895685524674, "learning_rate": 3.776223776223776e-06, "loss": 2.604027271270752, "step": 217 }, { "epoch": 0.11449579831932773, "grad_norm": 11.913951491593625, "learning_rate": 3.7937062937062942e-06, "loss": 3.1154234409332275, "step": 218 }, { "epoch": 0.11502100840336134, "grad_norm": 9.673968676254225, "learning_rate": 3.811188811188811e-06, "loss": 3.360666275024414, "step": 219 }, { "epoch": 0.11554621848739496, "grad_norm": 22.86078722612697, "learning_rate": 3.828671328671329e-06, "loss": 3.4900522232055664, "step": 220 }, { "epoch": 0.11607142857142858, "grad_norm": 13.062407442240385, "learning_rate": 3.846153846153847e-06, "loss": 2.4134976863861084, "step": 221 }, { "epoch": 0.11659663865546219, "grad_norm": 11.384408617132706, "learning_rate": 3.863636363636364e-06, "loss": 3.5764036178588867, "step": 222 }, { "epoch": 0.1171218487394958, "grad_norm": 12.438124931647444, "learning_rate": 3.881118881118881e-06, "loss": 3.0310349464416504, "step": 223 }, { "epoch": 0.11764705882352941, "grad_norm": 8.312284188439419, "learning_rate": 3.898601398601399e-06, "loss": 3.1959681510925293, "step": 224 }, { "epoch": 0.11817226890756302, "grad_norm": 11.60753443958062, "learning_rate": 3.916083916083917e-06, "loss": 3.0111706256866455, "step": 225 }, { "epoch": 0.11869747899159663, "grad_norm": 15.26549031847711, "learning_rate": 3.933566433566433e-06, "loss": 2.2159054279327393, "step": 226 }, { "epoch": 0.11922268907563026, "grad_norm": 12.052371940229559, "learning_rate": 3.951048951048951e-06, "loss": 2.9275083541870117, "step": 227 }, { "epoch": 0.11974789915966387, "grad_norm": 14.344014660662777, "learning_rate": 3.968531468531469e-06, "loss": 2.7885520458221436, "step": 228 }, { "epoch": 0.12027310924369748, "grad_norm": 10.749652811681903, "learning_rate": 3.986013986013986e-06, "loss": 3.0690507888793945, "step": 229 }, { "epoch": 0.1207983193277311, "grad_norm": 34.97507294561634, "learning_rate": 4.003496503496504e-06, "loss": 2.659562349319458, "step": 230 }, { "epoch": 0.1213235294117647, "grad_norm": 20.572400182307433, "learning_rate": 4.020979020979021e-06, "loss": 2.328030824661255, "step": 231 }, { "epoch": 0.12184873949579832, "grad_norm": 15.60933445684268, "learning_rate": 4.0384615384615385e-06, "loss": 2.6655068397521973, "step": 232 }, { "epoch": 0.12237394957983193, "grad_norm": 13.642415835616449, "learning_rate": 4.055944055944056e-06, "loss": 2.774475336074829, "step": 233 }, { "epoch": 0.12289915966386554, "grad_norm": 8.39833072721463, "learning_rate": 4.073426573426574e-06, "loss": 2.903592348098755, "step": 234 }, { "epoch": 0.12342436974789917, "grad_norm": 8.415681931951033, "learning_rate": 4.0909090909090915e-06, "loss": 3.264169216156006, "step": 235 }, { "epoch": 0.12394957983193278, "grad_norm": 19.358879649047967, "learning_rate": 4.108391608391608e-06, "loss": 2.8825795650482178, "step": 236 }, { "epoch": 0.12447478991596639, "grad_norm": 8.227022656346648, "learning_rate": 4.125874125874127e-06, "loss": 3.172744035720825, "step": 237 }, { "epoch": 0.125, "grad_norm": 11.583219450129397, "learning_rate": 4.143356643356644e-06, "loss": 2.7165675163269043, "step": 238 }, { "epoch": 0.1255252100840336, "grad_norm": 8.41588668294577, "learning_rate": 4.160839160839161e-06, "loss": 2.731034755706787, "step": 239 }, { "epoch": 0.12605042016806722, "grad_norm": 12.027666027582784, "learning_rate": 4.178321678321678e-06, "loss": 2.968233823776245, "step": 240 }, { "epoch": 0.12657563025210083, "grad_norm": 29.255592488924112, "learning_rate": 4.195804195804197e-06, "loss": 2.7731008529663086, "step": 241 }, { "epoch": 0.12710084033613445, "grad_norm": 7.723178232065691, "learning_rate": 4.213286713286714e-06, "loss": 3.0388343334198, "step": 242 }, { "epoch": 0.12762605042016806, "grad_norm": 16.25151188823703, "learning_rate": 4.230769230769231e-06, "loss": 3.1828207969665527, "step": 243 }, { "epoch": 0.12815126050420167, "grad_norm": 11.155763849543153, "learning_rate": 4.248251748251749e-06, "loss": 3.1027138233184814, "step": 244 }, { "epoch": 0.12867647058823528, "grad_norm": 18.878790527529304, "learning_rate": 4.265734265734266e-06, "loss": 3.6192004680633545, "step": 245 }, { "epoch": 0.12920168067226892, "grad_norm": 14.46355970963623, "learning_rate": 4.283216783216784e-06, "loss": 3.3602585792541504, "step": 246 }, { "epoch": 0.12972689075630253, "grad_norm": 7.185646733610239, "learning_rate": 4.300699300699301e-06, "loss": 3.087383270263672, "step": 247 }, { "epoch": 0.13025210084033614, "grad_norm": 9.554447790840326, "learning_rate": 4.3181818181818185e-06, "loss": 2.7826695442199707, "step": 248 }, { "epoch": 0.13077731092436976, "grad_norm": 13.301901608552498, "learning_rate": 4.335664335664336e-06, "loss": 3.091538906097412, "step": 249 }, { "epoch": 0.13130252100840337, "grad_norm": 12.24745264661296, "learning_rate": 4.353146853146854e-06, "loss": 2.380802631378174, "step": 250 }, { "epoch": 0.13182773109243698, "grad_norm": 11.933675211169204, "learning_rate": 4.3706293706293715e-06, "loss": 3.5029330253601074, "step": 251 }, { "epoch": 0.1323529411764706, "grad_norm": 6.36826707491301, "learning_rate": 4.388111888111888e-06, "loss": 2.601318120956421, "step": 252 }, { "epoch": 0.1328781512605042, "grad_norm": 7.8489978295167635, "learning_rate": 4.405594405594406e-06, "loss": 2.7776780128479004, "step": 253 }, { "epoch": 0.1334033613445378, "grad_norm": 16.97252676906109, "learning_rate": 4.423076923076924e-06, "loss": 3.3952476978302, "step": 254 }, { "epoch": 0.13392857142857142, "grad_norm": 11.673521836277425, "learning_rate": 4.440559440559441e-06, "loss": 2.972081184387207, "step": 255 }, { "epoch": 0.13445378151260504, "grad_norm": 9.06001648270617, "learning_rate": 4.458041958041958e-06, "loss": 2.2182092666625977, "step": 256 }, { "epoch": 0.13497899159663865, "grad_norm": 10.616408630136574, "learning_rate": 4.475524475524476e-06, "loss": 2.9145538806915283, "step": 257 }, { "epoch": 0.13550420168067226, "grad_norm": 8.129751834678931, "learning_rate": 4.493006993006993e-06, "loss": 2.7237493991851807, "step": 258 }, { "epoch": 0.13602941176470587, "grad_norm": 15.525937684631266, "learning_rate": 4.510489510489511e-06, "loss": 2.9480385780334473, "step": 259 }, { "epoch": 0.13655462184873948, "grad_norm": 9.085038297487555, "learning_rate": 4.527972027972029e-06, "loss": 3.32718825340271, "step": 260 }, { "epoch": 0.13707983193277312, "grad_norm": 12.237178714393849, "learning_rate": 4.5454545454545455e-06, "loss": 2.8504390716552734, "step": 261 }, { "epoch": 0.13760504201680673, "grad_norm": 26.459368666654875, "learning_rate": 4.562937062937063e-06, "loss": 3.4439854621887207, "step": 262 }, { "epoch": 0.13813025210084034, "grad_norm": 9.466463325357706, "learning_rate": 4.580419580419581e-06, "loss": 2.9045250415802, "step": 263 }, { "epoch": 0.13865546218487396, "grad_norm": 8.778972681585577, "learning_rate": 4.5979020979020985e-06, "loss": 2.701341390609741, "step": 264 }, { "epoch": 0.13918067226890757, "grad_norm": 10.683773330506448, "learning_rate": 4.615384615384616e-06, "loss": 2.8882217407226562, "step": 265 }, { "epoch": 0.13970588235294118, "grad_norm": 11.01283314975054, "learning_rate": 4.632867132867133e-06, "loss": 3.0006091594696045, "step": 266 }, { "epoch": 0.1402310924369748, "grad_norm": 10.175808650808271, "learning_rate": 4.650349650349651e-06, "loss": 2.9005932807922363, "step": 267 }, { "epoch": 0.1407563025210084, "grad_norm": 20.32774974068184, "learning_rate": 4.667832167832168e-06, "loss": 2.868788719177246, "step": 268 }, { "epoch": 0.14128151260504201, "grad_norm": 16.498875846940365, "learning_rate": 4.685314685314686e-06, "loss": 2.9905383586883545, "step": 269 }, { "epoch": 0.14180672268907563, "grad_norm": 11.147824941371098, "learning_rate": 4.702797202797203e-06, "loss": 2.5644679069519043, "step": 270 }, { "epoch": 0.14233193277310924, "grad_norm": 15.519193323150592, "learning_rate": 4.72027972027972e-06, "loss": 2.969034194946289, "step": 271 }, { "epoch": 0.14285714285714285, "grad_norm": 15.987808850113785, "learning_rate": 4.737762237762238e-06, "loss": 3.084165573120117, "step": 272 }, { "epoch": 0.14338235294117646, "grad_norm": 16.02073441890616, "learning_rate": 4.755244755244756e-06, "loss": 2.9546210765838623, "step": 273 }, { "epoch": 0.14390756302521007, "grad_norm": 12.82049236600706, "learning_rate": 4.772727272727273e-06, "loss": 2.4903135299682617, "step": 274 }, { "epoch": 0.14443277310924368, "grad_norm": 8.825327979578235, "learning_rate": 4.79020979020979e-06, "loss": 2.3993375301361084, "step": 275 }, { "epoch": 0.14495798319327732, "grad_norm": 11.433202975059613, "learning_rate": 4.807692307692308e-06, "loss": 3.0640833377838135, "step": 276 }, { "epoch": 0.14548319327731093, "grad_norm": 16.78525878625079, "learning_rate": 4.8251748251748255e-06, "loss": 2.915358543395996, "step": 277 }, { "epoch": 0.14600840336134455, "grad_norm": 12.297443057485832, "learning_rate": 4.842657342657343e-06, "loss": 2.8075063228607178, "step": 278 }, { "epoch": 0.14653361344537816, "grad_norm": 7.701076026693094, "learning_rate": 4.86013986013986e-06, "loss": 2.650007963180542, "step": 279 }, { "epoch": 0.14705882352941177, "grad_norm": 21.50963940806387, "learning_rate": 4.877622377622378e-06, "loss": 2.8522448539733887, "step": 280 }, { "epoch": 0.14758403361344538, "grad_norm": 17.474278566991046, "learning_rate": 4.895104895104895e-06, "loss": 2.9502627849578857, "step": 281 }, { "epoch": 0.148109243697479, "grad_norm": 9.91976809612693, "learning_rate": 4.912587412587413e-06, "loss": 3.335960626602173, "step": 282 }, { "epoch": 0.1486344537815126, "grad_norm": 9.918649814805438, "learning_rate": 4.930069930069931e-06, "loss": 2.692478895187378, "step": 283 }, { "epoch": 0.14915966386554622, "grad_norm": 11.143348082372727, "learning_rate": 4.9475524475524474e-06, "loss": 2.9984793663024902, "step": 284 }, { "epoch": 0.14968487394957983, "grad_norm": 10.73671912279009, "learning_rate": 4.965034965034965e-06, "loss": 3.2969272136688232, "step": 285 }, { "epoch": 0.15021008403361344, "grad_norm": 13.521630444285467, "learning_rate": 4.982517482517483e-06, "loss": 2.885775566101074, "step": 286 }, { "epoch": 0.15073529411764705, "grad_norm": 7.998114081414412, "learning_rate": 5e-06, "loss": 2.1184072494506836, "step": 287 }, { "epoch": 0.15126050420168066, "grad_norm": 13.94717668174975, "learning_rate": 5.017482517482518e-06, "loss": 3.6645607948303223, "step": 288 }, { "epoch": 0.15178571428571427, "grad_norm": 10.631771354405638, "learning_rate": 5.034965034965036e-06, "loss": 2.526690721511841, "step": 289 }, { "epoch": 0.15231092436974789, "grad_norm": 14.444464408724446, "learning_rate": 5.0524475524475525e-06, "loss": 3.0754122734069824, "step": 290 }, { "epoch": 0.15283613445378152, "grad_norm": 22.846153399067962, "learning_rate": 5.06993006993007e-06, "loss": 2.9716532230377197, "step": 291 }, { "epoch": 0.15336134453781514, "grad_norm": 10.441806847508389, "learning_rate": 5.087412587412588e-06, "loss": 3.054677724838257, "step": 292 }, { "epoch": 0.15388655462184875, "grad_norm": 11.57460744247853, "learning_rate": 5.1048951048951055e-06, "loss": 2.5806143283843994, "step": 293 }, { "epoch": 0.15441176470588236, "grad_norm": 8.620784213684933, "learning_rate": 5.122377622377622e-06, "loss": 3.0842535495758057, "step": 294 }, { "epoch": 0.15493697478991597, "grad_norm": 9.731529031655892, "learning_rate": 5.13986013986014e-06, "loss": 2.42826509475708, "step": 295 }, { "epoch": 0.15546218487394958, "grad_norm": 12.396404564185275, "learning_rate": 5.157342657342658e-06, "loss": 3.0564281940460205, "step": 296 }, { "epoch": 0.1559873949579832, "grad_norm": 14.222847351584731, "learning_rate": 5.174825174825175e-06, "loss": 2.490323543548584, "step": 297 }, { "epoch": 0.1565126050420168, "grad_norm": 9.084233904714603, "learning_rate": 5.192307692307693e-06, "loss": 2.894385814666748, "step": 298 }, { "epoch": 0.15703781512605042, "grad_norm": 10.192577795739174, "learning_rate": 5.20979020979021e-06, "loss": 2.8993654251098633, "step": 299 }, { "epoch": 0.15756302521008403, "grad_norm": 9.57220974681429, "learning_rate": 5.2272727272727274e-06, "loss": 2.6970958709716797, "step": 300 }, { "epoch": 0.15808823529411764, "grad_norm": 22.600556530562834, "learning_rate": 5.244755244755245e-06, "loss": 3.1567840576171875, "step": 301 }, { "epoch": 0.15861344537815125, "grad_norm": 23.030039586268348, "learning_rate": 5.262237762237763e-06, "loss": 3.4071342945098877, "step": 302 }, { "epoch": 0.15913865546218486, "grad_norm": 9.933167341415635, "learning_rate": 5.27972027972028e-06, "loss": 3.2700154781341553, "step": 303 }, { "epoch": 0.15966386554621848, "grad_norm": 8.272506082831457, "learning_rate": 5.297202797202797e-06, "loss": 2.4964592456817627, "step": 304 }, { "epoch": 0.16018907563025211, "grad_norm": 10.42727483479198, "learning_rate": 5.314685314685315e-06, "loss": 2.8914425373077393, "step": 305 }, { "epoch": 0.16071428571428573, "grad_norm": 10.080120194085032, "learning_rate": 5.3321678321678325e-06, "loss": 2.873271942138672, "step": 306 }, { "epoch": 0.16123949579831934, "grad_norm": 25.48866106385711, "learning_rate": 5.34965034965035e-06, "loss": 3.065826892852783, "step": 307 }, { "epoch": 0.16176470588235295, "grad_norm": 7.985604781040925, "learning_rate": 5.367132867132867e-06, "loss": 2.7347259521484375, "step": 308 }, { "epoch": 0.16228991596638656, "grad_norm": 22.85816677576059, "learning_rate": 5.384615384615385e-06, "loss": 3.3949899673461914, "step": 309 }, { "epoch": 0.16281512605042017, "grad_norm": 12.951795036113193, "learning_rate": 5.402097902097902e-06, "loss": 2.639277219772339, "step": 310 }, { "epoch": 0.16334033613445378, "grad_norm": 18.327832664050792, "learning_rate": 5.41958041958042e-06, "loss": 2.5833029747009277, "step": 311 }, { "epoch": 0.1638655462184874, "grad_norm": 12.24670204746658, "learning_rate": 5.437062937062938e-06, "loss": 2.796782970428467, "step": 312 }, { "epoch": 0.164390756302521, "grad_norm": 15.437957584113905, "learning_rate": 5.4545454545454545e-06, "loss": 3.127584934234619, "step": 313 }, { "epoch": 0.16491596638655462, "grad_norm": 16.523944603955105, "learning_rate": 5.472027972027972e-06, "loss": 2.613748550415039, "step": 314 }, { "epoch": 0.16544117647058823, "grad_norm": 17.531432028273453, "learning_rate": 5.48951048951049e-06, "loss": 2.914043664932251, "step": 315 }, { "epoch": 0.16596638655462184, "grad_norm": 8.873856320684599, "learning_rate": 5.5069930069930074e-06, "loss": 2.637112855911255, "step": 316 }, { "epoch": 0.16649159663865545, "grad_norm": 9.716690797904613, "learning_rate": 5.524475524475524e-06, "loss": 2.3453238010406494, "step": 317 }, { "epoch": 0.16701680672268907, "grad_norm": 13.335520358387795, "learning_rate": 5.541958041958042e-06, "loss": 2.8517067432403564, "step": 318 }, { "epoch": 0.16754201680672268, "grad_norm": 7.958293446281881, "learning_rate": 5.5594405594405596e-06, "loss": 2.308143377304077, "step": 319 }, { "epoch": 0.16806722689075632, "grad_norm": 13.536505294069386, "learning_rate": 5.576923076923077e-06, "loss": 2.496377944946289, "step": 320 }, { "epoch": 0.16859243697478993, "grad_norm": 11.556116676169184, "learning_rate": 5.594405594405595e-06, "loss": 2.844165802001953, "step": 321 }, { "epoch": 0.16911764705882354, "grad_norm": 38.4339348963778, "learning_rate": 5.611888111888112e-06, "loss": 3.34017276763916, "step": 322 }, { "epoch": 0.16964285714285715, "grad_norm": 8.81393612808593, "learning_rate": 5.629370629370629e-06, "loss": 2.8381388187408447, "step": 323 }, { "epoch": 0.17016806722689076, "grad_norm": 13.491449386435834, "learning_rate": 5.646853146853147e-06, "loss": 3.00146484375, "step": 324 }, { "epoch": 0.17069327731092437, "grad_norm": 10.56532718056163, "learning_rate": 5.664335664335665e-06, "loss": 3.2175936698913574, "step": 325 }, { "epoch": 0.17121848739495799, "grad_norm": 8.90381181305133, "learning_rate": 5.681818181818183e-06, "loss": 2.1831305027008057, "step": 326 }, { "epoch": 0.1717436974789916, "grad_norm": 13.833832379132224, "learning_rate": 5.699300699300699e-06, "loss": 2.598001003265381, "step": 327 }, { "epoch": 0.1722689075630252, "grad_norm": 13.284880697736416, "learning_rate": 5.716783216783217e-06, "loss": 2.8579132556915283, "step": 328 }, { "epoch": 0.17279411764705882, "grad_norm": 10.03274643755016, "learning_rate": 5.7342657342657345e-06, "loss": 2.6181423664093018, "step": 329 }, { "epoch": 0.17331932773109243, "grad_norm": 22.94440201330624, "learning_rate": 5.751748251748253e-06, "loss": 3.288753032684326, "step": 330 }, { "epoch": 0.17384453781512604, "grad_norm": 6.984083541878451, "learning_rate": 5.769230769230769e-06, "loss": 3.0615525245666504, "step": 331 }, { "epoch": 0.17436974789915966, "grad_norm": 10.044648077843894, "learning_rate": 5.786713286713287e-06, "loss": 2.8029568195343018, "step": 332 }, { "epoch": 0.17489495798319327, "grad_norm": 8.03248869711385, "learning_rate": 5.804195804195804e-06, "loss": 3.3441247940063477, "step": 333 }, { "epoch": 0.17542016806722688, "grad_norm": 17.576647360014295, "learning_rate": 5.821678321678323e-06, "loss": 2.7332277297973633, "step": 334 }, { "epoch": 0.17594537815126052, "grad_norm": 10.870948748653749, "learning_rate": 5.83916083916084e-06, "loss": 3.1329116821289062, "step": 335 }, { "epoch": 0.17647058823529413, "grad_norm": 9.547122044890056, "learning_rate": 5.856643356643356e-06, "loss": 2.8317198753356934, "step": 336 }, { "epoch": 0.17699579831932774, "grad_norm": 16.535666974764705, "learning_rate": 5.874125874125874e-06, "loss": 3.296326160430908, "step": 337 }, { "epoch": 0.17752100840336135, "grad_norm": 32.236437315703704, "learning_rate": 5.8916083916083925e-06, "loss": 2.118476629257202, "step": 338 }, { "epoch": 0.17804621848739496, "grad_norm": 11.630796483773931, "learning_rate": 5.90909090909091e-06, "loss": 3.121518611907959, "step": 339 }, { "epoch": 0.17857142857142858, "grad_norm": 19.388312080195448, "learning_rate": 5.926573426573428e-06, "loss": 3.1046595573425293, "step": 340 }, { "epoch": 0.1790966386554622, "grad_norm": 12.001962559970615, "learning_rate": 5.944055944055944e-06, "loss": 2.9630184173583984, "step": 341 }, { "epoch": 0.1796218487394958, "grad_norm": 16.359504215504014, "learning_rate": 5.961538461538462e-06, "loss": 3.542586326599121, "step": 342 }, { "epoch": 0.1801470588235294, "grad_norm": 15.97692453633404, "learning_rate": 5.97902097902098e-06, "loss": 2.631359577178955, "step": 343 }, { "epoch": 0.18067226890756302, "grad_norm": 10.896101566535322, "learning_rate": 5.996503496503498e-06, "loss": 2.8164381980895996, "step": 344 }, { "epoch": 0.18119747899159663, "grad_norm": 8.405088981188355, "learning_rate": 6.013986013986014e-06, "loss": 2.8848729133605957, "step": 345 }, { "epoch": 0.18172268907563024, "grad_norm": 12.527972452854746, "learning_rate": 6.031468531468532e-06, "loss": 3.0304975509643555, "step": 346 }, { "epoch": 0.18224789915966386, "grad_norm": 11.26596331715966, "learning_rate": 6.04895104895105e-06, "loss": 2.392552137374878, "step": 347 }, { "epoch": 0.18277310924369747, "grad_norm": 16.52357762076619, "learning_rate": 6.0664335664335674e-06, "loss": 2.7970218658447266, "step": 348 }, { "epoch": 0.18329831932773108, "grad_norm": 17.84614213512548, "learning_rate": 6.083916083916085e-06, "loss": 3.2228212356567383, "step": 349 }, { "epoch": 0.18382352941176472, "grad_norm": 11.699379344566392, "learning_rate": 6.101398601398602e-06, "loss": 3.0732061862945557, "step": 350 }, { "epoch": 0.18434873949579833, "grad_norm": 8.986265862248208, "learning_rate": 6.1188811188811196e-06, "loss": 2.5782558917999268, "step": 351 }, { "epoch": 0.18487394957983194, "grad_norm": 10.091949551695143, "learning_rate": 6.136363636363637e-06, "loss": 2.93255877494812, "step": 352 }, { "epoch": 0.18539915966386555, "grad_norm": 9.658375459178844, "learning_rate": 6.153846153846155e-06, "loss": 3.047292470932007, "step": 353 }, { "epoch": 0.18592436974789917, "grad_norm": 13.676797411232373, "learning_rate": 6.171328671328672e-06, "loss": 2.2338690757751465, "step": 354 }, { "epoch": 0.18644957983193278, "grad_norm": 15.01158558376865, "learning_rate": 6.188811188811189e-06, "loss": 1.2794667482376099, "step": 355 }, { "epoch": 0.1869747899159664, "grad_norm": 9.743805359222968, "learning_rate": 6.206293706293707e-06, "loss": 2.782721519470215, "step": 356 }, { "epoch": 0.1875, "grad_norm": 19.860933042719402, "learning_rate": 6.223776223776225e-06, "loss": 3.5968642234802246, "step": 357 }, { "epoch": 0.1880252100840336, "grad_norm": 11.718618315486523, "learning_rate": 6.241258741258742e-06, "loss": 3.20735502243042, "step": 358 }, { "epoch": 0.18855042016806722, "grad_norm": 17.40896852185293, "learning_rate": 6.258741258741259e-06, "loss": 2.6875576972961426, "step": 359 }, { "epoch": 0.18907563025210083, "grad_norm": 30.279753647692278, "learning_rate": 6.276223776223777e-06, "loss": 2.811734676361084, "step": 360 }, { "epoch": 0.18960084033613445, "grad_norm": 11.568365547294238, "learning_rate": 6.2937062937062944e-06, "loss": 2.677192449569702, "step": 361 }, { "epoch": 0.19012605042016806, "grad_norm": 7.457266358229516, "learning_rate": 6.311188811188812e-06, "loss": 3.1059458255767822, "step": 362 }, { "epoch": 0.19065126050420167, "grad_norm": 25.871852992276406, "learning_rate": 6.32867132867133e-06, "loss": 1.924442172050476, "step": 363 }, { "epoch": 0.19117647058823528, "grad_norm": 13.099023718893376, "learning_rate": 6.3461538461538466e-06, "loss": 2.608693838119507, "step": 364 }, { "epoch": 0.19170168067226892, "grad_norm": 19.954457552123216, "learning_rate": 6.363636363636364e-06, "loss": 3.336481809616089, "step": 365 }, { "epoch": 0.19222689075630253, "grad_norm": 10.593017470914477, "learning_rate": 6.381118881118882e-06, "loss": 2.4383559226989746, "step": 366 }, { "epoch": 0.19275210084033614, "grad_norm": 53.152245448473245, "learning_rate": 6.3986013986013996e-06, "loss": 3.030740261077881, "step": 367 }, { "epoch": 0.19327731092436976, "grad_norm": 20.630864189878373, "learning_rate": 6.416083916083916e-06, "loss": 3.375492572784424, "step": 368 }, { "epoch": 0.19380252100840337, "grad_norm": 20.40697417985276, "learning_rate": 6.433566433566434e-06, "loss": 3.3279261589050293, "step": 369 }, { "epoch": 0.19432773109243698, "grad_norm": 14.21414483251476, "learning_rate": 6.451048951048952e-06, "loss": 4.912796974182129, "step": 370 }, { "epoch": 0.1948529411764706, "grad_norm": 10.735114493440422, "learning_rate": 6.468531468531469e-06, "loss": 2.725327730178833, "step": 371 }, { "epoch": 0.1953781512605042, "grad_norm": 12.21193520568221, "learning_rate": 6.486013986013987e-06, "loss": 2.3871378898620605, "step": 372 }, { "epoch": 0.1959033613445378, "grad_norm": 12.19748977293293, "learning_rate": 6.503496503496504e-06, "loss": 3.147094249725342, "step": 373 }, { "epoch": 0.19642857142857142, "grad_norm": 12.112433407918623, "learning_rate": 6.5209790209790215e-06, "loss": 3.0235302448272705, "step": 374 }, { "epoch": 0.19695378151260504, "grad_norm": 8.68406243331884, "learning_rate": 6.538461538461539e-06, "loss": 2.0696754455566406, "step": 375 }, { "epoch": 0.19747899159663865, "grad_norm": 8.88435144089432, "learning_rate": 6.555944055944057e-06, "loss": 3.0018765926361084, "step": 376 }, { "epoch": 0.19800420168067226, "grad_norm": 10.335624452113418, "learning_rate": 6.573426573426574e-06, "loss": 2.522580146789551, "step": 377 }, { "epoch": 0.19852941176470587, "grad_norm": 9.176871587842054, "learning_rate": 6.590909090909091e-06, "loss": 2.918633460998535, "step": 378 }, { "epoch": 0.19905462184873948, "grad_norm": 13.695375443957785, "learning_rate": 6.608391608391609e-06, "loss": 2.945152759552002, "step": 379 }, { "epoch": 0.19957983193277312, "grad_norm": 26.971965437533072, "learning_rate": 6.6258741258741266e-06, "loss": 2.4536383152008057, "step": 380 }, { "epoch": 0.20010504201680673, "grad_norm": 10.702095768119317, "learning_rate": 6.643356643356644e-06, "loss": 2.701633930206299, "step": 381 }, { "epoch": 0.20063025210084034, "grad_norm": 11.714171516300004, "learning_rate": 6.660839160839161e-06, "loss": 3.37770414352417, "step": 382 }, { "epoch": 0.20115546218487396, "grad_norm": 12.122437886642187, "learning_rate": 6.678321678321679e-06, "loss": 2.713408946990967, "step": 383 }, { "epoch": 0.20168067226890757, "grad_norm": 6.4339691304640425, "learning_rate": 6.695804195804196e-06, "loss": 2.7212979793548584, "step": 384 }, { "epoch": 0.20220588235294118, "grad_norm": 14.561942681725727, "learning_rate": 6.713286713286714e-06, "loss": 2.2358717918395996, "step": 385 }, { "epoch": 0.2027310924369748, "grad_norm": 9.649089049655132, "learning_rate": 6.730769230769232e-06, "loss": 2.7439918518066406, "step": 386 }, { "epoch": 0.2032563025210084, "grad_norm": 8.951758220922223, "learning_rate": 6.7482517482517485e-06, "loss": 2.8917198181152344, "step": 387 }, { "epoch": 0.20378151260504201, "grad_norm": 12.727990149897483, "learning_rate": 6.765734265734266e-06, "loss": 2.8389973640441895, "step": 388 }, { "epoch": 0.20430672268907563, "grad_norm": 12.096894265468466, "learning_rate": 6.783216783216784e-06, "loss": 2.311751365661621, "step": 389 }, { "epoch": 0.20483193277310924, "grad_norm": 8.755646218028499, "learning_rate": 6.8006993006993015e-06, "loss": 2.6330480575561523, "step": 390 }, { "epoch": 0.20535714285714285, "grad_norm": 12.55791109622108, "learning_rate": 6.818181818181818e-06, "loss": 2.372936248779297, "step": 391 }, { "epoch": 0.20588235294117646, "grad_norm": 7.279471068593738, "learning_rate": 6.835664335664336e-06, "loss": 3.1617183685302734, "step": 392 }, { "epoch": 0.20640756302521007, "grad_norm": 11.020675003369632, "learning_rate": 6.853146853146854e-06, "loss": 3.1837525367736816, "step": 393 }, { "epoch": 0.20693277310924368, "grad_norm": 10.629050400933053, "learning_rate": 6.870629370629371e-06, "loss": 3.0233469009399414, "step": 394 }, { "epoch": 0.20745798319327732, "grad_norm": 24.269864804926, "learning_rate": 6.888111888111889e-06, "loss": 2.827796220779419, "step": 395 }, { "epoch": 0.20798319327731093, "grad_norm": 8.847343500765968, "learning_rate": 6.905594405594406e-06, "loss": 2.9433014392852783, "step": 396 }, { "epoch": 0.20850840336134455, "grad_norm": 12.290546828593667, "learning_rate": 6.923076923076923e-06, "loss": 3.1878905296325684, "step": 397 }, { "epoch": 0.20903361344537816, "grad_norm": 7.87874296353446, "learning_rate": 6.940559440559441e-06, "loss": 1.6629223823547363, "step": 398 }, { "epoch": 0.20955882352941177, "grad_norm": 18.84740177195252, "learning_rate": 6.958041958041959e-06, "loss": 2.9216864109039307, "step": 399 }, { "epoch": 0.21008403361344538, "grad_norm": 10.619109030802841, "learning_rate": 6.975524475524476e-06, "loss": 2.8075966835021973, "step": 400 }, { "epoch": 0.210609243697479, "grad_norm": 8.826150550203034, "learning_rate": 6.993006993006993e-06, "loss": 2.8049070835113525, "step": 401 }, { "epoch": 0.2111344537815126, "grad_norm": 13.199071344423656, "learning_rate": 7.010489510489511e-06, "loss": 3.451148748397827, "step": 402 }, { "epoch": 0.21165966386554622, "grad_norm": 43.12178745862726, "learning_rate": 7.0279720279720285e-06, "loss": 3.186109781265259, "step": 403 }, { "epoch": 0.21218487394957983, "grad_norm": 9.661154608505777, "learning_rate": 7.045454545454546e-06, "loss": 2.57002592086792, "step": 404 }, { "epoch": 0.21271008403361344, "grad_norm": 15.372456538441849, "learning_rate": 7.062937062937063e-06, "loss": 3.042940616607666, "step": 405 }, { "epoch": 0.21323529411764705, "grad_norm": 12.699673608642163, "learning_rate": 7.080419580419581e-06, "loss": 2.5307865142822266, "step": 406 }, { "epoch": 0.21376050420168066, "grad_norm": 58.7512320706578, "learning_rate": 7.097902097902098e-06, "loss": 2.579166889190674, "step": 407 }, { "epoch": 0.21428571428571427, "grad_norm": 7.817782683190833, "learning_rate": 7.115384615384616e-06, "loss": 2.7906084060668945, "step": 408 }, { "epoch": 0.21481092436974789, "grad_norm": 14.115688106613835, "learning_rate": 7.132867132867134e-06, "loss": 2.506618022918701, "step": 409 }, { "epoch": 0.21533613445378152, "grad_norm": 7.327806401537482, "learning_rate": 7.15034965034965e-06, "loss": 2.4115633964538574, "step": 410 }, { "epoch": 0.21586134453781514, "grad_norm": 19.26219369457865, "learning_rate": 7.167832167832168e-06, "loss": 3.5165960788726807, "step": 411 }, { "epoch": 0.21638655462184875, "grad_norm": 28.48386496666936, "learning_rate": 7.185314685314686e-06, "loss": 2.7558658123016357, "step": 412 }, { "epoch": 0.21691176470588236, "grad_norm": 11.447207055182961, "learning_rate": 7.202797202797203e-06, "loss": 2.8666818141937256, "step": 413 }, { "epoch": 0.21743697478991597, "grad_norm": 21.88243211842485, "learning_rate": 7.22027972027972e-06, "loss": 3.344409227371216, "step": 414 }, { "epoch": 0.21796218487394958, "grad_norm": 13.79881510025008, "learning_rate": 7.237762237762238e-06, "loss": 2.7095088958740234, "step": 415 }, { "epoch": 0.2184873949579832, "grad_norm": 13.143219789122687, "learning_rate": 7.2552447552447555e-06, "loss": 2.305788516998291, "step": 416 }, { "epoch": 0.2190126050420168, "grad_norm": 27.665957272489745, "learning_rate": 7.272727272727273e-06, "loss": 2.0047407150268555, "step": 417 }, { "epoch": 0.21953781512605042, "grad_norm": 9.680989528857488, "learning_rate": 7.290209790209791e-06, "loss": 2.2492542266845703, "step": 418 }, { "epoch": 0.22006302521008403, "grad_norm": 50.407251349513935, "learning_rate": 7.307692307692308e-06, "loss": 2.4409117698669434, "step": 419 }, { "epoch": 0.22058823529411764, "grad_norm": 12.795457760530025, "learning_rate": 7.325174825174825e-06, "loss": 2.942840576171875, "step": 420 }, { "epoch": 0.22111344537815125, "grad_norm": 14.993251079745352, "learning_rate": 7.342657342657343e-06, "loss": 2.966130256652832, "step": 421 }, { "epoch": 0.22163865546218486, "grad_norm": 12.05849461018198, "learning_rate": 7.360139860139861e-06, "loss": 3.0949718952178955, "step": 422 }, { "epoch": 0.22216386554621848, "grad_norm": 24.531642489999527, "learning_rate": 7.377622377622379e-06, "loss": 3.202450752258301, "step": 423 }, { "epoch": 0.22268907563025211, "grad_norm": 12.426345197431875, "learning_rate": 7.395104895104895e-06, "loss": 2.9883623123168945, "step": 424 }, { "epoch": 0.22321428571428573, "grad_norm": 8.65476120633608, "learning_rate": 7.412587412587413e-06, "loss": 2.8595736026763916, "step": 425 }, { "epoch": 0.22373949579831934, "grad_norm": 22.944717699417094, "learning_rate": 7.43006993006993e-06, "loss": 4.234559535980225, "step": 426 }, { "epoch": 0.22426470588235295, "grad_norm": 17.425585791825945, "learning_rate": 7.447552447552449e-06, "loss": 2.753218650817871, "step": 427 }, { "epoch": 0.22478991596638656, "grad_norm": 11.927897223766982, "learning_rate": 7.465034965034965e-06, "loss": 2.100989818572998, "step": 428 }, { "epoch": 0.22531512605042017, "grad_norm": 14.864287091266377, "learning_rate": 7.4825174825174825e-06, "loss": 3.3836236000061035, "step": 429 }, { "epoch": 0.22584033613445378, "grad_norm": 9.490344862736825, "learning_rate": 7.500000000000001e-06, "loss": 2.7179176807403564, "step": 430 }, { "epoch": 0.2263655462184874, "grad_norm": 9.573928430416439, "learning_rate": 7.517482517482519e-06, "loss": 2.606822967529297, "step": 431 }, { "epoch": 0.226890756302521, "grad_norm": 8.857455297135216, "learning_rate": 7.534965034965036e-06, "loss": 2.43107533454895, "step": 432 }, { "epoch": 0.22741596638655462, "grad_norm": 10.678051193590884, "learning_rate": 7.552447552447552e-06, "loss": 2.6220951080322266, "step": 433 }, { "epoch": 0.22794117647058823, "grad_norm": 13.919948243399672, "learning_rate": 7.569930069930071e-06, "loss": 2.6895902156829834, "step": 434 }, { "epoch": 0.22846638655462184, "grad_norm": 12.759389092274477, "learning_rate": 7.5874125874125885e-06, "loss": 2.776400566101074, "step": 435 }, { "epoch": 0.22899159663865545, "grad_norm": 14.99027393652492, "learning_rate": 7.604895104895106e-06, "loss": 3.0019478797912598, "step": 436 }, { "epoch": 0.22951680672268907, "grad_norm": 19.973323516321663, "learning_rate": 7.622377622377622e-06, "loss": 2.6804957389831543, "step": 437 }, { "epoch": 0.23004201680672268, "grad_norm": 15.91658106452273, "learning_rate": 7.63986013986014e-06, "loss": 3.0910873413085938, "step": 438 }, { "epoch": 0.23056722689075632, "grad_norm": 11.941879681685068, "learning_rate": 7.657342657342658e-06, "loss": 2.8254013061523438, "step": 439 }, { "epoch": 0.23109243697478993, "grad_norm": 16.67329539072613, "learning_rate": 7.674825174825176e-06, "loss": 2.724972724914551, "step": 440 }, { "epoch": 0.23161764705882354, "grad_norm": 12.006380920035427, "learning_rate": 7.692307692307694e-06, "loss": 1.9090602397918701, "step": 441 }, { "epoch": 0.23214285714285715, "grad_norm": 17.75374334716951, "learning_rate": 7.70979020979021e-06, "loss": 3.409651279449463, "step": 442 }, { "epoch": 0.23266806722689076, "grad_norm": 17.823273573885945, "learning_rate": 7.727272727272727e-06, "loss": 3.083376884460449, "step": 443 }, { "epoch": 0.23319327731092437, "grad_norm": 11.395891600147067, "learning_rate": 7.744755244755245e-06, "loss": 3.033438205718994, "step": 444 }, { "epoch": 0.23371848739495799, "grad_norm": 17.736568351871117, "learning_rate": 7.762237762237763e-06, "loss": 2.304586887359619, "step": 445 }, { "epoch": 0.2342436974789916, "grad_norm": 15.868522472650948, "learning_rate": 7.77972027972028e-06, "loss": 3.2195613384246826, "step": 446 }, { "epoch": 0.2347689075630252, "grad_norm": 7.286503781689302, "learning_rate": 7.797202797202798e-06, "loss": 2.5759634971618652, "step": 447 }, { "epoch": 0.23529411764705882, "grad_norm": 12.91141291704742, "learning_rate": 7.814685314685316e-06, "loss": 2.8108344078063965, "step": 448 }, { "epoch": 0.23581932773109243, "grad_norm": 14.991946303213734, "learning_rate": 7.832167832167833e-06, "loss": 3.3856277465820312, "step": 449 }, { "epoch": 0.23634453781512604, "grad_norm": 13.392793377874854, "learning_rate": 7.84965034965035e-06, "loss": 3.1772289276123047, "step": 450 }, { "epoch": 0.23686974789915966, "grad_norm": 28.1178428850119, "learning_rate": 7.867132867132867e-06, "loss": 2.233346939086914, "step": 451 }, { "epoch": 0.23739495798319327, "grad_norm": 12.912780095530756, "learning_rate": 7.884615384615384e-06, "loss": 3.240116596221924, "step": 452 }, { "epoch": 0.23792016806722688, "grad_norm": 16.063801823505663, "learning_rate": 7.902097902097902e-06, "loss": 3.0511634349823, "step": 453 }, { "epoch": 0.23844537815126052, "grad_norm": 20.678965505863175, "learning_rate": 7.91958041958042e-06, "loss": 2.8257339000701904, "step": 454 }, { "epoch": 0.23897058823529413, "grad_norm": 8.976117116341166, "learning_rate": 7.937062937062937e-06, "loss": 3.0524065494537354, "step": 455 }, { "epoch": 0.23949579831932774, "grad_norm": 10.251712933120457, "learning_rate": 7.954545454545455e-06, "loss": 3.0190975666046143, "step": 456 }, { "epoch": 0.24002100840336135, "grad_norm": 21.335298624149395, "learning_rate": 7.972027972027973e-06, "loss": 2.1252050399780273, "step": 457 }, { "epoch": 0.24054621848739496, "grad_norm": 7.27545971092921, "learning_rate": 7.98951048951049e-06, "loss": 2.513131618499756, "step": 458 }, { "epoch": 0.24107142857142858, "grad_norm": 12.600056652767337, "learning_rate": 8.006993006993008e-06, "loss": 2.483368396759033, "step": 459 }, { "epoch": 0.2415966386554622, "grad_norm": 6.832576475942737, "learning_rate": 8.024475524475524e-06, "loss": 3.0280661582946777, "step": 460 }, { "epoch": 0.2421218487394958, "grad_norm": 26.980936003585214, "learning_rate": 8.041958041958042e-06, "loss": 3.4621315002441406, "step": 461 }, { "epoch": 0.2426470588235294, "grad_norm": 7.878259025356279, "learning_rate": 8.05944055944056e-06, "loss": 2.8057236671447754, "step": 462 }, { "epoch": 0.24317226890756302, "grad_norm": 10.356915736855935, "learning_rate": 8.076923076923077e-06, "loss": 2.4050445556640625, "step": 463 }, { "epoch": 0.24369747899159663, "grad_norm": 7.750041232775024, "learning_rate": 8.094405594405595e-06, "loss": 2.925077199935913, "step": 464 }, { "epoch": 0.24422268907563024, "grad_norm": 11.688477607154828, "learning_rate": 8.111888111888112e-06, "loss": 3.2121710777282715, "step": 465 }, { "epoch": 0.24474789915966386, "grad_norm": 14.487448473659374, "learning_rate": 8.12937062937063e-06, "loss": 2.1468801498413086, "step": 466 }, { "epoch": 0.24527310924369747, "grad_norm": 23.42760042788643, "learning_rate": 8.146853146853148e-06, "loss": 3.138906478881836, "step": 467 }, { "epoch": 0.24579831932773108, "grad_norm": 26.956291607054943, "learning_rate": 8.164335664335665e-06, "loss": 2.754650592803955, "step": 468 }, { "epoch": 0.24632352941176472, "grad_norm": 12.804852697963751, "learning_rate": 8.181818181818183e-06, "loss": 2.7791335582733154, "step": 469 }, { "epoch": 0.24684873949579833, "grad_norm": 6.708018742172395, "learning_rate": 8.199300699300699e-06, "loss": 2.6262125968933105, "step": 470 }, { "epoch": 0.24737394957983194, "grad_norm": 9.374690323175574, "learning_rate": 8.216783216783217e-06, "loss": 2.1458685398101807, "step": 471 }, { "epoch": 0.24789915966386555, "grad_norm": 10.154124803652127, "learning_rate": 8.234265734265734e-06, "loss": 2.8066558837890625, "step": 472 }, { "epoch": 0.24842436974789917, "grad_norm": 8.731552323977164, "learning_rate": 8.251748251748254e-06, "loss": 2.9753003120422363, "step": 473 }, { "epoch": 0.24894957983193278, "grad_norm": 11.294678238925123, "learning_rate": 8.26923076923077e-06, "loss": 2.6883530616760254, "step": 474 }, { "epoch": 0.2494747899159664, "grad_norm": 11.075640338771539, "learning_rate": 8.286713286713287e-06, "loss": 3.029934883117676, "step": 475 }, { "epoch": 0.25, "grad_norm": 9.065132853953475, "learning_rate": 8.304195804195805e-06, "loss": 3.183340072631836, "step": 476 }, { "epoch": 0.2505252100840336, "grad_norm": 11.811314399111644, "learning_rate": 8.321678321678323e-06, "loss": 2.8612143993377686, "step": 477 }, { "epoch": 0.2510504201680672, "grad_norm": 13.831559056031068, "learning_rate": 8.33916083916084e-06, "loss": 2.720844268798828, "step": 478 }, { "epoch": 0.25157563025210083, "grad_norm": 18.044618112881952, "learning_rate": 8.356643356643356e-06, "loss": 3.130622625350952, "step": 479 }, { "epoch": 0.25210084033613445, "grad_norm": 10.808743157600965, "learning_rate": 8.374125874125874e-06, "loss": 2.920591354370117, "step": 480 }, { "epoch": 0.25262605042016806, "grad_norm": 14.286153466059856, "learning_rate": 8.391608391608393e-06, "loss": 2.8152403831481934, "step": 481 }, { "epoch": 0.25315126050420167, "grad_norm": 17.403115383352798, "learning_rate": 8.40909090909091e-06, "loss": 2.840240240097046, "step": 482 }, { "epoch": 0.2536764705882353, "grad_norm": 29.332509289729217, "learning_rate": 8.426573426573428e-06, "loss": 3.4587111473083496, "step": 483 }, { "epoch": 0.2542016806722689, "grad_norm": 10.967822109016453, "learning_rate": 8.444055944055944e-06, "loss": 3.0167973041534424, "step": 484 }, { "epoch": 0.2547268907563025, "grad_norm": 17.253531535564193, "learning_rate": 8.461538461538462e-06, "loss": 2.800180673599243, "step": 485 }, { "epoch": 0.2552521008403361, "grad_norm": 11.7873602705349, "learning_rate": 8.47902097902098e-06, "loss": 2.726072072982788, "step": 486 }, { "epoch": 0.2557773109243697, "grad_norm": 26.084283953713964, "learning_rate": 8.496503496503497e-06, "loss": 3.3081958293914795, "step": 487 }, { "epoch": 0.25630252100840334, "grad_norm": 23.353797537803327, "learning_rate": 8.513986013986013e-06, "loss": 2.4980058670043945, "step": 488 }, { "epoch": 0.25682773109243695, "grad_norm": 13.691435256067452, "learning_rate": 8.531468531468533e-06, "loss": 1.8919637203216553, "step": 489 }, { "epoch": 0.25735294117647056, "grad_norm": 11.418232071468879, "learning_rate": 8.54895104895105e-06, "loss": 2.665480613708496, "step": 490 }, { "epoch": 0.25787815126050423, "grad_norm": 16.665872215319176, "learning_rate": 8.566433566433568e-06, "loss": 2.935713052749634, "step": 491 }, { "epoch": 0.25840336134453784, "grad_norm": 25.55314521003453, "learning_rate": 8.583916083916086e-06, "loss": 2.6508588790893555, "step": 492 }, { "epoch": 0.25892857142857145, "grad_norm": 20.92869289645859, "learning_rate": 8.601398601398602e-06, "loss": 3.028712749481201, "step": 493 }, { "epoch": 0.25945378151260506, "grad_norm": 18.75020493469476, "learning_rate": 8.61888111888112e-06, "loss": 2.847996234893799, "step": 494 }, { "epoch": 0.2599789915966387, "grad_norm": 27.56630788936767, "learning_rate": 8.636363636363637e-06, "loss": 3.2749228477478027, "step": 495 }, { "epoch": 0.2605042016806723, "grad_norm": 13.69382630257854, "learning_rate": 8.653846153846155e-06, "loss": 3.1429078578948975, "step": 496 }, { "epoch": 0.2610294117647059, "grad_norm": 11.716274800000257, "learning_rate": 8.671328671328672e-06, "loss": 3.0947024822235107, "step": 497 }, { "epoch": 0.2615546218487395, "grad_norm": 18.28739725634593, "learning_rate": 8.68881118881119e-06, "loss": 2.3251190185546875, "step": 498 }, { "epoch": 0.2620798319327731, "grad_norm": 16.317437537387974, "learning_rate": 8.706293706293708e-06, "loss": 2.511931896209717, "step": 499 }, { "epoch": 0.26260504201680673, "grad_norm": 12.629147521616476, "learning_rate": 8.723776223776225e-06, "loss": 3.039483070373535, "step": 500 }, { "epoch": 0.26313025210084034, "grad_norm": 17.417276755487123, "learning_rate": 8.741258741258743e-06, "loss": 2.0832390785217285, "step": 501 }, { "epoch": 0.26365546218487396, "grad_norm": 11.661959144751236, "learning_rate": 8.758741258741259e-06, "loss": 2.5170040130615234, "step": 502 }, { "epoch": 0.26418067226890757, "grad_norm": 8.587572578481677, "learning_rate": 8.776223776223777e-06, "loss": 2.963388681411743, "step": 503 }, { "epoch": 0.2647058823529412, "grad_norm": 9.814489827235624, "learning_rate": 8.793706293706294e-06, "loss": 2.2981114387512207, "step": 504 }, { "epoch": 0.2652310924369748, "grad_norm": 6.895356060394732, "learning_rate": 8.811188811188812e-06, "loss": 2.869265079498291, "step": 505 }, { "epoch": 0.2657563025210084, "grad_norm": 18.872287384439865, "learning_rate": 8.82867132867133e-06, "loss": 2.2735471725463867, "step": 506 }, { "epoch": 0.266281512605042, "grad_norm": 12.14165400771466, "learning_rate": 8.846153846153847e-06, "loss": 2.5119335651397705, "step": 507 }, { "epoch": 0.2668067226890756, "grad_norm": 10.156856628838941, "learning_rate": 8.863636363636365e-06, "loss": 2.6723649501800537, "step": 508 }, { "epoch": 0.26733193277310924, "grad_norm": 19.460735524933785, "learning_rate": 8.881118881118883e-06, "loss": 2.925903797149658, "step": 509 }, { "epoch": 0.26785714285714285, "grad_norm": 17.74393460064321, "learning_rate": 8.8986013986014e-06, "loss": 2.6899666786193848, "step": 510 }, { "epoch": 0.26838235294117646, "grad_norm": 24.150033870893935, "learning_rate": 8.916083916083916e-06, "loss": 3.411919116973877, "step": 511 }, { "epoch": 0.2689075630252101, "grad_norm": 8.436504371655232, "learning_rate": 8.933566433566434e-06, "loss": 2.6116344928741455, "step": 512 }, { "epoch": 0.2694327731092437, "grad_norm": 9.718603320457653, "learning_rate": 8.951048951048951e-06, "loss": 2.750563859939575, "step": 513 }, { "epoch": 0.2699579831932773, "grad_norm": 8.577525339758392, "learning_rate": 8.968531468531469e-06, "loss": 2.9900131225585938, "step": 514 }, { "epoch": 0.2704831932773109, "grad_norm": 9.72020801019397, "learning_rate": 8.986013986013987e-06, "loss": 2.7598955631256104, "step": 515 }, { "epoch": 0.2710084033613445, "grad_norm": 18.02025057464598, "learning_rate": 9.003496503496504e-06, "loss": 2.7998476028442383, "step": 516 }, { "epoch": 0.27153361344537813, "grad_norm": 14.894906994176859, "learning_rate": 9.020979020979022e-06, "loss": 3.013887643814087, "step": 517 }, { "epoch": 0.27205882352941174, "grad_norm": 9.049787877783231, "learning_rate": 9.03846153846154e-06, "loss": 2.826267719268799, "step": 518 }, { "epoch": 0.27258403361344535, "grad_norm": 7.573154489562956, "learning_rate": 9.055944055944057e-06, "loss": 2.8274731636047363, "step": 519 }, { "epoch": 0.27310924369747897, "grad_norm": 21.913540817150594, "learning_rate": 9.073426573426573e-06, "loss": 3.278071641921997, "step": 520 }, { "epoch": 0.27363445378151263, "grad_norm": 15.180990763462034, "learning_rate": 9.090909090909091e-06, "loss": 2.5987932682037354, "step": 521 }, { "epoch": 0.27415966386554624, "grad_norm": 7.783671375514505, "learning_rate": 9.108391608391609e-06, "loss": 2.1832573413848877, "step": 522 }, { "epoch": 0.27468487394957986, "grad_norm": 10.721651287282956, "learning_rate": 9.125874125874126e-06, "loss": 3.219067096710205, "step": 523 }, { "epoch": 0.27521008403361347, "grad_norm": 13.116446727203488, "learning_rate": 9.143356643356644e-06, "loss": 2.9926705360412598, "step": 524 }, { "epoch": 0.2757352941176471, "grad_norm": 9.655360562971806, "learning_rate": 9.160839160839162e-06, "loss": 2.256592273712158, "step": 525 }, { "epoch": 0.2762605042016807, "grad_norm": 10.519913346333762, "learning_rate": 9.17832167832168e-06, "loss": 3.0461020469665527, "step": 526 }, { "epoch": 0.2767857142857143, "grad_norm": 13.293942326735147, "learning_rate": 9.195804195804197e-06, "loss": 3.2841827869415283, "step": 527 }, { "epoch": 0.2773109243697479, "grad_norm": 9.69975674353828, "learning_rate": 9.213286713286715e-06, "loss": 2.6983697414398193, "step": 528 }, { "epoch": 0.2778361344537815, "grad_norm": 13.908294823435138, "learning_rate": 9.230769230769232e-06, "loss": 2.669904947280884, "step": 529 }, { "epoch": 0.27836134453781514, "grad_norm": 15.020927144946038, "learning_rate": 9.248251748251748e-06, "loss": 2.8003804683685303, "step": 530 }, { "epoch": 0.27888655462184875, "grad_norm": 8.145015265202593, "learning_rate": 9.265734265734266e-06, "loss": 2.855621337890625, "step": 531 }, { "epoch": 0.27941176470588236, "grad_norm": 9.217520734527652, "learning_rate": 9.283216783216784e-06, "loss": 3.296477794647217, "step": 532 }, { "epoch": 0.27993697478991597, "grad_norm": 11.001708480571994, "learning_rate": 9.300699300699301e-06, "loss": 3.406341791152954, "step": 533 }, { "epoch": 0.2804621848739496, "grad_norm": 14.220941141028558, "learning_rate": 9.318181818181819e-06, "loss": 2.766058921813965, "step": 534 }, { "epoch": 0.2809873949579832, "grad_norm": 16.019050997199002, "learning_rate": 9.335664335664337e-06, "loss": 2.7180471420288086, "step": 535 }, { "epoch": 0.2815126050420168, "grad_norm": 13.57650452804989, "learning_rate": 9.353146853146854e-06, "loss": 2.658740997314453, "step": 536 }, { "epoch": 0.2820378151260504, "grad_norm": 12.403016551952248, "learning_rate": 9.370629370629372e-06, "loss": 3.016836166381836, "step": 537 }, { "epoch": 0.28256302521008403, "grad_norm": 12.52016091546998, "learning_rate": 9.38811188811189e-06, "loss": 2.647848129272461, "step": 538 }, { "epoch": 0.28308823529411764, "grad_norm": 9.35663561444136, "learning_rate": 9.405594405594406e-06, "loss": 3.000723361968994, "step": 539 }, { "epoch": 0.28361344537815125, "grad_norm": 6.754483303180034, "learning_rate": 9.423076923076923e-06, "loss": 2.221613883972168, "step": 540 }, { "epoch": 0.28413865546218486, "grad_norm": 10.081871721267813, "learning_rate": 9.44055944055944e-06, "loss": 2.757201910018921, "step": 541 }, { "epoch": 0.2846638655462185, "grad_norm": 8.432746811039069, "learning_rate": 9.458041958041958e-06, "loss": 2.7212443351745605, "step": 542 }, { "epoch": 0.2851890756302521, "grad_norm": 12.828062679779878, "learning_rate": 9.475524475524476e-06, "loss": 2.9237170219421387, "step": 543 }, { "epoch": 0.2857142857142857, "grad_norm": 9.160119559898765, "learning_rate": 9.493006993006994e-06, "loss": 2.876392364501953, "step": 544 }, { "epoch": 0.2862394957983193, "grad_norm": 8.948718551105781, "learning_rate": 9.510489510489511e-06, "loss": 2.8873000144958496, "step": 545 }, { "epoch": 0.2867647058823529, "grad_norm": 16.11031612097912, "learning_rate": 9.527972027972029e-06, "loss": 2.6230111122131348, "step": 546 }, { "epoch": 0.28728991596638653, "grad_norm": 21.929482022240183, "learning_rate": 9.545454545454547e-06, "loss": 2.9438681602478027, "step": 547 }, { "epoch": 0.28781512605042014, "grad_norm": 12.426071712303534, "learning_rate": 9.562937062937063e-06, "loss": 2.5408456325531006, "step": 548 }, { "epoch": 0.28834033613445376, "grad_norm": 11.414531240521558, "learning_rate": 9.58041958041958e-06, "loss": 3.2351088523864746, "step": 549 }, { "epoch": 0.28886554621848737, "grad_norm": 9.40392398669366, "learning_rate": 9.597902097902098e-06, "loss": 2.4242911338806152, "step": 550 }, { "epoch": 0.28939075630252103, "grad_norm": 9.799732867736692, "learning_rate": 9.615384615384616e-06, "loss": 2.551866292953491, "step": 551 }, { "epoch": 0.28991596638655465, "grad_norm": 16.78383984737307, "learning_rate": 9.632867132867133e-06, "loss": 2.7000250816345215, "step": 552 }, { "epoch": 0.29044117647058826, "grad_norm": 10.934529013401667, "learning_rate": 9.650349650349651e-06, "loss": 2.7635700702667236, "step": 553 }, { "epoch": 0.29096638655462187, "grad_norm": 9.616026433955014, "learning_rate": 9.667832167832169e-06, "loss": 2.279999017715454, "step": 554 }, { "epoch": 0.2914915966386555, "grad_norm": 8.212243251536766, "learning_rate": 9.685314685314686e-06, "loss": 2.586347818374634, "step": 555 }, { "epoch": 0.2920168067226891, "grad_norm": 11.613308358122108, "learning_rate": 9.702797202797204e-06, "loss": 2.9352517127990723, "step": 556 }, { "epoch": 0.2925420168067227, "grad_norm": 16.131150380536646, "learning_rate": 9.72027972027972e-06, "loss": 2.757617235183716, "step": 557 }, { "epoch": 0.2930672268907563, "grad_norm": 11.79514279059609, "learning_rate": 9.737762237762238e-06, "loss": 2.605280876159668, "step": 558 }, { "epoch": 0.2935924369747899, "grad_norm": 7.6099755173044805, "learning_rate": 9.755244755244755e-06, "loss": 2.793330192565918, "step": 559 }, { "epoch": 0.29411764705882354, "grad_norm": 12.937770860201477, "learning_rate": 9.772727272727273e-06, "loss": 2.594179153442383, "step": 560 }, { "epoch": 0.29464285714285715, "grad_norm": 11.099571079329944, "learning_rate": 9.79020979020979e-06, "loss": 2.3938279151916504, "step": 561 }, { "epoch": 0.29516806722689076, "grad_norm": 10.38188967863859, "learning_rate": 9.807692307692308e-06, "loss": 3.088261127471924, "step": 562 }, { "epoch": 0.2956932773109244, "grad_norm": 11.598487313259774, "learning_rate": 9.825174825174826e-06, "loss": 3.0211265087127686, "step": 563 }, { "epoch": 0.296218487394958, "grad_norm": 14.51034321124632, "learning_rate": 9.842657342657344e-06, "loss": 3.197432279586792, "step": 564 }, { "epoch": 0.2967436974789916, "grad_norm": 25.63509858878865, "learning_rate": 9.860139860139861e-06, "loss": 2.7388033866882324, "step": 565 }, { "epoch": 0.2972689075630252, "grad_norm": 17.08255545887913, "learning_rate": 9.877622377622379e-06, "loss": 3.1421351432800293, "step": 566 }, { "epoch": 0.2977941176470588, "grad_norm": 14.491041482152639, "learning_rate": 9.895104895104895e-06, "loss": 2.792797088623047, "step": 567 }, { "epoch": 0.29831932773109243, "grad_norm": 11.516514066126438, "learning_rate": 9.912587412587413e-06, "loss": 3.216681957244873, "step": 568 }, { "epoch": 0.29884453781512604, "grad_norm": 15.277721323551134, "learning_rate": 9.93006993006993e-06, "loss": 2.2214996814727783, "step": 569 }, { "epoch": 0.29936974789915966, "grad_norm": 9.908637980577337, "learning_rate": 9.94755244755245e-06, "loss": 2.3919496536254883, "step": 570 }, { "epoch": 0.29989495798319327, "grad_norm": 11.157153523666555, "learning_rate": 9.965034965034966e-06, "loss": 2.538166046142578, "step": 571 }, { "epoch": 0.3004201680672269, "grad_norm": 11.56868921848655, "learning_rate": 9.982517482517483e-06, "loss": 2.437208414077759, "step": 572 }, { "epoch": 0.3009453781512605, "grad_norm": 8.366580030672965, "learning_rate": 1e-05, "loss": 2.69106388092041, "step": 573 }, { "epoch": 0.3014705882352941, "grad_norm": 8.285082482324782, "learning_rate": 9.999999066071773e-06, "loss": 2.420135259628296, "step": 574 }, { "epoch": 0.3019957983193277, "grad_norm": 18.337761587305817, "learning_rate": 9.999996264287436e-06, "loss": 2.271578311920166, "step": 575 }, { "epoch": 0.3025210084033613, "grad_norm": 18.091609827817756, "learning_rate": 9.999991594648035e-06, "loss": 3.1087193489074707, "step": 576 }, { "epoch": 0.30304621848739494, "grad_norm": 10.727849393430924, "learning_rate": 9.999985057155316e-06, "loss": 2.7490944862365723, "step": 577 }, { "epoch": 0.30357142857142855, "grad_norm": 15.761615720732681, "learning_rate": 9.999976651811724e-06, "loss": 1.8860646486282349, "step": 578 }, { "epoch": 0.30409663865546216, "grad_norm": 19.854802786471527, "learning_rate": 9.999966378620396e-06, "loss": 2.6921753883361816, "step": 579 }, { "epoch": 0.30462184873949577, "grad_norm": 14.669702649740051, "learning_rate": 9.99995423758517e-06, "loss": 2.938077449798584, "step": 580 }, { "epoch": 0.30514705882352944, "grad_norm": 13.794604407278413, "learning_rate": 9.999940228710581e-06, "loss": 2.822067975997925, "step": 581 }, { "epoch": 0.30567226890756305, "grad_norm": 13.655086277625093, "learning_rate": 9.999924352001864e-06, "loss": 2.838697910308838, "step": 582 }, { "epoch": 0.30619747899159666, "grad_norm": 10.698956690256324, "learning_rate": 9.99990660746495e-06, "loss": 3.6764464378356934, "step": 583 }, { "epoch": 0.3067226890756303, "grad_norm": 7.9104080477242, "learning_rate": 9.999886995106467e-06, "loss": 2.723898410797119, "step": 584 }, { "epoch": 0.3072478991596639, "grad_norm": 12.249625648796686, "learning_rate": 9.99986551493374e-06, "loss": 3.1120963096618652, "step": 585 }, { "epoch": 0.3077731092436975, "grad_norm": 14.439861817712407, "learning_rate": 9.999842166954797e-06, "loss": 2.98933744430542, "step": 586 }, { "epoch": 0.3082983193277311, "grad_norm": 14.302343534185399, "learning_rate": 9.999816951178356e-06, "loss": 2.622647285461426, "step": 587 }, { "epoch": 0.3088235294117647, "grad_norm": 38.37469850342068, "learning_rate": 9.99978986761384e-06, "loss": 2.3630852699279785, "step": 588 }, { "epoch": 0.30934873949579833, "grad_norm": 13.073607679409657, "learning_rate": 9.999760916271368e-06, "loss": 2.528589963912964, "step": 589 }, { "epoch": 0.30987394957983194, "grad_norm": 11.144728729471069, "learning_rate": 9.99973009716175e-06, "loss": 3.504377841949463, "step": 590 }, { "epoch": 0.31039915966386555, "grad_norm": 11.27268551570553, "learning_rate": 9.999697410296505e-06, "loss": 2.9286673069000244, "step": 591 }, { "epoch": 0.31092436974789917, "grad_norm": 17.618527421243975, "learning_rate": 9.99966285568784e-06, "loss": 3.3268320560455322, "step": 592 }, { "epoch": 0.3114495798319328, "grad_norm": 12.285303824543004, "learning_rate": 9.999626433348664e-06, "loss": 2.728505849838257, "step": 593 }, { "epoch": 0.3119747899159664, "grad_norm": 8.138294552039829, "learning_rate": 9.999588143292584e-06, "loss": 2.930448532104492, "step": 594 }, { "epoch": 0.3125, "grad_norm": 9.927962429564628, "learning_rate": 9.999547985533905e-06, "loss": 3.1505517959594727, "step": 595 }, { "epoch": 0.3130252100840336, "grad_norm": 5.922799171737987, "learning_rate": 9.999505960087627e-06, "loss": 2.8477983474731445, "step": 596 }, { "epoch": 0.3135504201680672, "grad_norm": 13.944936899177495, "learning_rate": 9.999462066969451e-06, "loss": 3.0594921112060547, "step": 597 }, { "epoch": 0.31407563025210083, "grad_norm": 16.090994911441584, "learning_rate": 9.999416306195775e-06, "loss": 2.8699352741241455, "step": 598 }, { "epoch": 0.31460084033613445, "grad_norm": 15.377709468639337, "learning_rate": 9.999368677783691e-06, "loss": 1.935495376586914, "step": 599 }, { "epoch": 0.31512605042016806, "grad_norm": 8.588582192713162, "learning_rate": 9.999319181750993e-06, "loss": 2.664287567138672, "step": 600 }, { "epoch": 0.31565126050420167, "grad_norm": 14.800403011758473, "learning_rate": 9.999267818116173e-06, "loss": 2.7809691429138184, "step": 601 }, { "epoch": 0.3161764705882353, "grad_norm": 11.687554735482323, "learning_rate": 9.999214586898417e-06, "loss": 2.6982336044311523, "step": 602 }, { "epoch": 0.3167016806722689, "grad_norm": 7.660259977047418, "learning_rate": 9.99915948811761e-06, "loss": 2.6417441368103027, "step": 603 }, { "epoch": 0.3172268907563025, "grad_norm": 15.272726013049263, "learning_rate": 9.999102521794336e-06, "loss": 2.302229404449463, "step": 604 }, { "epoch": 0.3177521008403361, "grad_norm": 6.73413097294637, "learning_rate": 9.999043687949878e-06, "loss": 3.0615999698638916, "step": 605 }, { "epoch": 0.3182773109243697, "grad_norm": 8.989449343418547, "learning_rate": 9.998982986606214e-06, "loss": 2.5908992290496826, "step": 606 }, { "epoch": 0.31880252100840334, "grad_norm": 11.279803822617207, "learning_rate": 9.998920417786018e-06, "loss": 2.433568000793457, "step": 607 }, { "epoch": 0.31932773109243695, "grad_norm": 10.712145177485182, "learning_rate": 9.998855981512665e-06, "loss": 2.5364127159118652, "step": 608 }, { "epoch": 0.31985294117647056, "grad_norm": 9.44916528577445, "learning_rate": 9.998789677810226e-06, "loss": 2.695469617843628, "step": 609 }, { "epoch": 0.32037815126050423, "grad_norm": 15.717511398115636, "learning_rate": 9.998721506703473e-06, "loss": 2.3882129192352295, "step": 610 }, { "epoch": 0.32090336134453784, "grad_norm": 16.480968240086398, "learning_rate": 9.998651468217869e-06, "loss": 3.1402809619903564, "step": 611 }, { "epoch": 0.32142857142857145, "grad_norm": 11.077375672666998, "learning_rate": 9.99857956237958e-06, "loss": 2.8062262535095215, "step": 612 }, { "epoch": 0.32195378151260506, "grad_norm": 7.429179430952721, "learning_rate": 9.998505789215469e-06, "loss": 2.9973835945129395, "step": 613 }, { "epoch": 0.3224789915966387, "grad_norm": 9.403513277942446, "learning_rate": 9.998430148753095e-06, "loss": 2.661933422088623, "step": 614 }, { "epoch": 0.3230042016806723, "grad_norm": 25.09734450638449, "learning_rate": 9.998352641020714e-06, "loss": 3.1352996826171875, "step": 615 }, { "epoch": 0.3235294117647059, "grad_norm": 20.659801255635177, "learning_rate": 9.99827326604728e-06, "loss": 2.932847261428833, "step": 616 }, { "epoch": 0.3240546218487395, "grad_norm": 9.229192777393214, "learning_rate": 9.998192023862448e-06, "loss": 2.449533462524414, "step": 617 }, { "epoch": 0.3245798319327731, "grad_norm": 17.396599362794852, "learning_rate": 9.998108914496567e-06, "loss": 2.4606223106384277, "step": 618 }, { "epoch": 0.32510504201680673, "grad_norm": 6.589611594303426, "learning_rate": 9.998023937980683e-06, "loss": 2.7767136096954346, "step": 619 }, { "epoch": 0.32563025210084034, "grad_norm": 13.587399901755328, "learning_rate": 9.997937094346542e-06, "loss": 3.07078218460083, "step": 620 }, { "epoch": 0.32615546218487396, "grad_norm": 13.757165659427377, "learning_rate": 9.997848383626583e-06, "loss": 2.532907009124756, "step": 621 }, { "epoch": 0.32668067226890757, "grad_norm": 15.826090861905238, "learning_rate": 9.997757805853951e-06, "loss": 1.9090718030929565, "step": 622 }, { "epoch": 0.3272058823529412, "grad_norm": 8.401318999924264, "learning_rate": 9.99766536106248e-06, "loss": 2.527989625930786, "step": 623 }, { "epoch": 0.3277310924369748, "grad_norm": 14.196562630426328, "learning_rate": 9.997571049286706e-06, "loss": 2.8110876083374023, "step": 624 }, { "epoch": 0.3282563025210084, "grad_norm": 11.535703972555819, "learning_rate": 9.997474870561858e-06, "loss": 3.0162670612335205, "step": 625 }, { "epoch": 0.328781512605042, "grad_norm": 14.388689512029586, "learning_rate": 9.99737682492387e-06, "loss": 2.6724231243133545, "step": 626 }, { "epoch": 0.3293067226890756, "grad_norm": 20.552179177835775, "learning_rate": 9.997276912409369e-06, "loss": 2.8142333030700684, "step": 627 }, { "epoch": 0.32983193277310924, "grad_norm": 13.237498962761254, "learning_rate": 9.997175133055676e-06, "loss": 2.8185126781463623, "step": 628 }, { "epoch": 0.33035714285714285, "grad_norm": 9.583123197551005, "learning_rate": 9.997071486900813e-06, "loss": 2.719855785369873, "step": 629 }, { "epoch": 0.33088235294117646, "grad_norm": 13.478794221532347, "learning_rate": 9.996965973983503e-06, "loss": 3.1515822410583496, "step": 630 }, { "epoch": 0.3314075630252101, "grad_norm": 10.978117747492535, "learning_rate": 9.996858594343159e-06, "loss": 3.1971404552459717, "step": 631 }, { "epoch": 0.3319327731092437, "grad_norm": 18.07855511683476, "learning_rate": 9.996749348019898e-06, "loss": 3.107539176940918, "step": 632 }, { "epoch": 0.3324579831932773, "grad_norm": 8.18501392389578, "learning_rate": 9.996638235054527e-06, "loss": 2.682018280029297, "step": 633 }, { "epoch": 0.3329831932773109, "grad_norm": 12.184516047650115, "learning_rate": 9.996525255488559e-06, "loss": 2.6156351566314697, "step": 634 }, { "epoch": 0.3335084033613445, "grad_norm": 26.49471236163556, "learning_rate": 9.996410409364198e-06, "loss": 3.175257682800293, "step": 635 }, { "epoch": 0.33403361344537813, "grad_norm": 7.755694969661678, "learning_rate": 9.996293696724347e-06, "loss": 2.5734364986419678, "step": 636 }, { "epoch": 0.33455882352941174, "grad_norm": 9.856280234345837, "learning_rate": 9.996175117612608e-06, "loss": 2.624603271484375, "step": 637 }, { "epoch": 0.33508403361344535, "grad_norm": 9.377322848668618, "learning_rate": 9.996054672073276e-06, "loss": 2.9139628410339355, "step": 638 }, { "epoch": 0.33560924369747897, "grad_norm": 8.708097433883548, "learning_rate": 9.995932360151348e-06, "loss": 2.903323173522949, "step": 639 }, { "epoch": 0.33613445378151263, "grad_norm": 22.5662372687559, "learning_rate": 9.995808181892516e-06, "loss": 2.6533048152923584, "step": 640 }, { "epoch": 0.33665966386554624, "grad_norm": 11.27337849539793, "learning_rate": 9.99568213734317e-06, "loss": 2.5544841289520264, "step": 641 }, { "epoch": 0.33718487394957986, "grad_norm": 25.097768822401978, "learning_rate": 9.995554226550395e-06, "loss": 3.004918098449707, "step": 642 }, { "epoch": 0.33771008403361347, "grad_norm": 13.747250602715669, "learning_rate": 9.995424449561974e-06, "loss": 3.2306368350982666, "step": 643 }, { "epoch": 0.3382352941176471, "grad_norm": 8.706127255035039, "learning_rate": 9.995292806426392e-06, "loss": 2.882741928100586, "step": 644 }, { "epoch": 0.3387605042016807, "grad_norm": 12.898703667341863, "learning_rate": 9.995159297192824e-06, "loss": 2.462981700897217, "step": 645 }, { "epoch": 0.3392857142857143, "grad_norm": 8.940322682188675, "learning_rate": 9.995023921911146e-06, "loss": 2.655332088470459, "step": 646 }, { "epoch": 0.3398109243697479, "grad_norm": 8.829986905443867, "learning_rate": 9.99488668063193e-06, "loss": 3.0068254470825195, "step": 647 }, { "epoch": 0.3403361344537815, "grad_norm": 8.735286812318217, "learning_rate": 9.994747573406444e-06, "loss": 2.6651973724365234, "step": 648 }, { "epoch": 0.34086134453781514, "grad_norm": 10.27315507806615, "learning_rate": 9.99460660028666e-06, "loss": 2.4464921951293945, "step": 649 }, { "epoch": 0.34138655462184875, "grad_norm": 7.155452815715878, "learning_rate": 9.994463761325235e-06, "loss": 2.3645894527435303, "step": 650 }, { "epoch": 0.34191176470588236, "grad_norm": 12.28602457587812, "learning_rate": 9.994319056575532e-06, "loss": 2.9508180618286133, "step": 651 }, { "epoch": 0.34243697478991597, "grad_norm": 8.509858158883015, "learning_rate": 9.99417248609161e-06, "loss": 3.0886614322662354, "step": 652 }, { "epoch": 0.3429621848739496, "grad_norm": 7.63107284880161, "learning_rate": 9.994024049928222e-06, "loss": 3.215698480606079, "step": 653 }, { "epoch": 0.3434873949579832, "grad_norm": 16.02061369261867, "learning_rate": 9.99387374814082e-06, "loss": 1.5853204727172852, "step": 654 }, { "epoch": 0.3440126050420168, "grad_norm": 12.599815277623422, "learning_rate": 9.99372158078555e-06, "loss": 2.880288600921631, "step": 655 }, { "epoch": 0.3445378151260504, "grad_norm": 23.718360310240516, "learning_rate": 9.99356754791926e-06, "loss": 2.114903450012207, "step": 656 }, { "epoch": 0.34506302521008403, "grad_norm": 15.218989533301585, "learning_rate": 9.993411649599494e-06, "loss": 2.7294764518737793, "step": 657 }, { "epoch": 0.34558823529411764, "grad_norm": 10.25377666124804, "learning_rate": 9.993253885884488e-06, "loss": 2.971414566040039, "step": 658 }, { "epoch": 0.34611344537815125, "grad_norm": 10.812972960374735, "learning_rate": 9.993094256833178e-06, "loss": 2.6103994846343994, "step": 659 }, { "epoch": 0.34663865546218486, "grad_norm": 8.598787979315924, "learning_rate": 9.992932762505198e-06, "loss": 2.697841167449951, "step": 660 }, { "epoch": 0.3471638655462185, "grad_norm": 14.817163668782701, "learning_rate": 9.992769402960878e-06, "loss": 2.9950594902038574, "step": 661 }, { "epoch": 0.3476890756302521, "grad_norm": 11.638383460998597, "learning_rate": 9.992604178261242e-06, "loss": 2.944634437561035, "step": 662 }, { "epoch": 0.3482142857142857, "grad_norm": 10.830593207520312, "learning_rate": 9.992437088468016e-06, "loss": 2.856689214706421, "step": 663 }, { "epoch": 0.3487394957983193, "grad_norm": 10.553427456128704, "learning_rate": 9.992268133643622e-06, "loss": 2.556269645690918, "step": 664 }, { "epoch": 0.3492647058823529, "grad_norm": 18.737148533839182, "learning_rate": 9.99209731385117e-06, "loss": 2.78225040435791, "step": 665 }, { "epoch": 0.34978991596638653, "grad_norm": 22.82662907356697, "learning_rate": 9.991924629154476e-06, "loss": 2.0274147987365723, "step": 666 }, { "epoch": 0.35031512605042014, "grad_norm": 15.543809015361044, "learning_rate": 9.991750079618054e-06, "loss": 2.732177734375, "step": 667 }, { "epoch": 0.35084033613445376, "grad_norm": 9.785556149667748, "learning_rate": 9.991573665307108e-06, "loss": 2.142753839492798, "step": 668 }, { "epoch": 0.35136554621848737, "grad_norm": 36.13433828190088, "learning_rate": 9.99139538628754e-06, "loss": 2.969231128692627, "step": 669 }, { "epoch": 0.35189075630252103, "grad_norm": 10.418003369691215, "learning_rate": 9.991215242625948e-06, "loss": 3.4130592346191406, "step": 670 }, { "epoch": 0.35241596638655465, "grad_norm": 10.45910909072077, "learning_rate": 9.991033234389636e-06, "loss": 2.8507094383239746, "step": 671 }, { "epoch": 0.35294117647058826, "grad_norm": 11.296132395257468, "learning_rate": 9.99084936164659e-06, "loss": 2.2903499603271484, "step": 672 }, { "epoch": 0.35346638655462187, "grad_norm": 11.822520466773746, "learning_rate": 9.990663624465504e-06, "loss": 2.813413381576538, "step": 673 }, { "epoch": 0.3539915966386555, "grad_norm": 12.45634710640318, "learning_rate": 9.990476022915761e-06, "loss": 3.4090490341186523, "step": 674 }, { "epoch": 0.3545168067226891, "grad_norm": 12.816620367166667, "learning_rate": 9.990286557067443e-06, "loss": 2.671079635620117, "step": 675 }, { "epoch": 0.3550420168067227, "grad_norm": 7.775861620360174, "learning_rate": 9.990095226991334e-06, "loss": 3.0635085105895996, "step": 676 }, { "epoch": 0.3555672268907563, "grad_norm": 7.128343265286214, "learning_rate": 9.989902032758904e-06, "loss": 2.5118045806884766, "step": 677 }, { "epoch": 0.3560924369747899, "grad_norm": 9.23231361002266, "learning_rate": 9.989706974442329e-06, "loss": 2.978017807006836, "step": 678 }, { "epoch": 0.35661764705882354, "grad_norm": 11.130901634177837, "learning_rate": 9.989510052114473e-06, "loss": 2.4600491523742676, "step": 679 }, { "epoch": 0.35714285714285715, "grad_norm": 46.64047674219602, "learning_rate": 9.989311265848905e-06, "loss": 2.603870153427124, "step": 680 }, { "epoch": 0.35766806722689076, "grad_norm": 10.503936598310489, "learning_rate": 9.989110615719882e-06, "loss": 2.6567111015319824, "step": 681 }, { "epoch": 0.3581932773109244, "grad_norm": 12.325303927314826, "learning_rate": 9.988908101802361e-06, "loss": 2.937870740890503, "step": 682 }, { "epoch": 0.358718487394958, "grad_norm": 9.066199122716661, "learning_rate": 9.988703724172e-06, "loss": 2.1229398250579834, "step": 683 }, { "epoch": 0.3592436974789916, "grad_norm": 15.897454227218814, "learning_rate": 9.988497482905145e-06, "loss": 3.0912346839904785, "step": 684 }, { "epoch": 0.3597689075630252, "grad_norm": 20.734995219911607, "learning_rate": 9.988289378078842e-06, "loss": 3.042353630065918, "step": 685 }, { "epoch": 0.3602941176470588, "grad_norm": 13.937087303746189, "learning_rate": 9.988079409770832e-06, "loss": 2.5730111598968506, "step": 686 }, { "epoch": 0.36081932773109243, "grad_norm": 18.68167099777117, "learning_rate": 9.987867578059557e-06, "loss": 3.8723182678222656, "step": 687 }, { "epoch": 0.36134453781512604, "grad_norm": 14.050087711676303, "learning_rate": 9.987653883024147e-06, "loss": 2.8078064918518066, "step": 688 }, { "epoch": 0.36186974789915966, "grad_norm": 10.431859572774215, "learning_rate": 9.987438324744437e-06, "loss": 3.016974449157715, "step": 689 }, { "epoch": 0.36239495798319327, "grad_norm": 11.822299541853985, "learning_rate": 9.987220903300947e-06, "loss": 3.129451036453247, "step": 690 }, { "epoch": 0.3629201680672269, "grad_norm": 9.636064124635695, "learning_rate": 9.987001618774906e-06, "loss": 2.768364667892456, "step": 691 }, { "epoch": 0.3634453781512605, "grad_norm": 7.7086105081417875, "learning_rate": 9.986780471248228e-06, "loss": 2.748843193054199, "step": 692 }, { "epoch": 0.3639705882352941, "grad_norm": 8.25025639760207, "learning_rate": 9.986557460803527e-06, "loss": 2.6695477962493896, "step": 693 }, { "epoch": 0.3644957983193277, "grad_norm": 21.101621960287538, "learning_rate": 9.98633258752412e-06, "loss": 2.9242889881134033, "step": 694 }, { "epoch": 0.3650210084033613, "grad_norm": 7.915319275851179, "learning_rate": 9.986105851494003e-06, "loss": 2.942355155944824, "step": 695 }, { "epoch": 0.36554621848739494, "grad_norm": 9.792091922597695, "learning_rate": 9.985877252797887e-06, "loss": 1.7359246015548706, "step": 696 }, { "epoch": 0.36607142857142855, "grad_norm": 12.917423002334813, "learning_rate": 9.985646791521165e-06, "loss": 2.9983890056610107, "step": 697 }, { "epoch": 0.36659663865546216, "grad_norm": 35.091463458302385, "learning_rate": 9.98541446774993e-06, "loss": 3.2080330848693848, "step": 698 }, { "epoch": 0.36712184873949577, "grad_norm": 12.125766825502915, "learning_rate": 9.985180281570976e-06, "loss": 2.50394868850708, "step": 699 }, { "epoch": 0.36764705882352944, "grad_norm": 7.844200768249517, "learning_rate": 9.984944233071785e-06, "loss": 2.4198319911956787, "step": 700 }, { "epoch": 0.36817226890756305, "grad_norm": 10.404307824162935, "learning_rate": 9.984706322340539e-06, "loss": 2.928926467895508, "step": 701 }, { "epoch": 0.36869747899159666, "grad_norm": 10.490212869826033, "learning_rate": 9.984466549466112e-06, "loss": 2.9764275550842285, "step": 702 }, { "epoch": 0.3692226890756303, "grad_norm": 9.884497573900134, "learning_rate": 9.98422491453808e-06, "loss": 2.9621715545654297, "step": 703 }, { "epoch": 0.3697478991596639, "grad_norm": 9.942147288667627, "learning_rate": 9.98398141764671e-06, "loss": 2.4516334533691406, "step": 704 }, { "epoch": 0.3702731092436975, "grad_norm": 12.937384346927333, "learning_rate": 9.983736058882965e-06, "loss": 2.8038949966430664, "step": 705 }, { "epoch": 0.3707983193277311, "grad_norm": 8.693898923683015, "learning_rate": 9.983488838338504e-06, "loss": 3.0094220638275146, "step": 706 }, { "epoch": 0.3713235294117647, "grad_norm": 10.708732196701385, "learning_rate": 9.98323975610568e-06, "loss": 2.505156993865967, "step": 707 }, { "epoch": 0.37184873949579833, "grad_norm": 8.262363197685096, "learning_rate": 9.982988812277544e-06, "loss": 2.4521894454956055, "step": 708 }, { "epoch": 0.37237394957983194, "grad_norm": 8.759386744985079, "learning_rate": 9.982736006947842e-06, "loss": 2.5744028091430664, "step": 709 }, { "epoch": 0.37289915966386555, "grad_norm": 17.34528310589552, "learning_rate": 9.982481340211016e-06, "loss": 3.3058454990386963, "step": 710 }, { "epoch": 0.37342436974789917, "grad_norm": 22.382439682764094, "learning_rate": 9.9822248121622e-06, "loss": 3.381484031677246, "step": 711 }, { "epoch": 0.3739495798319328, "grad_norm": 32.160743822492435, "learning_rate": 9.981966422897225e-06, "loss": 2.116076946258545, "step": 712 }, { "epoch": 0.3744747899159664, "grad_norm": 11.865491148282732, "learning_rate": 9.98170617251262e-06, "loss": 2.513551950454712, "step": 713 }, { "epoch": 0.375, "grad_norm": 9.803448733419238, "learning_rate": 9.981444061105607e-06, "loss": 2.6778907775878906, "step": 714 }, { "epoch": 0.3755252100840336, "grad_norm": 15.056919870056147, "learning_rate": 9.9811800887741e-06, "loss": 2.695645332336426, "step": 715 }, { "epoch": 0.3760504201680672, "grad_norm": 7.337704853621016, "learning_rate": 9.980914255616716e-06, "loss": 2.4225122928619385, "step": 716 }, { "epoch": 0.37657563025210083, "grad_norm": 51.695039374935995, "learning_rate": 9.98064656173276e-06, "loss": 3.8589346408843994, "step": 717 }, { "epoch": 0.37710084033613445, "grad_norm": 16.11942131442461, "learning_rate": 9.980377007222236e-06, "loss": 2.013482093811035, "step": 718 }, { "epoch": 0.37762605042016806, "grad_norm": 9.933013258491656, "learning_rate": 9.980105592185838e-06, "loss": 2.68209171295166, "step": 719 }, { "epoch": 0.37815126050420167, "grad_norm": 14.001108544199091, "learning_rate": 9.979832316724965e-06, "loss": 2.5532543659210205, "step": 720 }, { "epoch": 0.3786764705882353, "grad_norm": 7.912448258198673, "learning_rate": 9.979557180941702e-06, "loss": 2.7311716079711914, "step": 721 }, { "epoch": 0.3792016806722689, "grad_norm": 13.435411246242886, "learning_rate": 9.979280184938829e-06, "loss": 2.514404296875, "step": 722 }, { "epoch": 0.3797268907563025, "grad_norm": 13.030047357377368, "learning_rate": 9.979001328819828e-06, "loss": 3.085047721862793, "step": 723 }, { "epoch": 0.3802521008403361, "grad_norm": 9.747365344941048, "learning_rate": 9.97872061268887e-06, "loss": 2.680142641067505, "step": 724 }, { "epoch": 0.3807773109243697, "grad_norm": 8.138043787406302, "learning_rate": 9.978438036650822e-06, "loss": 2.492799758911133, "step": 725 }, { "epoch": 0.38130252100840334, "grad_norm": 9.370875617288384, "learning_rate": 9.978153600811247e-06, "loss": 2.7892208099365234, "step": 726 }, { "epoch": 0.38182773109243695, "grad_norm": 8.580976998458176, "learning_rate": 9.977867305276403e-06, "loss": 2.453538417816162, "step": 727 }, { "epoch": 0.38235294117647056, "grad_norm": 10.991432407863732, "learning_rate": 9.97757915015324e-06, "loss": 2.6907737255096436, "step": 728 }, { "epoch": 0.38287815126050423, "grad_norm": 14.510205789056386, "learning_rate": 9.977289135549404e-06, "loss": 2.8692243099212646, "step": 729 }, { "epoch": 0.38340336134453784, "grad_norm": 7.913979618741548, "learning_rate": 9.976997261573239e-06, "loss": 2.712153911590576, "step": 730 }, { "epoch": 0.38392857142857145, "grad_norm": 6.721599599733559, "learning_rate": 9.976703528333777e-06, "loss": 3.417105197906494, "step": 731 }, { "epoch": 0.38445378151260506, "grad_norm": 9.974817268351917, "learning_rate": 9.976407935940753e-06, "loss": 2.704678773880005, "step": 732 }, { "epoch": 0.3849789915966387, "grad_norm": 17.857088853020603, "learning_rate": 9.976110484504587e-06, "loss": 2.9578113555908203, "step": 733 }, { "epoch": 0.3855042016806723, "grad_norm": 10.613227127445194, "learning_rate": 9.975811174136401e-06, "loss": 2.7810139656066895, "step": 734 }, { "epoch": 0.3860294117647059, "grad_norm": 10.190958240778137, "learning_rate": 9.97551000494801e-06, "loss": 3.049150228500366, "step": 735 }, { "epoch": 0.3865546218487395, "grad_norm": 68.69548371753692, "learning_rate": 9.975206977051919e-06, "loss": 4.675893783569336, "step": 736 }, { "epoch": 0.3870798319327731, "grad_norm": 15.76881007178103, "learning_rate": 9.974902090561331e-06, "loss": 2.786167621612549, "step": 737 }, { "epoch": 0.38760504201680673, "grad_norm": 10.862519522476427, "learning_rate": 9.974595345590146e-06, "loss": 3.2471139430999756, "step": 738 }, { "epoch": 0.38813025210084034, "grad_norm": 10.057154507380792, "learning_rate": 9.97428674225295e-06, "loss": 3.0831401348114014, "step": 739 }, { "epoch": 0.38865546218487396, "grad_norm": 9.033985715456321, "learning_rate": 9.973976280665034e-06, "loss": 2.7748661041259766, "step": 740 }, { "epoch": 0.38918067226890757, "grad_norm": 6.999164319444783, "learning_rate": 9.973663960942373e-06, "loss": 3.044131278991699, "step": 741 }, { "epoch": 0.3897058823529412, "grad_norm": 12.913416251410435, "learning_rate": 9.973349783201643e-06, "loss": 3.110964775085449, "step": 742 }, { "epoch": 0.3902310924369748, "grad_norm": 8.289008376927272, "learning_rate": 9.97303374756021e-06, "loss": 3.1857125759124756, "step": 743 }, { "epoch": 0.3907563025210084, "grad_norm": 12.390872525617006, "learning_rate": 9.97271585413614e-06, "loss": 2.7156639099121094, "step": 744 }, { "epoch": 0.391281512605042, "grad_norm": 9.704288372807804, "learning_rate": 9.972396103048184e-06, "loss": 2.800199031829834, "step": 745 }, { "epoch": 0.3918067226890756, "grad_norm": 5.503059294865545, "learning_rate": 9.972074494415794e-06, "loss": 2.6597952842712402, "step": 746 }, { "epoch": 0.39233193277310924, "grad_norm": 14.629486559448079, "learning_rate": 9.971751028359113e-06, "loss": 2.8860340118408203, "step": 747 }, { "epoch": 0.39285714285714285, "grad_norm": 9.865892989389492, "learning_rate": 9.971425704998979e-06, "loss": 2.8084075450897217, "step": 748 }, { "epoch": 0.39338235294117646, "grad_norm": 13.660807237898522, "learning_rate": 9.971098524456925e-06, "loss": 2.41198468208313, "step": 749 }, { "epoch": 0.3939075630252101, "grad_norm": 16.02642625865953, "learning_rate": 9.970769486855175e-06, "loss": 2.6530814170837402, "step": 750 }, { "epoch": 0.3944327731092437, "grad_norm": 9.93788810955185, "learning_rate": 9.970438592316646e-06, "loss": 2.247192859649658, "step": 751 }, { "epoch": 0.3949579831932773, "grad_norm": 17.000130461529395, "learning_rate": 9.970105840964954e-06, "loss": 2.9603047370910645, "step": 752 }, { "epoch": 0.3954831932773109, "grad_norm": 18.104368897922804, "learning_rate": 9.969771232924404e-06, "loss": 2.4636471271514893, "step": 753 }, { "epoch": 0.3960084033613445, "grad_norm": 11.678759570892067, "learning_rate": 9.969434768319994e-06, "loss": 2.67864990234375, "step": 754 }, { "epoch": 0.39653361344537813, "grad_norm": 9.954516151022306, "learning_rate": 9.969096447277421e-06, "loss": 2.601876735687256, "step": 755 }, { "epoch": 0.39705882352941174, "grad_norm": 10.04194683094288, "learning_rate": 9.96875626992307e-06, "loss": 2.7278666496276855, "step": 756 }, { "epoch": 0.39758403361344535, "grad_norm": 9.859608719702951, "learning_rate": 9.968414236384022e-06, "loss": 2.5173091888427734, "step": 757 }, { "epoch": 0.39810924369747897, "grad_norm": 8.445956395716482, "learning_rate": 9.968070346788052e-06, "loss": 2.7311062812805176, "step": 758 }, { "epoch": 0.39863445378151263, "grad_norm": 11.538558785627409, "learning_rate": 9.967724601263624e-06, "loss": 2.5492570400238037, "step": 759 }, { "epoch": 0.39915966386554624, "grad_norm": 16.59494308416059, "learning_rate": 9.967376999939902e-06, "loss": 3.0493645668029785, "step": 760 }, { "epoch": 0.39968487394957986, "grad_norm": 11.943515039644934, "learning_rate": 9.967027542946739e-06, "loss": 3.117658853530884, "step": 761 }, { "epoch": 0.40021008403361347, "grad_norm": 12.660847363691728, "learning_rate": 9.96667623041468e-06, "loss": 2.7667980194091797, "step": 762 }, { "epoch": 0.4007352941176471, "grad_norm": 11.360923456683278, "learning_rate": 9.96632306247497e-06, "loss": 3.1187093257904053, "step": 763 }, { "epoch": 0.4012605042016807, "grad_norm": 18.860213599392665, "learning_rate": 9.965968039259537e-06, "loss": 2.6678061485290527, "step": 764 }, { "epoch": 0.4017857142857143, "grad_norm": 5.953990702595949, "learning_rate": 9.965611160901008e-06, "loss": 2.969163417816162, "step": 765 }, { "epoch": 0.4023109243697479, "grad_norm": 11.83044277913696, "learning_rate": 9.965252427532707e-06, "loss": 2.681096315383911, "step": 766 }, { "epoch": 0.4028361344537815, "grad_norm": 48.370836508184965, "learning_rate": 9.964891839288644e-06, "loss": 4.605452537536621, "step": 767 }, { "epoch": 0.40336134453781514, "grad_norm": 17.52603230116782, "learning_rate": 9.964529396303524e-06, "loss": 3.2965004444122314, "step": 768 }, { "epoch": 0.40388655462184875, "grad_norm": 9.843106150407221, "learning_rate": 9.964165098712745e-06, "loss": 3.0530214309692383, "step": 769 }, { "epoch": 0.40441176470588236, "grad_norm": 60.38131547853319, "learning_rate": 9.9637989466524e-06, "loss": 2.8514883518218994, "step": 770 }, { "epoch": 0.40493697478991597, "grad_norm": 12.992236346655288, "learning_rate": 9.96343094025927e-06, "loss": 2.743839979171753, "step": 771 }, { "epoch": 0.4054621848739496, "grad_norm": 7.789092687155614, "learning_rate": 9.963061079670833e-06, "loss": 2.968172788619995, "step": 772 }, { "epoch": 0.4059873949579832, "grad_norm": 11.105990576237419, "learning_rate": 9.962689365025259e-06, "loss": 2.259955406188965, "step": 773 }, { "epoch": 0.4065126050420168, "grad_norm": 14.171652130560842, "learning_rate": 9.96231579646141e-06, "loss": 3.6340267658233643, "step": 774 }, { "epoch": 0.4070378151260504, "grad_norm": 7.142663329318153, "learning_rate": 9.96194037411884e-06, "loss": 2.7550954818725586, "step": 775 }, { "epoch": 0.40756302521008403, "grad_norm": 20.856166454476007, "learning_rate": 9.961563098137795e-06, "loss": 2.0924081802368164, "step": 776 }, { "epoch": 0.40808823529411764, "grad_norm": 9.725403375549687, "learning_rate": 9.961183968659217e-06, "loss": 2.6671392917633057, "step": 777 }, { "epoch": 0.40861344537815125, "grad_norm": 18.13216494899129, "learning_rate": 9.960802985824734e-06, "loss": 3.811979293823242, "step": 778 }, { "epoch": 0.40913865546218486, "grad_norm": 9.603302379402304, "learning_rate": 9.960420149776674e-06, "loss": 3.0309672355651855, "step": 779 }, { "epoch": 0.4096638655462185, "grad_norm": 7.60530093348673, "learning_rate": 9.960035460658052e-06, "loss": 2.6865861415863037, "step": 780 }, { "epoch": 0.4101890756302521, "grad_norm": 9.150805363464082, "learning_rate": 9.959648918612576e-06, "loss": 2.356776714324951, "step": 781 }, { "epoch": 0.4107142857142857, "grad_norm": 19.764875833019317, "learning_rate": 9.959260523784648e-06, "loss": 2.4339756965637207, "step": 782 }, { "epoch": 0.4112394957983193, "grad_norm": 6.714849553660578, "learning_rate": 9.958870276319364e-06, "loss": 2.9411540031433105, "step": 783 }, { "epoch": 0.4117647058823529, "grad_norm": 12.882346390209301, "learning_rate": 9.958478176362503e-06, "loss": 2.516554355621338, "step": 784 }, { "epoch": 0.41228991596638653, "grad_norm": 15.075731597661168, "learning_rate": 9.958084224060547e-06, "loss": 2.9573659896850586, "step": 785 }, { "epoch": 0.41281512605042014, "grad_norm": 18.918437370618513, "learning_rate": 9.957688419560662e-06, "loss": 3.919832229614258, "step": 786 }, { "epoch": 0.41334033613445376, "grad_norm": 42.32396530125621, "learning_rate": 9.957290763010714e-06, "loss": 3.0770509243011475, "step": 787 }, { "epoch": 0.41386554621848737, "grad_norm": 33.11870145412547, "learning_rate": 9.95689125455925e-06, "loss": 3.0764617919921875, "step": 788 }, { "epoch": 0.41439075630252103, "grad_norm": 14.606636259089811, "learning_rate": 9.956489894355521e-06, "loss": 2.8072032928466797, "step": 789 }, { "epoch": 0.41491596638655465, "grad_norm": 12.868362449244204, "learning_rate": 9.95608668254946e-06, "loss": 2.5233964920043945, "step": 790 }, { "epoch": 0.41544117647058826, "grad_norm": 17.899517071794047, "learning_rate": 9.955681619291695e-06, "loss": 3.2196409702301025, "step": 791 }, { "epoch": 0.41596638655462187, "grad_norm": 11.84349501647458, "learning_rate": 9.955274704733547e-06, "loss": 2.637895107269287, "step": 792 }, { "epoch": 0.4164915966386555, "grad_norm": 13.051509389596445, "learning_rate": 9.954865939027028e-06, "loss": 2.5700843334198, "step": 793 }, { "epoch": 0.4170168067226891, "grad_norm": 22.4530983602669, "learning_rate": 9.95445532232484e-06, "loss": 3.3792872428894043, "step": 794 }, { "epoch": 0.4175420168067227, "grad_norm": 16.66009099744122, "learning_rate": 9.954042854780381e-06, "loss": 2.5564870834350586, "step": 795 }, { "epoch": 0.4180672268907563, "grad_norm": 16.41759956373412, "learning_rate": 9.953628536547732e-06, "loss": 2.229680061340332, "step": 796 }, { "epoch": 0.4185924369747899, "grad_norm": 5.95355444174316, "learning_rate": 9.953212367781675e-06, "loss": 2.8148036003112793, "step": 797 }, { "epoch": 0.41911764705882354, "grad_norm": 11.288180536237046, "learning_rate": 9.952794348637674e-06, "loss": 4.047574520111084, "step": 798 }, { "epoch": 0.41964285714285715, "grad_norm": 9.68707893790397, "learning_rate": 9.952374479271894e-06, "loss": 2.795058012008667, "step": 799 }, { "epoch": 0.42016806722689076, "grad_norm": 28.32561829635747, "learning_rate": 9.95195275984118e-06, "loss": 3.3051700592041016, "step": 800 }, { "epoch": 0.4206932773109244, "grad_norm": 11.875533469159889, "learning_rate": 9.95152919050308e-06, "loss": 2.640284299850464, "step": 801 }, { "epoch": 0.421218487394958, "grad_norm": 16.848698170104978, "learning_rate": 9.951103771415826e-06, "loss": 2.74238920211792, "step": 802 }, { "epoch": 0.4217436974789916, "grad_norm": 12.024301086715802, "learning_rate": 9.95067650273834e-06, "loss": 2.653933048248291, "step": 803 }, { "epoch": 0.4222689075630252, "grad_norm": 9.244446014771947, "learning_rate": 9.95024738463024e-06, "loss": 2.8780946731567383, "step": 804 }, { "epoch": 0.4227941176470588, "grad_norm": 13.759649085993791, "learning_rate": 9.949816417251831e-06, "loss": 2.6063790321350098, "step": 805 }, { "epoch": 0.42331932773109243, "grad_norm": 6.105600186230813, "learning_rate": 9.949383600764112e-06, "loss": 2.5861740112304688, "step": 806 }, { "epoch": 0.42384453781512604, "grad_norm": 11.61006434807629, "learning_rate": 9.948948935328766e-06, "loss": 2.3922276496887207, "step": 807 }, { "epoch": 0.42436974789915966, "grad_norm": 21.320775980203457, "learning_rate": 9.948512421108175e-06, "loss": 2.4385743141174316, "step": 808 }, { "epoch": 0.42489495798319327, "grad_norm": 15.837796454311238, "learning_rate": 9.948074058265409e-06, "loss": 3.436558246612549, "step": 809 }, { "epoch": 0.4254201680672269, "grad_norm": 12.301838472836549, "learning_rate": 9.947633846964225e-06, "loss": 2.4982149600982666, "step": 810 }, { "epoch": 0.4259453781512605, "grad_norm": 10.290693200383759, "learning_rate": 9.947191787369075e-06, "loss": 2.629467248916626, "step": 811 }, { "epoch": 0.4264705882352941, "grad_norm": 15.301896288000805, "learning_rate": 9.946747879645101e-06, "loss": 2.895120620727539, "step": 812 }, { "epoch": 0.4269957983193277, "grad_norm": 12.005977233483202, "learning_rate": 9.94630212395813e-06, "loss": 2.616076946258545, "step": 813 }, { "epoch": 0.4275210084033613, "grad_norm": 9.06535556509353, "learning_rate": 9.94585452047469e-06, "loss": 3.250288486480713, "step": 814 }, { "epoch": 0.42804621848739494, "grad_norm": 7.33540283021128, "learning_rate": 9.945405069361985e-06, "loss": 2.5006253719329834, "step": 815 }, { "epoch": 0.42857142857142855, "grad_norm": 7.137745650856612, "learning_rate": 9.944953770787924e-06, "loss": 2.8030080795288086, "step": 816 }, { "epoch": 0.42909663865546216, "grad_norm": 22.712138997813216, "learning_rate": 9.944500624921094e-06, "loss": 2.7877559661865234, "step": 817 }, { "epoch": 0.42962184873949577, "grad_norm": 22.264028742258198, "learning_rate": 9.944045631930782e-06, "loss": 2.2608823776245117, "step": 818 }, { "epoch": 0.43014705882352944, "grad_norm": 10.743172030391833, "learning_rate": 9.943588791986956e-06, "loss": 2.655961036682129, "step": 819 }, { "epoch": 0.43067226890756305, "grad_norm": 15.720922909163425, "learning_rate": 9.943130105260281e-06, "loss": 2.954380750656128, "step": 820 }, { "epoch": 0.43119747899159666, "grad_norm": 8.442283187335915, "learning_rate": 9.942669571922108e-06, "loss": 2.827402114868164, "step": 821 }, { "epoch": 0.4317226890756303, "grad_norm": 13.36121904866565, "learning_rate": 9.94220719214448e-06, "loss": 2.8097915649414062, "step": 822 }, { "epoch": 0.4322478991596639, "grad_norm": 12.94135436654749, "learning_rate": 9.941742966100128e-06, "loss": 2.3562488555908203, "step": 823 }, { "epoch": 0.4327731092436975, "grad_norm": 19.987951318309037, "learning_rate": 9.941276893962472e-06, "loss": 3.1615231037139893, "step": 824 }, { "epoch": 0.4332983193277311, "grad_norm": 11.597688274021062, "learning_rate": 9.940808975905627e-06, "loss": 2.806821584701538, "step": 825 }, { "epoch": 0.4338235294117647, "grad_norm": 14.77858087554389, "learning_rate": 9.94033921210439e-06, "loss": 3.246838331222534, "step": 826 }, { "epoch": 0.43434873949579833, "grad_norm": 12.827285468107656, "learning_rate": 9.939867602734255e-06, "loss": 3.145732879638672, "step": 827 }, { "epoch": 0.43487394957983194, "grad_norm": 12.195643133192908, "learning_rate": 9.939394147971398e-06, "loss": 2.699291706085205, "step": 828 }, { "epoch": 0.43539915966386555, "grad_norm": 8.998726860136895, "learning_rate": 9.93891884799269e-06, "loss": 2.623117446899414, "step": 829 }, { "epoch": 0.43592436974789917, "grad_norm": 9.88074255664715, "learning_rate": 9.938441702975689e-06, "loss": 2.0797412395477295, "step": 830 }, { "epoch": 0.4364495798319328, "grad_norm": 24.586238736363498, "learning_rate": 9.937962713098644e-06, "loss": 2.6907668113708496, "step": 831 }, { "epoch": 0.4369747899159664, "grad_norm": 13.438904929323614, "learning_rate": 9.93748187854049e-06, "loss": 2.883826732635498, "step": 832 }, { "epoch": 0.4375, "grad_norm": 26.15623990553852, "learning_rate": 9.936999199480854e-06, "loss": 3.016101837158203, "step": 833 }, { "epoch": 0.4380252100840336, "grad_norm": 12.732848530423388, "learning_rate": 9.936514676100049e-06, "loss": 2.8473546504974365, "step": 834 }, { "epoch": 0.4385504201680672, "grad_norm": 10.413089643503326, "learning_rate": 9.936028308579083e-06, "loss": 2.980785369873047, "step": 835 }, { "epoch": 0.43907563025210083, "grad_norm": 8.278903410838087, "learning_rate": 9.935540097099645e-06, "loss": 2.812281847000122, "step": 836 }, { "epoch": 0.43960084033613445, "grad_norm": 14.231041555841777, "learning_rate": 9.935050041844121e-06, "loss": 3.268686532974243, "step": 837 }, { "epoch": 0.44012605042016806, "grad_norm": 11.174600937877548, "learning_rate": 9.934558142995577e-06, "loss": 2.3432607650756836, "step": 838 }, { "epoch": 0.44065126050420167, "grad_norm": 10.860728364169903, "learning_rate": 9.934064400737776e-06, "loss": 1.6175007820129395, "step": 839 }, { "epoch": 0.4411764705882353, "grad_norm": 8.8522305019082, "learning_rate": 9.933568815255161e-06, "loss": 2.0455827713012695, "step": 840 }, { "epoch": 0.4417016806722689, "grad_norm": 19.059749722910492, "learning_rate": 9.933071386732874e-06, "loss": 2.358410358428955, "step": 841 }, { "epoch": 0.4422268907563025, "grad_norm": 14.716117524810409, "learning_rate": 9.932572115356738e-06, "loss": 2.637843370437622, "step": 842 }, { "epoch": 0.4427521008403361, "grad_norm": 18.457289432595132, "learning_rate": 9.932071001313265e-06, "loss": 3.0861763954162598, "step": 843 }, { "epoch": 0.4432773109243697, "grad_norm": 21.047348762353046, "learning_rate": 9.931568044789661e-06, "loss": 3.025874137878418, "step": 844 }, { "epoch": 0.44380252100840334, "grad_norm": 8.402545229270334, "learning_rate": 9.931063245973812e-06, "loss": 1.9306225776672363, "step": 845 }, { "epoch": 0.44432773109243695, "grad_norm": 10.116635921119325, "learning_rate": 9.930556605054295e-06, "loss": 2.9862217903137207, "step": 846 }, { "epoch": 0.44485294117647056, "grad_norm": 5.691637581095164, "learning_rate": 9.93004812222038e-06, "loss": 2.0409066677093506, "step": 847 }, { "epoch": 0.44537815126050423, "grad_norm": 20.385980345273836, "learning_rate": 9.929537797662022e-06, "loss": 2.8674979209899902, "step": 848 }, { "epoch": 0.44590336134453784, "grad_norm": 24.592665709309212, "learning_rate": 9.929025631569864e-06, "loss": 3.1542892456054688, "step": 849 }, { "epoch": 0.44642857142857145, "grad_norm": 10.457336827215615, "learning_rate": 9.928511624135233e-06, "loss": 2.3662195205688477, "step": 850 }, { "epoch": 0.44695378151260506, "grad_norm": 16.156344686329113, "learning_rate": 9.927995775550148e-06, "loss": 2.897277355194092, "step": 851 }, { "epoch": 0.4474789915966387, "grad_norm": 7.458575180777569, "learning_rate": 9.927478086007316e-06, "loss": 3.179873466491699, "step": 852 }, { "epoch": 0.4480042016806723, "grad_norm": 10.618063286275696, "learning_rate": 9.926958555700134e-06, "loss": 3.3985543251037598, "step": 853 }, { "epoch": 0.4485294117647059, "grad_norm": 25.100942160328817, "learning_rate": 9.926437184822679e-06, "loss": 2.3599376678466797, "step": 854 }, { "epoch": 0.4490546218487395, "grad_norm": 6.834921407287759, "learning_rate": 9.925913973569724e-06, "loss": 2.82316517829895, "step": 855 }, { "epoch": 0.4495798319327731, "grad_norm": 23.26517440469458, "learning_rate": 9.925388922136723e-06, "loss": 2.540208339691162, "step": 856 }, { "epoch": 0.45010504201680673, "grad_norm": 23.625214809457713, "learning_rate": 9.924862030719821e-06, "loss": 3.0775671005249023, "step": 857 }, { "epoch": 0.45063025210084034, "grad_norm": 12.594553707506398, "learning_rate": 9.924333299515849e-06, "loss": 2.802736759185791, "step": 858 }, { "epoch": 0.45115546218487396, "grad_norm": 13.37962919758712, "learning_rate": 9.923802728722326e-06, "loss": 2.6349904537200928, "step": 859 }, { "epoch": 0.45168067226890757, "grad_norm": 9.76251844426648, "learning_rate": 9.92327031853746e-06, "loss": 2.773221015930176, "step": 860 }, { "epoch": 0.4522058823529412, "grad_norm": 8.272212081755562, "learning_rate": 9.922736069160141e-06, "loss": 2.677144765853882, "step": 861 }, { "epoch": 0.4527310924369748, "grad_norm": 21.64902996299887, "learning_rate": 9.922199980789953e-06, "loss": 2.4768295288085938, "step": 862 }, { "epoch": 0.4532563025210084, "grad_norm": 14.574329775895738, "learning_rate": 9.92166205362716e-06, "loss": 3.221784830093384, "step": 863 }, { "epoch": 0.453781512605042, "grad_norm": 14.831081804998556, "learning_rate": 9.921122287872715e-06, "loss": 2.8046839237213135, "step": 864 }, { "epoch": 0.4543067226890756, "grad_norm": 15.585592308183534, "learning_rate": 9.920580683728263e-06, "loss": 2.464376926422119, "step": 865 }, { "epoch": 0.45483193277310924, "grad_norm": 8.324442680101022, "learning_rate": 9.920037241396129e-06, "loss": 2.3783860206604004, "step": 866 }, { "epoch": 0.45535714285714285, "grad_norm": 8.676053686038793, "learning_rate": 9.91949196107933e-06, "loss": 2.8128809928894043, "step": 867 }, { "epoch": 0.45588235294117646, "grad_norm": 19.311087301833243, "learning_rate": 9.918944842981564e-06, "loss": 2.9722740650177, "step": 868 }, { "epoch": 0.4564075630252101, "grad_norm": 18.79648430901269, "learning_rate": 9.918395887307219e-06, "loss": 2.4842166900634766, "step": 869 }, { "epoch": 0.4569327731092437, "grad_norm": 7.373131879784598, "learning_rate": 9.917845094261372e-06, "loss": 2.549220561981201, "step": 870 }, { "epoch": 0.4574579831932773, "grad_norm": 16.88136657369639, "learning_rate": 9.91729246404978e-06, "loss": 2.625797748565674, "step": 871 }, { "epoch": 0.4579831932773109, "grad_norm": 14.363476422275891, "learning_rate": 9.916737996878894e-06, "loss": 2.4376330375671387, "step": 872 }, { "epoch": 0.4585084033613445, "grad_norm": 13.177614705966699, "learning_rate": 9.916181692955841e-06, "loss": 2.9835891723632812, "step": 873 }, { "epoch": 0.45903361344537813, "grad_norm": 14.72919808589754, "learning_rate": 9.915623552488448e-06, "loss": 2.9650497436523438, "step": 874 }, { "epoch": 0.45955882352941174, "grad_norm": 18.542956700150686, "learning_rate": 9.915063575685212e-06, "loss": 2.6515233516693115, "step": 875 }, { "epoch": 0.46008403361344535, "grad_norm": 22.222519275286896, "learning_rate": 9.914501762755328e-06, "loss": 3.233870506286621, "step": 876 }, { "epoch": 0.46060924369747897, "grad_norm": 11.454153364063668, "learning_rate": 9.913938113908675e-06, "loss": 3.0363383293151855, "step": 877 }, { "epoch": 0.46113445378151263, "grad_norm": 14.735364528868368, "learning_rate": 9.913372629355814e-06, "loss": 2.7695579528808594, "step": 878 }, { "epoch": 0.46165966386554624, "grad_norm": 81.93776974235087, "learning_rate": 9.912805309307994e-06, "loss": 3.1072168350219727, "step": 879 }, { "epoch": 0.46218487394957986, "grad_norm": 15.513658898749222, "learning_rate": 9.912236153977149e-06, "loss": 2.9209001064300537, "step": 880 }, { "epoch": 0.46271008403361347, "grad_norm": 10.31433168176148, "learning_rate": 9.9116651635759e-06, "loss": 2.876655101776123, "step": 881 }, { "epoch": 0.4632352941176471, "grad_norm": 9.705145505130226, "learning_rate": 9.911092338317552e-06, "loss": 2.7037758827209473, "step": 882 }, { "epoch": 0.4637605042016807, "grad_norm": 11.63673939883521, "learning_rate": 9.910517678416097e-06, "loss": 3.012422561645508, "step": 883 }, { "epoch": 0.4642857142857143, "grad_norm": 11.117643209724529, "learning_rate": 9.90994118408621e-06, "loss": 2.6030240058898926, "step": 884 }, { "epoch": 0.4648109243697479, "grad_norm": 12.829617835999944, "learning_rate": 9.909362855543253e-06, "loss": 2.7769675254821777, "step": 885 }, { "epoch": 0.4653361344537815, "grad_norm": 11.180701627990615, "learning_rate": 9.908782693003273e-06, "loss": 2.954176425933838, "step": 886 }, { "epoch": 0.46586134453781514, "grad_norm": 14.934695274123843, "learning_rate": 9.908200696683004e-06, "loss": 1.9919966459274292, "step": 887 }, { "epoch": 0.46638655462184875, "grad_norm": 18.64002973237268, "learning_rate": 9.907616866799862e-06, "loss": 2.660306692123413, "step": 888 }, { "epoch": 0.46691176470588236, "grad_norm": 8.698588008261643, "learning_rate": 9.907031203571948e-06, "loss": 2.636436939239502, "step": 889 }, { "epoch": 0.46743697478991597, "grad_norm": 12.701214516915318, "learning_rate": 9.90644370721805e-06, "loss": 3.0882530212402344, "step": 890 }, { "epoch": 0.4679621848739496, "grad_norm": 17.167677117516885, "learning_rate": 9.90585437795764e-06, "loss": 2.685439348220825, "step": 891 }, { "epoch": 0.4684873949579832, "grad_norm": 9.862494171319884, "learning_rate": 9.905263216010873e-06, "loss": 3.0536980628967285, "step": 892 }, { "epoch": 0.4690126050420168, "grad_norm": 28.063467363917898, "learning_rate": 9.90467022159859e-06, "loss": 2.772045612335205, "step": 893 }, { "epoch": 0.4695378151260504, "grad_norm": 13.730481186958547, "learning_rate": 9.904075394942319e-06, "loss": 2.8109469413757324, "step": 894 }, { "epoch": 0.47006302521008403, "grad_norm": 10.458735326832665, "learning_rate": 9.90347873626427e-06, "loss": 3.1038899421691895, "step": 895 }, { "epoch": 0.47058823529411764, "grad_norm": 8.942485794096514, "learning_rate": 9.902880245787335e-06, "loss": 2.6819446086883545, "step": 896 }, { "epoch": 0.47111344537815125, "grad_norm": 15.133420182005043, "learning_rate": 9.902279923735093e-06, "loss": 2.5943500995635986, "step": 897 }, { "epoch": 0.47163865546218486, "grad_norm": 44.287818347999504, "learning_rate": 9.90167777033181e-06, "loss": 2.56186580657959, "step": 898 }, { "epoch": 0.4721638655462185, "grad_norm": 12.179010723789636, "learning_rate": 9.901073785802433e-06, "loss": 2.842773914337158, "step": 899 }, { "epoch": 0.4726890756302521, "grad_norm": 10.910211922784503, "learning_rate": 9.90046797037259e-06, "loss": 2.7502799034118652, "step": 900 }, { "epoch": 0.4732142857142857, "grad_norm": 12.247715820306853, "learning_rate": 9.899860324268599e-06, "loss": 2.9904348850250244, "step": 901 }, { "epoch": 0.4737394957983193, "grad_norm": 8.397186831988368, "learning_rate": 9.899250847717458e-06, "loss": 2.300837993621826, "step": 902 }, { "epoch": 0.4742647058823529, "grad_norm": 6.757791574547933, "learning_rate": 9.89863954094685e-06, "loss": 3.03016996383667, "step": 903 }, { "epoch": 0.47478991596638653, "grad_norm": 11.149010584995784, "learning_rate": 9.898026404185142e-06, "loss": 3.3881657123565674, "step": 904 }, { "epoch": 0.47531512605042014, "grad_norm": 9.926001688009785, "learning_rate": 9.897411437661386e-06, "loss": 3.2900993824005127, "step": 905 }, { "epoch": 0.47584033613445376, "grad_norm": 14.484467714997923, "learning_rate": 9.896794641605311e-06, "loss": 2.8626081943511963, "step": 906 }, { "epoch": 0.47636554621848737, "grad_norm": 8.178069070434272, "learning_rate": 9.89617601624734e-06, "loss": 2.818047046661377, "step": 907 }, { "epoch": 0.47689075630252103, "grad_norm": 10.1232775019755, "learning_rate": 9.89555556181857e-06, "loss": 2.754782199859619, "step": 908 }, { "epoch": 0.47741596638655465, "grad_norm": 37.59288764613423, "learning_rate": 9.894933278550785e-06, "loss": 2.760875940322876, "step": 909 }, { "epoch": 0.47794117647058826, "grad_norm": 8.362808010440565, "learning_rate": 9.894309166676454e-06, "loss": 3.0588538646698, "step": 910 }, { "epoch": 0.47846638655462187, "grad_norm": 11.040059976714263, "learning_rate": 9.893683226428727e-06, "loss": 2.7186901569366455, "step": 911 }, { "epoch": 0.4789915966386555, "grad_norm": 15.884369116169045, "learning_rate": 9.893055458041435e-06, "loss": 2.453359365463257, "step": 912 }, { "epoch": 0.4795168067226891, "grad_norm": 11.150449224857894, "learning_rate": 9.8924258617491e-06, "loss": 2.9110960960388184, "step": 913 }, { "epoch": 0.4800420168067227, "grad_norm": 9.861777410746715, "learning_rate": 9.891794437786913e-06, "loss": 2.450448513031006, "step": 914 }, { "epoch": 0.4805672268907563, "grad_norm": 8.704856714943997, "learning_rate": 9.891161186390762e-06, "loss": 2.253525733947754, "step": 915 }, { "epoch": 0.4810924369747899, "grad_norm": 11.210249714468798, "learning_rate": 9.89052610779721e-06, "loss": 2.6373140811920166, "step": 916 }, { "epoch": 0.48161764705882354, "grad_norm": 11.331411616830994, "learning_rate": 9.8898892022435e-06, "loss": 2.8883016109466553, "step": 917 }, { "epoch": 0.48214285714285715, "grad_norm": 14.169413016326956, "learning_rate": 9.88925046996757e-06, "loss": 2.789289951324463, "step": 918 }, { "epoch": 0.48266806722689076, "grad_norm": 9.823475437104284, "learning_rate": 9.888609911208024e-06, "loss": 2.451218605041504, "step": 919 }, { "epoch": 0.4831932773109244, "grad_norm": 13.071774200371857, "learning_rate": 9.88796752620416e-06, "loss": 2.9068379402160645, "step": 920 }, { "epoch": 0.483718487394958, "grad_norm": 7.228222229862132, "learning_rate": 9.887323315195956e-06, "loss": 2.5686075687408447, "step": 921 }, { "epoch": 0.4842436974789916, "grad_norm": 13.97857355451438, "learning_rate": 9.886677278424067e-06, "loss": 3.3692898750305176, "step": 922 }, { "epoch": 0.4847689075630252, "grad_norm": 13.189205905556747, "learning_rate": 9.886029416129837e-06, "loss": 2.647993803024292, "step": 923 }, { "epoch": 0.4852941176470588, "grad_norm": 9.959531959726037, "learning_rate": 9.885379728555287e-06, "loss": 2.748720645904541, "step": 924 }, { "epoch": 0.48581932773109243, "grad_norm": 8.566845023372595, "learning_rate": 9.884728215943122e-06, "loss": 2.3783226013183594, "step": 925 }, { "epoch": 0.48634453781512604, "grad_norm": 10.393397255084187, "learning_rate": 9.88407487853673e-06, "loss": 2.561636209487915, "step": 926 }, { "epoch": 0.48686974789915966, "grad_norm": 6.2649523285799615, "learning_rate": 9.883419716580176e-06, "loss": 3.1514012813568115, "step": 927 }, { "epoch": 0.48739495798319327, "grad_norm": 10.114420618847973, "learning_rate": 9.882762730318211e-06, "loss": 2.6970512866973877, "step": 928 }, { "epoch": 0.4879201680672269, "grad_norm": 8.933523608286832, "learning_rate": 9.882103919996268e-06, "loss": 3.23095703125, "step": 929 }, { "epoch": 0.4884453781512605, "grad_norm": 7.397712401835694, "learning_rate": 9.88144328586046e-06, "loss": 2.8616130352020264, "step": 930 }, { "epoch": 0.4889705882352941, "grad_norm": 8.29616801424702, "learning_rate": 9.880780828157574e-06, "loss": 3.153644561767578, "step": 931 }, { "epoch": 0.4894957983193277, "grad_norm": 9.896148542582118, "learning_rate": 9.880116547135094e-06, "loss": 2.85158109664917, "step": 932 }, { "epoch": 0.4900210084033613, "grad_norm": 9.7373498009189, "learning_rate": 9.879450443041172e-06, "loss": 2.8021767139434814, "step": 933 }, { "epoch": 0.49054621848739494, "grad_norm": 18.346372110792238, "learning_rate": 9.878782516124646e-06, "loss": 2.8704347610473633, "step": 934 }, { "epoch": 0.49107142857142855, "grad_norm": 10.048721317398819, "learning_rate": 9.878112766635035e-06, "loss": 2.824406385421753, "step": 935 }, { "epoch": 0.49159663865546216, "grad_norm": 12.81702897470756, "learning_rate": 9.877441194822537e-06, "loss": 3.4565329551696777, "step": 936 }, { "epoch": 0.49212184873949577, "grad_norm": 22.2970212401919, "learning_rate": 9.876767800938032e-06, "loss": 2.3091888427734375, "step": 937 }, { "epoch": 0.49264705882352944, "grad_norm": 10.793653974784883, "learning_rate": 9.876092585233081e-06, "loss": 3.0196175575256348, "step": 938 }, { "epoch": 0.49317226890756305, "grad_norm": 10.708288001889315, "learning_rate": 9.875415547959926e-06, "loss": 2.4891157150268555, "step": 939 }, { "epoch": 0.49369747899159666, "grad_norm": 8.340641456156122, "learning_rate": 9.874736689371487e-06, "loss": 2.7188596725463867, "step": 940 }, { "epoch": 0.4942226890756303, "grad_norm": 15.558688033349009, "learning_rate": 9.874056009721367e-06, "loss": 2.205338478088379, "step": 941 }, { "epoch": 0.4947478991596639, "grad_norm": 14.595148331349147, "learning_rate": 9.87337350926385e-06, "loss": 3.069061040878296, "step": 942 }, { "epoch": 0.4952731092436975, "grad_norm": 10.50970053280589, "learning_rate": 9.872689188253895e-06, "loss": 3.2438502311706543, "step": 943 }, { "epoch": 0.4957983193277311, "grad_norm": 11.852469887801579, "learning_rate": 9.872003046947148e-06, "loss": 2.4861674308776855, "step": 944 }, { "epoch": 0.4963235294117647, "grad_norm": 12.881604766503324, "learning_rate": 9.87131508559993e-06, "loss": 2.827747344970703, "step": 945 }, { "epoch": 0.49684873949579833, "grad_norm": 31.509644437963562, "learning_rate": 9.870625304469244e-06, "loss": 3.669290065765381, "step": 946 }, { "epoch": 0.49737394957983194, "grad_norm": 11.243025059684005, "learning_rate": 9.869933703812773e-06, "loss": 2.615156650543213, "step": 947 }, { "epoch": 0.49789915966386555, "grad_norm": 10.604615572148322, "learning_rate": 9.869240283888879e-06, "loss": 3.173630475997925, "step": 948 }, { "epoch": 0.49842436974789917, "grad_norm": 9.452663488404353, "learning_rate": 9.868545044956603e-06, "loss": 2.46535062789917, "step": 949 }, { "epoch": 0.4989495798319328, "grad_norm": 14.578447019798483, "learning_rate": 9.867847987275665e-06, "loss": 2.3685851097106934, "step": 950 }, { "epoch": 0.4994747899159664, "grad_norm": 16.015486062482058, "learning_rate": 9.867149111106469e-06, "loss": 3.4084792137145996, "step": 951 }, { "epoch": 0.5, "grad_norm": 8.172727766916283, "learning_rate": 9.866448416710094e-06, "loss": 2.634758472442627, "step": 952 }, { "epoch": 0.5005252100840336, "grad_norm": 7.061421839836941, "learning_rate": 9.865745904348296e-06, "loss": 3.1642332077026367, "step": 953 }, { "epoch": 0.5010504201680672, "grad_norm": 7.863379368881913, "learning_rate": 9.865041574283519e-06, "loss": 2.55232310295105, "step": 954 }, { "epoch": 0.5015756302521008, "grad_norm": 18.318706645727392, "learning_rate": 9.864335426778877e-06, "loss": 3.0581870079040527, "step": 955 }, { "epoch": 0.5021008403361344, "grad_norm": 8.24819656304432, "learning_rate": 9.863627462098166e-06, "loss": 3.321889638900757, "step": 956 }, { "epoch": 0.5026260504201681, "grad_norm": 13.940058316546246, "learning_rate": 9.862917680505863e-06, "loss": 2.7758915424346924, "step": 957 }, { "epoch": 0.5031512605042017, "grad_norm": 7.790005951709683, "learning_rate": 9.862206082267123e-06, "loss": 2.8979663848876953, "step": 958 }, { "epoch": 0.5036764705882353, "grad_norm": 31.609464745133295, "learning_rate": 9.861492667647776e-06, "loss": 3.6580424308776855, "step": 959 }, { "epoch": 0.5042016806722689, "grad_norm": 8.822434156970964, "learning_rate": 9.860777436914334e-06, "loss": 2.9936790466308594, "step": 960 }, { "epoch": 0.5047268907563025, "grad_norm": 12.867974272732651, "learning_rate": 9.860060390333988e-06, "loss": 2.7880330085754395, "step": 961 }, { "epoch": 0.5052521008403361, "grad_norm": 10.290823503263832, "learning_rate": 9.859341528174604e-06, "loss": 2.471447467803955, "step": 962 }, { "epoch": 0.5057773109243697, "grad_norm": 12.483830449845785, "learning_rate": 9.85862085070473e-06, "loss": 3.259432792663574, "step": 963 }, { "epoch": 0.5063025210084033, "grad_norm": 14.628462597437963, "learning_rate": 9.85789835819359e-06, "loss": 2.4535512924194336, "step": 964 }, { "epoch": 0.506827731092437, "grad_norm": 7.249075888465973, "learning_rate": 9.857174050911085e-06, "loss": 2.0006537437438965, "step": 965 }, { "epoch": 0.5073529411764706, "grad_norm": 7.472949235353495, "learning_rate": 9.856447929127797e-06, "loss": 1.8998805284500122, "step": 966 }, { "epoch": 0.5078781512605042, "grad_norm": 40.11983502167645, "learning_rate": 9.855719993114983e-06, "loss": 2.6687722206115723, "step": 967 }, { "epoch": 0.5084033613445378, "grad_norm": 7.657658631890199, "learning_rate": 9.854990243144583e-06, "loss": 1.9068760871887207, "step": 968 }, { "epoch": 0.5089285714285714, "grad_norm": 10.387516235717962, "learning_rate": 9.854258679489203e-06, "loss": 3.62998104095459, "step": 969 }, { "epoch": 0.509453781512605, "grad_norm": 17.102277052214465, "learning_rate": 9.853525302422143e-06, "loss": 3.2284932136535645, "step": 970 }, { "epoch": 0.5099789915966386, "grad_norm": 13.344277208761309, "learning_rate": 9.852790112217364e-06, "loss": 2.899284839630127, "step": 971 }, { "epoch": 0.5105042016806722, "grad_norm": 10.892894759474302, "learning_rate": 9.852053109149518e-06, "loss": 2.663513660430908, "step": 972 }, { "epoch": 0.5110294117647058, "grad_norm": 14.239491096036769, "learning_rate": 9.851314293493923e-06, "loss": 2.841151237487793, "step": 973 }, { "epoch": 0.5115546218487395, "grad_norm": 9.049345457055445, "learning_rate": 9.850573665526583e-06, "loss": 3.1498963832855225, "step": 974 }, { "epoch": 0.5120798319327731, "grad_norm": 24.137043144127464, "learning_rate": 9.849831225524174e-06, "loss": 2.4141013622283936, "step": 975 }, { "epoch": 0.5126050420168067, "grad_norm": 12.293035374600452, "learning_rate": 9.849086973764049e-06, "loss": 2.7569313049316406, "step": 976 }, { "epoch": 0.5131302521008403, "grad_norm": 11.148606706292894, "learning_rate": 9.848340910524243e-06, "loss": 2.7346744537353516, "step": 977 }, { "epoch": 0.5136554621848739, "grad_norm": 11.90459404326738, "learning_rate": 9.847593036083457e-06, "loss": 2.7461304664611816, "step": 978 }, { "epoch": 0.5141806722689075, "grad_norm": 13.455479167498735, "learning_rate": 9.846843350721084e-06, "loss": 2.8733315467834473, "step": 979 }, { "epoch": 0.5147058823529411, "grad_norm": 14.47955515122071, "learning_rate": 9.846091854717179e-06, "loss": 3.1680872440338135, "step": 980 }, { "epoch": 0.5152310924369747, "grad_norm": 17.18200775081459, "learning_rate": 9.845338548352482e-06, "loss": 2.253500461578369, "step": 981 }, { "epoch": 0.5157563025210085, "grad_norm": 7.010827004001508, "learning_rate": 9.844583431908404e-06, "loss": 2.8670248985290527, "step": 982 }, { "epoch": 0.5162815126050421, "grad_norm": 11.724281288596448, "learning_rate": 9.843826505667038e-06, "loss": 2.728477954864502, "step": 983 }, { "epoch": 0.5168067226890757, "grad_norm": 7.890004234599675, "learning_rate": 9.843067769911147e-06, "loss": 2.5966458320617676, "step": 984 }, { "epoch": 0.5173319327731093, "grad_norm": 9.694420979855991, "learning_rate": 9.842307224924174e-06, "loss": 2.716907024383545, "step": 985 }, { "epoch": 0.5178571428571429, "grad_norm": 12.392850667138623, "learning_rate": 9.841544870990237e-06, "loss": 2.506190776824951, "step": 986 }, { "epoch": 0.5183823529411765, "grad_norm": 9.699664556874543, "learning_rate": 9.840780708394131e-06, "loss": 3.579655647277832, "step": 987 }, { "epoch": 0.5189075630252101, "grad_norm": 11.971835575602299, "learning_rate": 9.840014737421321e-06, "loss": 2.9419727325439453, "step": 988 }, { "epoch": 0.5194327731092437, "grad_norm": 11.32733302234166, "learning_rate": 9.839246958357957e-06, "loss": 2.8499889373779297, "step": 989 }, { "epoch": 0.5199579831932774, "grad_norm": 6.737626396843377, "learning_rate": 9.838477371490857e-06, "loss": 2.7310805320739746, "step": 990 }, { "epoch": 0.520483193277311, "grad_norm": 7.7832175497025755, "learning_rate": 9.837705977107514e-06, "loss": 2.371565103530884, "step": 991 }, { "epoch": 0.5210084033613446, "grad_norm": 13.423359517757728, "learning_rate": 9.836932775496102e-06, "loss": 2.1075167655944824, "step": 992 }, { "epoch": 0.5215336134453782, "grad_norm": 30.355785767864948, "learning_rate": 9.836157766945467e-06, "loss": 2.9195847511291504, "step": 993 }, { "epoch": 0.5220588235294118, "grad_norm": 5.9085648950482215, "learning_rate": 9.835380951745128e-06, "loss": 2.653118133544922, "step": 994 }, { "epoch": 0.5225840336134454, "grad_norm": 8.624755576924365, "learning_rate": 9.834602330185282e-06, "loss": 2.7378313541412354, "step": 995 }, { "epoch": 0.523109243697479, "grad_norm": 15.447229177233407, "learning_rate": 9.833821902556799e-06, "loss": 3.0610413551330566, "step": 996 }, { "epoch": 0.5236344537815126, "grad_norm": 14.138344909614812, "learning_rate": 9.833039669151225e-06, "loss": 2.086059331893921, "step": 997 }, { "epoch": 0.5241596638655462, "grad_norm": 25.891742154338104, "learning_rate": 9.832255630260781e-06, "loss": 2.698242664337158, "step": 998 }, { "epoch": 0.5246848739495799, "grad_norm": 13.534664830891156, "learning_rate": 9.831469786178359e-06, "loss": 2.078580141067505, "step": 999 }, { "epoch": 0.5252100840336135, "grad_norm": 20.311726552736538, "learning_rate": 9.83068213719753e-06, "loss": 3.3323657512664795, "step": 1000 }, { "epoch": 0.5257352941176471, "grad_norm": 10.316669873142736, "learning_rate": 9.829892683612535e-06, "loss": 3.5711441040039062, "step": 1001 }, { "epoch": 0.5262605042016807, "grad_norm": 11.07522520228112, "learning_rate": 9.829101425718293e-06, "loss": 3.0521323680877686, "step": 1002 }, { "epoch": 0.5267857142857143, "grad_norm": 10.12534774251354, "learning_rate": 9.828308363810392e-06, "loss": 3.1976470947265625, "step": 1003 }, { "epoch": 0.5273109243697479, "grad_norm": 11.155283266128201, "learning_rate": 9.827513498185102e-06, "loss": 2.9771289825439453, "step": 1004 }, { "epoch": 0.5278361344537815, "grad_norm": 10.864857422412713, "learning_rate": 9.826716829139358e-06, "loss": 1.943270206451416, "step": 1005 }, { "epoch": 0.5283613445378151, "grad_norm": 10.85085375472981, "learning_rate": 9.825918356970776e-06, "loss": 3.0275015830993652, "step": 1006 }, { "epoch": 0.5288865546218487, "grad_norm": 18.464630613651543, "learning_rate": 9.82511808197764e-06, "loss": 2.402885913848877, "step": 1007 }, { "epoch": 0.5294117647058824, "grad_norm": 12.385173244701438, "learning_rate": 9.82431600445891e-06, "loss": 3.268373966217041, "step": 1008 }, { "epoch": 0.529936974789916, "grad_norm": 12.78405094599589, "learning_rate": 9.82351212471422e-06, "loss": 3.336167335510254, "step": 1009 }, { "epoch": 0.5304621848739496, "grad_norm": 13.404236320599205, "learning_rate": 9.822706443043874e-06, "loss": 2.69339656829834, "step": 1010 }, { "epoch": 0.5309873949579832, "grad_norm": 5.971076613892638, "learning_rate": 9.821898959748856e-06, "loss": 2.8977434635162354, "step": 1011 }, { "epoch": 0.5315126050420168, "grad_norm": 28.995985825273255, "learning_rate": 9.821089675130816e-06, "loss": 2.4791910648345947, "step": 1012 }, { "epoch": 0.5320378151260504, "grad_norm": 10.501767999262777, "learning_rate": 9.820278589492076e-06, "loss": 2.9573850631713867, "step": 1013 }, { "epoch": 0.532563025210084, "grad_norm": 16.06805414193256, "learning_rate": 9.81946570313564e-06, "loss": 2.753512144088745, "step": 1014 }, { "epoch": 0.5330882352941176, "grad_norm": 18.288090694705243, "learning_rate": 9.818651016365177e-06, "loss": 2.611023426055908, "step": 1015 }, { "epoch": 0.5336134453781513, "grad_norm": 9.715589897537523, "learning_rate": 9.817834529485031e-06, "loss": 3.0187296867370605, "step": 1016 }, { "epoch": 0.5341386554621849, "grad_norm": 11.483477923856451, "learning_rate": 9.817016242800215e-06, "loss": 2.56632661819458, "step": 1017 }, { "epoch": 0.5346638655462185, "grad_norm": 6.972928016485257, "learning_rate": 9.816196156616422e-06, "loss": 2.8569421768188477, "step": 1018 }, { "epoch": 0.5351890756302521, "grad_norm": 23.941456471416537, "learning_rate": 9.81537427124001e-06, "loss": 3.0288453102111816, "step": 1019 }, { "epoch": 0.5357142857142857, "grad_norm": 10.271680924637222, "learning_rate": 9.814550586978012e-06, "loss": 2.467142105102539, "step": 1020 }, { "epoch": 0.5362394957983193, "grad_norm": 29.999809621685696, "learning_rate": 9.813725104138133e-06, "loss": 3.313124179840088, "step": 1021 }, { "epoch": 0.5367647058823529, "grad_norm": 11.858343928359286, "learning_rate": 9.81289782302875e-06, "loss": 2.8632054328918457, "step": 1022 }, { "epoch": 0.5372899159663865, "grad_norm": 8.625622958347869, "learning_rate": 9.812068743958912e-06, "loss": 3.371345281600952, "step": 1023 }, { "epoch": 0.5378151260504201, "grad_norm": 13.456332343689956, "learning_rate": 9.811237867238337e-06, "loss": 2.4938735961914062, "step": 1024 }, { "epoch": 0.5383403361344538, "grad_norm": 13.887856457216152, "learning_rate": 9.810405193177418e-06, "loss": 2.6703624725341797, "step": 1025 }, { "epoch": 0.5388655462184874, "grad_norm": 7.315954892490282, "learning_rate": 9.809570722087219e-06, "loss": 2.005222797393799, "step": 1026 }, { "epoch": 0.539390756302521, "grad_norm": 32.4494701372808, "learning_rate": 9.808734454279473e-06, "loss": 1.9920377731323242, "step": 1027 }, { "epoch": 0.5399159663865546, "grad_norm": 4.742357389219866, "learning_rate": 9.807896390066587e-06, "loss": 2.665032386779785, "step": 1028 }, { "epoch": 0.5404411764705882, "grad_norm": 9.890452547582996, "learning_rate": 9.807056529761637e-06, "loss": 2.693915605545044, "step": 1029 }, { "epoch": 0.5409663865546218, "grad_norm": 15.832264264465673, "learning_rate": 9.80621487367837e-06, "loss": 3.056929588317871, "step": 1030 }, { "epoch": 0.5414915966386554, "grad_norm": 7.848495756271129, "learning_rate": 9.805371422131205e-06, "loss": 3.023015022277832, "step": 1031 }, { "epoch": 0.542016806722689, "grad_norm": 11.169805960673793, "learning_rate": 9.804526175435231e-06, "loss": 2.842832088470459, "step": 1032 }, { "epoch": 0.5425420168067226, "grad_norm": 12.867254489345834, "learning_rate": 9.80367913390621e-06, "loss": 2.984562873840332, "step": 1033 }, { "epoch": 0.5430672268907563, "grad_norm": 11.364377695454339, "learning_rate": 9.80283029786057e-06, "loss": 3.140024185180664, "step": 1034 }, { "epoch": 0.5435924369747899, "grad_norm": 12.253614600348552, "learning_rate": 9.801979667615414e-06, "loss": 2.9512271881103516, "step": 1035 }, { "epoch": 0.5441176470588235, "grad_norm": 9.566896336471828, "learning_rate": 9.80112724348851e-06, "loss": 2.9276838302612305, "step": 1036 }, { "epoch": 0.5446428571428571, "grad_norm": 8.150817738952002, "learning_rate": 9.800273025798302e-06, "loss": 2.9314827919006348, "step": 1037 }, { "epoch": 0.5451680672268907, "grad_norm": 8.378956214234197, "learning_rate": 9.7994170148639e-06, "loss": 2.8263697624206543, "step": 1038 }, { "epoch": 0.5456932773109243, "grad_norm": 12.837581687837934, "learning_rate": 9.798559211005083e-06, "loss": 2.652360439300537, "step": 1039 }, { "epoch": 0.5462184873949579, "grad_norm": 17.339944801121778, "learning_rate": 9.797699614542307e-06, "loss": 2.6018738746643066, "step": 1040 }, { "epoch": 0.5467436974789915, "grad_norm": 9.574990718351845, "learning_rate": 9.796838225796688e-06, "loss": 2.670292615890503, "step": 1041 }, { "epoch": 0.5472689075630253, "grad_norm": 11.442446768722633, "learning_rate": 9.795975045090017e-06, "loss": 2.682929039001465, "step": 1042 }, { "epoch": 0.5477941176470589, "grad_norm": 6.5451412317335835, "learning_rate": 9.795110072744756e-06, "loss": 2.639051914215088, "step": 1043 }, { "epoch": 0.5483193277310925, "grad_norm": 6.994297772162765, "learning_rate": 9.794243309084031e-06, "loss": 2.4831275939941406, "step": 1044 }, { "epoch": 0.5488445378151261, "grad_norm": 10.901868740772803, "learning_rate": 9.793374754431642e-06, "loss": 3.0070412158966064, "step": 1045 }, { "epoch": 0.5493697478991597, "grad_norm": 9.049053594948145, "learning_rate": 9.792504409112054e-06, "loss": 2.586428642272949, "step": 1046 }, { "epoch": 0.5498949579831933, "grad_norm": 10.556684856379588, "learning_rate": 9.791632273450405e-06, "loss": 2.67769193649292, "step": 1047 }, { "epoch": 0.5504201680672269, "grad_norm": 12.770715147815533, "learning_rate": 9.790758347772498e-06, "loss": 2.2092323303222656, "step": 1048 }, { "epoch": 0.5509453781512605, "grad_norm": 13.465225428274353, "learning_rate": 9.789882632404809e-06, "loss": 2.8107967376708984, "step": 1049 }, { "epoch": 0.5514705882352942, "grad_norm": 13.452802113852654, "learning_rate": 9.789005127674478e-06, "loss": 3.391347885131836, "step": 1050 }, { "epoch": 0.5519957983193278, "grad_norm": 8.405309040460729, "learning_rate": 9.788125833909316e-06, "loss": 2.8124945163726807, "step": 1051 }, { "epoch": 0.5525210084033614, "grad_norm": 13.498104235942153, "learning_rate": 9.787244751437802e-06, "loss": 2.5720763206481934, "step": 1052 }, { "epoch": 0.553046218487395, "grad_norm": 8.942492315347012, "learning_rate": 9.786361880589084e-06, "loss": 3.0965919494628906, "step": 1053 }, { "epoch": 0.5535714285714286, "grad_norm": 6.713967963971811, "learning_rate": 9.785477221692976e-06, "loss": 2.4715917110443115, "step": 1054 }, { "epoch": 0.5540966386554622, "grad_norm": 19.302702706185897, "learning_rate": 9.784590775079964e-06, "loss": 3.064605951309204, "step": 1055 }, { "epoch": 0.5546218487394958, "grad_norm": 12.641159395376391, "learning_rate": 9.783702541081192e-06, "loss": 2.564366102218628, "step": 1056 }, { "epoch": 0.5551470588235294, "grad_norm": 11.001270774413294, "learning_rate": 9.782812520028487e-06, "loss": 2.6874303817749023, "step": 1057 }, { "epoch": 0.555672268907563, "grad_norm": 11.818495269271782, "learning_rate": 9.781920712254332e-06, "loss": 2.3377063274383545, "step": 1058 }, { "epoch": 0.5561974789915967, "grad_norm": 9.477767430911584, "learning_rate": 9.781027118091879e-06, "loss": 2.503865957260132, "step": 1059 }, { "epoch": 0.5567226890756303, "grad_norm": 13.254389673708763, "learning_rate": 9.780131737874949e-06, "loss": 2.5261001586914062, "step": 1060 }, { "epoch": 0.5572478991596639, "grad_norm": 7.470005500306703, "learning_rate": 9.779234571938034e-06, "loss": 2.468996047973633, "step": 1061 }, { "epoch": 0.5577731092436975, "grad_norm": 13.319679733182621, "learning_rate": 9.778335620616289e-06, "loss": 2.8894853591918945, "step": 1062 }, { "epoch": 0.5582983193277311, "grad_norm": 11.54918471756888, "learning_rate": 9.777434884245533e-06, "loss": 2.8393735885620117, "step": 1063 }, { "epoch": 0.5588235294117647, "grad_norm": 12.172219585570327, "learning_rate": 9.776532363162257e-06, "loss": 2.831796407699585, "step": 1064 }, { "epoch": 0.5593487394957983, "grad_norm": 9.415239467747737, "learning_rate": 9.775628057703616e-06, "loss": 2.703184127807617, "step": 1065 }, { "epoch": 0.5598739495798319, "grad_norm": 11.61775890406432, "learning_rate": 9.774721968207437e-06, "loss": 2.5092737674713135, "step": 1066 }, { "epoch": 0.5603991596638656, "grad_norm": 10.469216639388009, "learning_rate": 9.773814095012202e-06, "loss": 2.348267078399658, "step": 1067 }, { "epoch": 0.5609243697478992, "grad_norm": 9.0730502251458, "learning_rate": 9.772904438457071e-06, "loss": 2.873717784881592, "step": 1068 }, { "epoch": 0.5614495798319328, "grad_norm": 8.565259361577203, "learning_rate": 9.771992998881865e-06, "loss": 2.824530839920044, "step": 1069 }, { "epoch": 0.5619747899159664, "grad_norm": 19.17277434009704, "learning_rate": 9.771079776627072e-06, "loss": 3.1064677238464355, "step": 1070 }, { "epoch": 0.5625, "grad_norm": 8.140366855205347, "learning_rate": 9.770164772033845e-06, "loss": 2.7749385833740234, "step": 1071 }, { "epoch": 0.5630252100840336, "grad_norm": 13.198742671609411, "learning_rate": 9.769247985444002e-06, "loss": 3.2205395698547363, "step": 1072 }, { "epoch": 0.5635504201680672, "grad_norm": 8.367019081966903, "learning_rate": 9.768329417200029e-06, "loss": 2.4534449577331543, "step": 1073 }, { "epoch": 0.5640756302521008, "grad_norm": 7.681181918194523, "learning_rate": 9.767409067645078e-06, "loss": 2.577038526535034, "step": 1074 }, { "epoch": 0.5646008403361344, "grad_norm": 17.178070210175466, "learning_rate": 9.766486937122964e-06, "loss": 2.671081066131592, "step": 1075 }, { "epoch": 0.5651260504201681, "grad_norm": 9.812669714608955, "learning_rate": 9.765563025978169e-06, "loss": 2.4747838973999023, "step": 1076 }, { "epoch": 0.5656512605042017, "grad_norm": 12.844645992485237, "learning_rate": 9.76463733455584e-06, "loss": 2.7280406951904297, "step": 1077 }, { "epoch": 0.5661764705882353, "grad_norm": 20.60693930535657, "learning_rate": 9.763709863201789e-06, "loss": 2.888597011566162, "step": 1078 }, { "epoch": 0.5667016806722689, "grad_norm": 22.08845641204313, "learning_rate": 9.76278061226249e-06, "loss": 2.7614669799804688, "step": 1079 }, { "epoch": 0.5672268907563025, "grad_norm": 7.83779863883514, "learning_rate": 9.761849582085086e-06, "loss": 3.2007977962493896, "step": 1080 }, { "epoch": 0.5677521008403361, "grad_norm": 8.731312539445284, "learning_rate": 9.760916773017386e-06, "loss": 2.5763912200927734, "step": 1081 }, { "epoch": 0.5682773109243697, "grad_norm": 13.329280642009538, "learning_rate": 9.759982185407855e-06, "loss": 2.9799554347991943, "step": 1082 }, { "epoch": 0.5688025210084033, "grad_norm": 9.73542764189317, "learning_rate": 9.759045819605635e-06, "loss": 2.7777011394500732, "step": 1083 }, { "epoch": 0.569327731092437, "grad_norm": 15.11561949994785, "learning_rate": 9.758107675960518e-06, "loss": 3.3915722370147705, "step": 1084 }, { "epoch": 0.5698529411764706, "grad_norm": 8.70761881829688, "learning_rate": 9.757167754822974e-06, "loss": 2.9978628158569336, "step": 1085 }, { "epoch": 0.5703781512605042, "grad_norm": 8.89129308068863, "learning_rate": 9.756226056544128e-06, "loss": 3.0361058712005615, "step": 1086 }, { "epoch": 0.5709033613445378, "grad_norm": 11.70241312709555, "learning_rate": 9.755282581475769e-06, "loss": 2.65017032623291, "step": 1087 }, { "epoch": 0.5714285714285714, "grad_norm": 16.74924117531719, "learning_rate": 9.754337329970355e-06, "loss": 2.61814284324646, "step": 1088 }, { "epoch": 0.571953781512605, "grad_norm": 9.32895799787225, "learning_rate": 9.753390302381006e-06, "loss": 2.870701789855957, "step": 1089 }, { "epoch": 0.5724789915966386, "grad_norm": 22.462449181708536, "learning_rate": 9.7524414990615e-06, "loss": 4.07295036315918, "step": 1090 }, { "epoch": 0.5730042016806722, "grad_norm": 18.906745516318605, "learning_rate": 9.751490920366287e-06, "loss": 3.0024361610412598, "step": 1091 }, { "epoch": 0.5735294117647058, "grad_norm": 10.263059841388436, "learning_rate": 9.750538566650474e-06, "loss": 2.908892869949341, "step": 1092 }, { "epoch": 0.5740546218487395, "grad_norm": 18.951927624919907, "learning_rate": 9.749584438269833e-06, "loss": 2.9717984199523926, "step": 1093 }, { "epoch": 0.5745798319327731, "grad_norm": 8.213729091608988, "learning_rate": 9.748628535580798e-06, "loss": 2.613170623779297, "step": 1094 }, { "epoch": 0.5751050420168067, "grad_norm": 11.704280389856635, "learning_rate": 9.747670858940468e-06, "loss": 2.078094959259033, "step": 1095 }, { "epoch": 0.5756302521008403, "grad_norm": 8.876822687488534, "learning_rate": 9.746711408706607e-06, "loss": 1.7200992107391357, "step": 1096 }, { "epoch": 0.5761554621848739, "grad_norm": 9.230253639578661, "learning_rate": 9.74575018523763e-06, "loss": 2.741290330886841, "step": 1097 }, { "epoch": 0.5766806722689075, "grad_norm": 12.412888033861083, "learning_rate": 9.74478718889263e-06, "loss": 2.882063865661621, "step": 1098 }, { "epoch": 0.5772058823529411, "grad_norm": 9.543578956177342, "learning_rate": 9.74382242003135e-06, "loss": 2.2171170711517334, "step": 1099 }, { "epoch": 0.5777310924369747, "grad_norm": 15.084306947965192, "learning_rate": 9.742855879014202e-06, "loss": 2.855663776397705, "step": 1100 }, { "epoch": 0.5782563025210085, "grad_norm": 14.23142601301457, "learning_rate": 9.741887566202259e-06, "loss": 2.897360324859619, "step": 1101 }, { "epoch": 0.5787815126050421, "grad_norm": 13.059435415100035, "learning_rate": 9.740917481957253e-06, "loss": 3.0059051513671875, "step": 1102 }, { "epoch": 0.5793067226890757, "grad_norm": 8.932422966842017, "learning_rate": 9.739945626641579e-06, "loss": 2.6640849113464355, "step": 1103 }, { "epoch": 0.5798319327731093, "grad_norm": 9.796607570591627, "learning_rate": 9.738972000618296e-06, "loss": 2.855018377304077, "step": 1104 }, { "epoch": 0.5803571428571429, "grad_norm": 16.558214916245234, "learning_rate": 9.737996604251124e-06, "loss": 2.8122780323028564, "step": 1105 }, { "epoch": 0.5808823529411765, "grad_norm": 11.418000282461028, "learning_rate": 9.737019437904438e-06, "loss": 2.5800719261169434, "step": 1106 }, { "epoch": 0.5814075630252101, "grad_norm": 8.156757069327533, "learning_rate": 9.736040501943285e-06, "loss": 2.724315643310547, "step": 1107 }, { "epoch": 0.5819327731092437, "grad_norm": 17.815625375197854, "learning_rate": 9.735059796733363e-06, "loss": 3.6219096183776855, "step": 1108 }, { "epoch": 0.5824579831932774, "grad_norm": 10.501857298537288, "learning_rate": 9.73407732264104e-06, "loss": 3.04555082321167, "step": 1109 }, { "epoch": 0.582983193277311, "grad_norm": 9.555867042957924, "learning_rate": 9.733093080033335e-06, "loss": 2.9941024780273438, "step": 1110 }, { "epoch": 0.5835084033613446, "grad_norm": 11.498471043031197, "learning_rate": 9.732107069277936e-06, "loss": 2.79038667678833, "step": 1111 }, { "epoch": 0.5840336134453782, "grad_norm": 18.205145944991067, "learning_rate": 9.731119290743188e-06, "loss": 2.674743413925171, "step": 1112 }, { "epoch": 0.5845588235294118, "grad_norm": 8.555916624666907, "learning_rate": 9.730129744798096e-06, "loss": 2.9587817192077637, "step": 1113 }, { "epoch": 0.5850840336134454, "grad_norm": 8.748068827463866, "learning_rate": 9.729138431812327e-06, "loss": 2.8857421875, "step": 1114 }, { "epoch": 0.585609243697479, "grad_norm": 7.666558845996576, "learning_rate": 9.728145352156203e-06, "loss": 2.5033679008483887, "step": 1115 }, { "epoch": 0.5861344537815126, "grad_norm": 20.32998550651072, "learning_rate": 9.727150506200715e-06, "loss": 3.6410534381866455, "step": 1116 }, { "epoch": 0.5866596638655462, "grad_norm": 7.739201012658456, "learning_rate": 9.726153894317508e-06, "loss": 2.701643943786621, "step": 1117 }, { "epoch": 0.5871848739495799, "grad_norm": 11.367614124351507, "learning_rate": 9.725155516878886e-06, "loss": 3.276360273361206, "step": 1118 }, { "epoch": 0.5877100840336135, "grad_norm": 11.862499535452493, "learning_rate": 9.724155374257816e-06, "loss": 2.570164203643799, "step": 1119 }, { "epoch": 0.5882352941176471, "grad_norm": 13.119065243909068, "learning_rate": 9.72315346682792e-06, "loss": 2.958350419998169, "step": 1120 }, { "epoch": 0.5887605042016807, "grad_norm": 19.464185695283074, "learning_rate": 9.722149794963483e-06, "loss": 2.5229337215423584, "step": 1121 }, { "epoch": 0.5892857142857143, "grad_norm": 8.71824757303977, "learning_rate": 9.721144359039448e-06, "loss": 2.857860565185547, "step": 1122 }, { "epoch": 0.5898109243697479, "grad_norm": 7.191404205099183, "learning_rate": 9.720137159431418e-06, "loss": 3.283879518508911, "step": 1123 }, { "epoch": 0.5903361344537815, "grad_norm": 10.53641421236998, "learning_rate": 9.719128196515653e-06, "loss": 2.851640224456787, "step": 1124 }, { "epoch": 0.5908613445378151, "grad_norm": 13.396472099026342, "learning_rate": 9.718117470669072e-06, "loss": 3.0612597465515137, "step": 1125 }, { "epoch": 0.5913865546218487, "grad_norm": 10.271472877546353, "learning_rate": 9.717104982269257e-06, "loss": 2.798976421356201, "step": 1126 }, { "epoch": 0.5919117647058824, "grad_norm": 12.100564214276803, "learning_rate": 9.716090731694439e-06, "loss": 2.4026825428009033, "step": 1127 }, { "epoch": 0.592436974789916, "grad_norm": 16.904155491225104, "learning_rate": 9.715074719323515e-06, "loss": 3.1392836570739746, "step": 1128 }, { "epoch": 0.5929621848739496, "grad_norm": 12.195628748562735, "learning_rate": 9.714056945536039e-06, "loss": 2.194385051727295, "step": 1129 }, { "epoch": 0.5934873949579832, "grad_norm": 9.206581903261434, "learning_rate": 9.713037410712222e-06, "loss": 2.4214446544647217, "step": 1130 }, { "epoch": 0.5940126050420168, "grad_norm": 12.966888511805234, "learning_rate": 9.712016115232932e-06, "loss": 2.4819564819335938, "step": 1131 }, { "epoch": 0.5945378151260504, "grad_norm": 9.888297193550352, "learning_rate": 9.710993059479695e-06, "loss": 3.1289210319519043, "step": 1132 }, { "epoch": 0.595063025210084, "grad_norm": 10.95708527935894, "learning_rate": 9.709968243834698e-06, "loss": 2.821739673614502, "step": 1133 }, { "epoch": 0.5955882352941176, "grad_norm": 13.524970991193987, "learning_rate": 9.70894166868078e-06, "loss": 3.322603702545166, "step": 1134 }, { "epoch": 0.5961134453781513, "grad_norm": 8.123611701582872, "learning_rate": 9.707913334401441e-06, "loss": 2.6867642402648926, "step": 1135 }, { "epoch": 0.5966386554621849, "grad_norm": 37.803610433950304, "learning_rate": 9.706883241380838e-06, "loss": 2.36342191696167, "step": 1136 }, { "epoch": 0.5971638655462185, "grad_norm": 9.012080624683833, "learning_rate": 9.705851390003783e-06, "loss": 2.456573486328125, "step": 1137 }, { "epoch": 0.5976890756302521, "grad_norm": 8.45889112860806, "learning_rate": 9.704817780655746e-06, "loss": 2.8405826091766357, "step": 1138 }, { "epoch": 0.5982142857142857, "grad_norm": 15.385597854811314, "learning_rate": 9.703782413722856e-06, "loss": 2.8452396392822266, "step": 1139 }, { "epoch": 0.5987394957983193, "grad_norm": 12.92550188250134, "learning_rate": 9.702745289591892e-06, "loss": 2.7421834468841553, "step": 1140 }, { "epoch": 0.5992647058823529, "grad_norm": 12.738073299096635, "learning_rate": 9.7017064086503e-06, "loss": 3.2678005695343018, "step": 1141 }, { "epoch": 0.5997899159663865, "grad_norm": 8.289371569526935, "learning_rate": 9.70066577128617e-06, "loss": 2.658457040786743, "step": 1142 }, { "epoch": 0.6003151260504201, "grad_norm": 10.833496630831867, "learning_rate": 9.699623377888256e-06, "loss": 2.4036011695861816, "step": 1143 }, { "epoch": 0.6008403361344538, "grad_norm": 15.492760361146278, "learning_rate": 9.69857922884597e-06, "loss": 2.4981842041015625, "step": 1144 }, { "epoch": 0.6013655462184874, "grad_norm": 12.11767186572831, "learning_rate": 9.697533324549371e-06, "loss": 3.1888647079467773, "step": 1145 }, { "epoch": 0.601890756302521, "grad_norm": 10.573838040176359, "learning_rate": 9.69648566538918e-06, "loss": 2.7450263500213623, "step": 1146 }, { "epoch": 0.6024159663865546, "grad_norm": 22.53615825568267, "learning_rate": 9.695436251756775e-06, "loss": 3.4375998973846436, "step": 1147 }, { "epoch": 0.6029411764705882, "grad_norm": 13.678486235698054, "learning_rate": 9.694385084044185e-06, "loss": 3.148430347442627, "step": 1148 }, { "epoch": 0.6034663865546218, "grad_norm": 15.516844451611417, "learning_rate": 9.693332162644095e-06, "loss": 2.8116562366485596, "step": 1149 }, { "epoch": 0.6039915966386554, "grad_norm": 9.826042722686092, "learning_rate": 9.692277487949849e-06, "loss": 2.690570831298828, "step": 1150 }, { "epoch": 0.604516806722689, "grad_norm": 13.307863596273979, "learning_rate": 9.69122106035544e-06, "loss": 2.812605857849121, "step": 1151 }, { "epoch": 0.6050420168067226, "grad_norm": 11.440540259437942, "learning_rate": 9.690162880255521e-06, "loss": 3.1070003509521484, "step": 1152 }, { "epoch": 0.6055672268907563, "grad_norm": 12.259735929652518, "learning_rate": 9.689102948045398e-06, "loss": 3.474421501159668, "step": 1153 }, { "epoch": 0.6060924369747899, "grad_norm": 10.129940184206724, "learning_rate": 9.688041264121031e-06, "loss": 3.1540329456329346, "step": 1154 }, { "epoch": 0.6066176470588235, "grad_norm": 10.913585193843712, "learning_rate": 9.686977828879033e-06, "loss": 2.887948989868164, "step": 1155 }, { "epoch": 0.6071428571428571, "grad_norm": 13.432164188694037, "learning_rate": 9.685912642716674e-06, "loss": 2.8469412326812744, "step": 1156 }, { "epoch": 0.6076680672268907, "grad_norm": 11.19226800007551, "learning_rate": 9.684845706031878e-06, "loss": 2.606762170791626, "step": 1157 }, { "epoch": 0.6081932773109243, "grad_norm": 7.666606187379233, "learning_rate": 9.683777019223219e-06, "loss": 2.2800896167755127, "step": 1158 }, { "epoch": 0.6087184873949579, "grad_norm": 8.199448460822813, "learning_rate": 9.682706582689932e-06, "loss": 2.4749555587768555, "step": 1159 }, { "epoch": 0.6092436974789915, "grad_norm": 13.320939746045067, "learning_rate": 9.681634396831899e-06, "loss": 3.058583974838257, "step": 1160 }, { "epoch": 0.6097689075630253, "grad_norm": 7.818142107419255, "learning_rate": 9.680560462049657e-06, "loss": 2.5522444248199463, "step": 1161 }, { "epoch": 0.6102941176470589, "grad_norm": 15.02939473129863, "learning_rate": 9.679484778744396e-06, "loss": 2.9182796478271484, "step": 1162 }, { "epoch": 0.6108193277310925, "grad_norm": 6.127233982969524, "learning_rate": 9.678407347317967e-06, "loss": 2.725381374359131, "step": 1163 }, { "epoch": 0.6113445378151261, "grad_norm": 20.265398877815926, "learning_rate": 9.677328168172858e-06, "loss": 3.0166091918945312, "step": 1164 }, { "epoch": 0.6118697478991597, "grad_norm": 12.918754190555548, "learning_rate": 9.676247241712228e-06, "loss": 2.6854474544525146, "step": 1165 }, { "epoch": 0.6123949579831933, "grad_norm": 12.011574482145152, "learning_rate": 9.675164568339875e-06, "loss": 2.832428455352783, "step": 1166 }, { "epoch": 0.6129201680672269, "grad_norm": 16.585008813925967, "learning_rate": 9.674080148460257e-06, "loss": 3.083036422729492, "step": 1167 }, { "epoch": 0.6134453781512605, "grad_norm": 33.33992251292681, "learning_rate": 9.67299398247848e-06, "loss": 3.9661097526550293, "step": 1168 }, { "epoch": 0.6139705882352942, "grad_norm": 10.746932335764692, "learning_rate": 9.671906070800307e-06, "loss": 2.783630132675171, "step": 1169 }, { "epoch": 0.6144957983193278, "grad_norm": 9.840503537133605, "learning_rate": 9.670816413832147e-06, "loss": 2.8105361461639404, "step": 1170 }, { "epoch": 0.6150210084033614, "grad_norm": 13.050816812308216, "learning_rate": 9.669725011981068e-06, "loss": 2.0451159477233887, "step": 1171 }, { "epoch": 0.615546218487395, "grad_norm": 12.585068193664572, "learning_rate": 9.668631865654786e-06, "loss": 2.873997688293457, "step": 1172 }, { "epoch": 0.6160714285714286, "grad_norm": 15.938353723255233, "learning_rate": 9.667536975261667e-06, "loss": 3.156132936477661, "step": 1173 }, { "epoch": 0.6165966386554622, "grad_norm": 10.430976638419382, "learning_rate": 9.666440341210732e-06, "loss": 2.876246929168701, "step": 1174 }, { "epoch": 0.6171218487394958, "grad_norm": 7.6978361066179115, "learning_rate": 9.665341963911653e-06, "loss": 3.0936636924743652, "step": 1175 }, { "epoch": 0.6176470588235294, "grad_norm": 22.572543740555304, "learning_rate": 9.66424184377475e-06, "loss": 3.228053092956543, "step": 1176 }, { "epoch": 0.618172268907563, "grad_norm": 8.045368216788974, "learning_rate": 9.663139981210998e-06, "loss": 2.4929630756378174, "step": 1177 }, { "epoch": 0.6186974789915967, "grad_norm": 14.198458888679136, "learning_rate": 9.66203637663202e-06, "loss": 3.2256741523742676, "step": 1178 }, { "epoch": 0.6192226890756303, "grad_norm": 8.97046477193514, "learning_rate": 9.660931030450092e-06, "loss": 2.9510498046875, "step": 1179 }, { "epoch": 0.6197478991596639, "grad_norm": 8.751914637591376, "learning_rate": 9.65982394307814e-06, "loss": 2.904456853866577, "step": 1180 }, { "epoch": 0.6202731092436975, "grad_norm": 6.6941921577657375, "learning_rate": 9.658715114929737e-06, "loss": 2.760448455810547, "step": 1181 }, { "epoch": 0.6207983193277311, "grad_norm": 7.5692966304314435, "learning_rate": 9.657604546419114e-06, "loss": 2.975520610809326, "step": 1182 }, { "epoch": 0.6213235294117647, "grad_norm": 29.173755091178357, "learning_rate": 9.656492237961143e-06, "loss": 1.9504122734069824, "step": 1183 }, { "epoch": 0.6218487394957983, "grad_norm": 20.213219631267187, "learning_rate": 9.655378189971354e-06, "loss": 2.0044260025024414, "step": 1184 }, { "epoch": 0.6223739495798319, "grad_norm": 10.135311570606858, "learning_rate": 9.654262402865922e-06, "loss": 3.064586639404297, "step": 1185 }, { "epoch": 0.6228991596638656, "grad_norm": 10.221859142786899, "learning_rate": 9.653144877061672e-06, "loss": 2.7229180335998535, "step": 1186 }, { "epoch": 0.6234243697478992, "grad_norm": 13.950082720956196, "learning_rate": 9.652025612976082e-06, "loss": 2.699241876602173, "step": 1187 }, { "epoch": 0.6239495798319328, "grad_norm": 17.39937903729645, "learning_rate": 9.650904611027275e-06, "loss": 3.4351096153259277, "step": 1188 }, { "epoch": 0.6244747899159664, "grad_norm": 8.560759495398628, "learning_rate": 9.649781871634025e-06, "loss": 2.8936972618103027, "step": 1189 }, { "epoch": 0.625, "grad_norm": 11.049356382816127, "learning_rate": 9.648657395215756e-06, "loss": 3.0585176944732666, "step": 1190 }, { "epoch": 0.6255252100840336, "grad_norm": 17.913817531741632, "learning_rate": 9.647531182192542e-06, "loss": 2.4928512573242188, "step": 1191 }, { "epoch": 0.6260504201680672, "grad_norm": 10.590209884627793, "learning_rate": 9.6464032329851e-06, "loss": 3.0131940841674805, "step": 1192 }, { "epoch": 0.6265756302521008, "grad_norm": 8.177485325467538, "learning_rate": 9.6452735480148e-06, "loss": 1.599998116493225, "step": 1193 }, { "epoch": 0.6271008403361344, "grad_norm": 6.972074972722784, "learning_rate": 9.644142127703662e-06, "loss": 2.8549647331237793, "step": 1194 }, { "epoch": 0.6276260504201681, "grad_norm": 7.967186455233471, "learning_rate": 9.643008972474352e-06, "loss": 2.789647102355957, "step": 1195 }, { "epoch": 0.6281512605042017, "grad_norm": 10.401127520833763, "learning_rate": 9.641874082750185e-06, "loss": 2.69581937789917, "step": 1196 }, { "epoch": 0.6286764705882353, "grad_norm": 11.500047151382882, "learning_rate": 9.64073745895512e-06, "loss": 2.8014025688171387, "step": 1197 }, { "epoch": 0.6292016806722689, "grad_norm": 12.130292686014862, "learning_rate": 9.639599101513769e-06, "loss": 2.4452357292175293, "step": 1198 }, { "epoch": 0.6297268907563025, "grad_norm": 31.67487720255507, "learning_rate": 9.638459010851389e-06, "loss": 2.4740800857543945, "step": 1199 }, { "epoch": 0.6302521008403361, "grad_norm": 13.336000861839528, "learning_rate": 9.637317187393885e-06, "loss": 2.855738878250122, "step": 1200 }, { "epoch": 0.6307773109243697, "grad_norm": 10.426998643566288, "learning_rate": 9.636173631567812e-06, "loss": 2.6344735622406006, "step": 1201 }, { "epoch": 0.6313025210084033, "grad_norm": 6.536286840045967, "learning_rate": 9.635028343800365e-06, "loss": 2.8178374767303467, "step": 1202 }, { "epoch": 0.631827731092437, "grad_norm": 9.08335092165054, "learning_rate": 9.633881324519397e-06, "loss": 2.8191640377044678, "step": 1203 }, { "epoch": 0.6323529411764706, "grad_norm": 8.273507395941941, "learning_rate": 9.632732574153393e-06, "loss": 2.8788299560546875, "step": 1204 }, { "epoch": 0.6328781512605042, "grad_norm": 8.536280407992857, "learning_rate": 9.631582093131501e-06, "loss": 2.449352264404297, "step": 1205 }, { "epoch": 0.6334033613445378, "grad_norm": 17.05668954424723, "learning_rate": 9.630429881883506e-06, "loss": 2.9622299671173096, "step": 1206 }, { "epoch": 0.6339285714285714, "grad_norm": 16.360472118594963, "learning_rate": 9.629275940839838e-06, "loss": 3.2144505977630615, "step": 1207 }, { "epoch": 0.634453781512605, "grad_norm": 36.55273344134769, "learning_rate": 9.628120270431579e-06, "loss": 2.3319010734558105, "step": 1208 }, { "epoch": 0.6349789915966386, "grad_norm": 8.306416270383187, "learning_rate": 9.62696287109045e-06, "loss": 2.8658809661865234, "step": 1209 }, { "epoch": 0.6355042016806722, "grad_norm": 13.216888673053313, "learning_rate": 9.625803743248828e-06, "loss": 2.698309898376465, "step": 1210 }, { "epoch": 0.6360294117647058, "grad_norm": 12.55219430082199, "learning_rate": 9.624642887339726e-06, "loss": 2.3321962356567383, "step": 1211 }, { "epoch": 0.6365546218487395, "grad_norm": 13.689849334238781, "learning_rate": 9.623480303796809e-06, "loss": 3.1201837062835693, "step": 1212 }, { "epoch": 0.6370798319327731, "grad_norm": 7.734765266763488, "learning_rate": 9.622315993054384e-06, "loss": 3.217038154602051, "step": 1213 }, { "epoch": 0.6376050420168067, "grad_norm": 6.64830203506993, "learning_rate": 9.621149955547401e-06, "loss": 2.8321728706359863, "step": 1214 }, { "epoch": 0.6381302521008403, "grad_norm": 9.338230711426522, "learning_rate": 9.619982191711462e-06, "loss": 2.633908271789551, "step": 1215 }, { "epoch": 0.6386554621848739, "grad_norm": 13.63438630462051, "learning_rate": 9.618812701982808e-06, "loss": 3.059248924255371, "step": 1216 }, { "epoch": 0.6391806722689075, "grad_norm": 14.973627433044415, "learning_rate": 9.61764148679833e-06, "loss": 3.526207685470581, "step": 1217 }, { "epoch": 0.6397058823529411, "grad_norm": 9.72191311741727, "learning_rate": 9.616468546595556e-06, "loss": 2.847991466522217, "step": 1218 }, { "epoch": 0.6402310924369747, "grad_norm": 9.733912205577893, "learning_rate": 9.615293881812666e-06, "loss": 2.928663492202759, "step": 1219 }, { "epoch": 0.6407563025210085, "grad_norm": 15.627623677478192, "learning_rate": 9.614117492888479e-06, "loss": 3.010359525680542, "step": 1220 }, { "epoch": 0.6412815126050421, "grad_norm": 8.65174457200257, "learning_rate": 9.61293938026246e-06, "loss": 2.901470184326172, "step": 1221 }, { "epoch": 0.6418067226890757, "grad_norm": 23.717025681574377, "learning_rate": 9.611759544374719e-06, "loss": 3.327909231185913, "step": 1222 }, { "epoch": 0.6423319327731093, "grad_norm": 23.457512475487516, "learning_rate": 9.61057798566601e-06, "loss": 2.769148588180542, "step": 1223 }, { "epoch": 0.6428571428571429, "grad_norm": 11.314647776230938, "learning_rate": 9.609394704577728e-06, "loss": 2.668102502822876, "step": 1224 }, { "epoch": 0.6433823529411765, "grad_norm": 9.855746697379743, "learning_rate": 9.608209701551913e-06, "loss": 2.606781482696533, "step": 1225 }, { "epoch": 0.6439075630252101, "grad_norm": 12.64896610603209, "learning_rate": 9.607022977031247e-06, "loss": 2.465226173400879, "step": 1226 }, { "epoch": 0.6444327731092437, "grad_norm": 12.015146340376507, "learning_rate": 9.60583453145906e-06, "loss": 1.7274982929229736, "step": 1227 }, { "epoch": 0.6449579831932774, "grad_norm": 12.108841692254453, "learning_rate": 9.604644365279316e-06, "loss": 2.740081310272217, "step": 1228 }, { "epoch": 0.645483193277311, "grad_norm": 10.128874048559293, "learning_rate": 9.60345247893663e-06, "loss": 2.892533302307129, "step": 1229 }, { "epoch": 0.6460084033613446, "grad_norm": 12.230768852722859, "learning_rate": 9.602258872876256e-06, "loss": 2.738703727722168, "step": 1230 }, { "epoch": 0.6465336134453782, "grad_norm": 13.988678221971234, "learning_rate": 9.60106354754409e-06, "loss": 2.6789562702178955, "step": 1231 }, { "epoch": 0.6470588235294118, "grad_norm": 10.144174129686618, "learning_rate": 9.599866503386673e-06, "loss": 2.8041086196899414, "step": 1232 }, { "epoch": 0.6475840336134454, "grad_norm": 11.550532444108269, "learning_rate": 9.598667740851187e-06, "loss": 2.9156932830810547, "step": 1233 }, { "epoch": 0.648109243697479, "grad_norm": 13.964702510735874, "learning_rate": 9.597467260385452e-06, "loss": 2.6370129585266113, "step": 1234 }, { "epoch": 0.6486344537815126, "grad_norm": 17.928296596902026, "learning_rate": 9.596265062437933e-06, "loss": 3.0181162357330322, "step": 1235 }, { "epoch": 0.6491596638655462, "grad_norm": 7.731089233325417, "learning_rate": 9.59506114745774e-06, "loss": 2.9872469902038574, "step": 1236 }, { "epoch": 0.6496848739495799, "grad_norm": 10.283607724998227, "learning_rate": 9.59385551589462e-06, "loss": 2.948763608932495, "step": 1237 }, { "epoch": 0.6502100840336135, "grad_norm": 17.150723223022805, "learning_rate": 9.592648168198961e-06, "loss": 2.2981417179107666, "step": 1238 }, { "epoch": 0.6507352941176471, "grad_norm": 20.940451908364867, "learning_rate": 9.591439104821795e-06, "loss": 3.1842682361602783, "step": 1239 }, { "epoch": 0.6512605042016807, "grad_norm": 8.770053306727121, "learning_rate": 9.590228326214794e-06, "loss": 2.823775053024292, "step": 1240 }, { "epoch": 0.6517857142857143, "grad_norm": 12.110727698299284, "learning_rate": 9.589015832830267e-06, "loss": 2.3811817169189453, "step": 1241 }, { "epoch": 0.6523109243697479, "grad_norm": 9.106069774662254, "learning_rate": 9.587801625121167e-06, "loss": 2.5018489360809326, "step": 1242 }, { "epoch": 0.6528361344537815, "grad_norm": 21.827303844607535, "learning_rate": 9.586585703541092e-06, "loss": 2.984823703765869, "step": 1243 }, { "epoch": 0.6533613445378151, "grad_norm": 9.74472805235711, "learning_rate": 9.58536806854427e-06, "loss": 3.300760269165039, "step": 1244 }, { "epoch": 0.6538865546218487, "grad_norm": 11.01056962582444, "learning_rate": 9.584148720585575e-06, "loss": 2.8390417098999023, "step": 1245 }, { "epoch": 0.6544117647058824, "grad_norm": 8.826552371043519, "learning_rate": 9.582927660120524e-06, "loss": 3.0983588695526123, "step": 1246 }, { "epoch": 0.654936974789916, "grad_norm": 11.04747213581895, "learning_rate": 9.581704887605267e-06, "loss": 3.279728889465332, "step": 1247 }, { "epoch": 0.6554621848739496, "grad_norm": 17.88074046271364, "learning_rate": 9.580480403496599e-06, "loss": 2.770259380340576, "step": 1248 }, { "epoch": 0.6559873949579832, "grad_norm": 10.30587242756858, "learning_rate": 9.57925420825195e-06, "loss": 2.5752410888671875, "step": 1249 }, { "epoch": 0.6565126050420168, "grad_norm": 10.025829680080944, "learning_rate": 9.578026302329391e-06, "loss": 2.874560832977295, "step": 1250 }, { "epoch": 0.6570378151260504, "grad_norm": 8.244483440682384, "learning_rate": 9.576796686187635e-06, "loss": 2.392122745513916, "step": 1251 }, { "epoch": 0.657563025210084, "grad_norm": 9.694849465338885, "learning_rate": 9.57556536028603e-06, "loss": 2.232323408126831, "step": 1252 }, { "epoch": 0.6580882352941176, "grad_norm": 18.1786080559346, "learning_rate": 9.574332325084564e-06, "loss": 2.4674391746520996, "step": 1253 }, { "epoch": 0.6586134453781513, "grad_norm": 8.37795044251335, "learning_rate": 9.573097581043861e-06, "loss": 3.0313472747802734, "step": 1254 }, { "epoch": 0.6591386554621849, "grad_norm": 7.786784506589957, "learning_rate": 9.571861128625191e-06, "loss": 2.654280185699463, "step": 1255 }, { "epoch": 0.6596638655462185, "grad_norm": 11.678518611114994, "learning_rate": 9.570622968290455e-06, "loss": 2.807952880859375, "step": 1256 }, { "epoch": 0.6601890756302521, "grad_norm": 11.8002752101054, "learning_rate": 9.569383100502193e-06, "loss": 2.6909966468811035, "step": 1257 }, { "epoch": 0.6607142857142857, "grad_norm": 19.067675110443254, "learning_rate": 9.568141525723582e-06, "loss": 2.5166993141174316, "step": 1258 }, { "epoch": 0.6612394957983193, "grad_norm": 18.388895506648996, "learning_rate": 9.566898244418443e-06, "loss": 3.5859007835388184, "step": 1259 }, { "epoch": 0.6617647058823529, "grad_norm": 9.61108395228637, "learning_rate": 9.565653257051228e-06, "loss": 2.6796000003814697, "step": 1260 }, { "epoch": 0.6622899159663865, "grad_norm": 7.540243622628439, "learning_rate": 9.564406564087032e-06, "loss": 2.68310809135437, "step": 1261 }, { "epoch": 0.6628151260504201, "grad_norm": 6.658163745066767, "learning_rate": 9.563158165991577e-06, "loss": 2.541107177734375, "step": 1262 }, { "epoch": 0.6633403361344538, "grad_norm": 13.042698254835107, "learning_rate": 9.561908063231234e-06, "loss": 2.2719717025756836, "step": 1263 }, { "epoch": 0.6638655462184874, "grad_norm": 14.826951460449694, "learning_rate": 9.560656256273004e-06, "loss": 2.5673341751098633, "step": 1264 }, { "epoch": 0.664390756302521, "grad_norm": 8.500017556503874, "learning_rate": 9.559402745584527e-06, "loss": 2.7344629764556885, "step": 1265 }, { "epoch": 0.6649159663865546, "grad_norm": 13.155321458061042, "learning_rate": 9.558147531634076e-06, "loss": 2.5384857654571533, "step": 1266 }, { "epoch": 0.6654411764705882, "grad_norm": 6.01182696383257, "learning_rate": 9.556890614890565e-06, "loss": 2.630429267883301, "step": 1267 }, { "epoch": 0.6659663865546218, "grad_norm": 6.627709500576783, "learning_rate": 9.555631995823543e-06, "loss": 2.5639748573303223, "step": 1268 }, { "epoch": 0.6664915966386554, "grad_norm": 7.936485985229376, "learning_rate": 9.554371674903191e-06, "loss": 2.715306282043457, "step": 1269 }, { "epoch": 0.667016806722689, "grad_norm": 11.047934970062675, "learning_rate": 9.55310965260033e-06, "loss": 2.6093361377716064, "step": 1270 }, { "epoch": 0.6675420168067226, "grad_norm": 9.885619999004115, "learning_rate": 9.551845929386417e-06, "loss": 2.436250686645508, "step": 1271 }, { "epoch": 0.6680672268907563, "grad_norm": 6.637540996061952, "learning_rate": 9.55058050573354e-06, "loss": 2.684870481491089, "step": 1272 }, { "epoch": 0.6685924369747899, "grad_norm": 11.160500558974611, "learning_rate": 9.549313382114427e-06, "loss": 2.517242431640625, "step": 1273 }, { "epoch": 0.6691176470588235, "grad_norm": 9.121291347461248, "learning_rate": 9.548044559002439e-06, "loss": 2.7190897464752197, "step": 1274 }, { "epoch": 0.6696428571428571, "grad_norm": 10.318891847989532, "learning_rate": 9.54677403687157e-06, "loss": 3.291470766067505, "step": 1275 }, { "epoch": 0.6701680672268907, "grad_norm": 20.547739071752503, "learning_rate": 9.545501816196452e-06, "loss": 2.768106698989868, "step": 1276 }, { "epoch": 0.6706932773109243, "grad_norm": 6.558821932019025, "learning_rate": 9.54422789745235e-06, "loss": 2.909510612487793, "step": 1277 }, { "epoch": 0.6712184873949579, "grad_norm": 17.38320702308684, "learning_rate": 9.542952281115163e-06, "loss": 3.3048973083496094, "step": 1278 }, { "epoch": 0.6717436974789915, "grad_norm": 15.238255200305627, "learning_rate": 9.541674967661424e-06, "loss": 2.5014166831970215, "step": 1279 }, { "epoch": 0.6722689075630253, "grad_norm": 12.917675640362509, "learning_rate": 9.540395957568303e-06, "loss": 2.706674575805664, "step": 1280 }, { "epoch": 0.6727941176470589, "grad_norm": 11.220456694925208, "learning_rate": 9.5391152513136e-06, "loss": 3.067174196243286, "step": 1281 }, { "epoch": 0.6733193277310925, "grad_norm": 13.304256076713015, "learning_rate": 9.53783284937575e-06, "loss": 2.654634475708008, "step": 1282 }, { "epoch": 0.6738445378151261, "grad_norm": 20.843161074031297, "learning_rate": 9.536548752233822e-06, "loss": 3.2768654823303223, "step": 1283 }, { "epoch": 0.6743697478991597, "grad_norm": 46.99042684881799, "learning_rate": 9.535262960367517e-06, "loss": 2.1927356719970703, "step": 1284 }, { "epoch": 0.6748949579831933, "grad_norm": 25.37929267140401, "learning_rate": 9.533975474257171e-06, "loss": 3.0900778770446777, "step": 1285 }, { "epoch": 0.6754201680672269, "grad_norm": 13.050063420656867, "learning_rate": 9.53268629438375e-06, "loss": 3.2228505611419678, "step": 1286 }, { "epoch": 0.6759453781512605, "grad_norm": 11.1449400416276, "learning_rate": 9.531395421228857e-06, "loss": 2.9872071743011475, "step": 1287 }, { "epoch": 0.6764705882352942, "grad_norm": 9.184043673654573, "learning_rate": 9.530102855274723e-06, "loss": 2.888235569000244, "step": 1288 }, { "epoch": 0.6769957983193278, "grad_norm": 9.77657945924281, "learning_rate": 9.528808597004216e-06, "loss": 2.0844593048095703, "step": 1289 }, { "epoch": 0.6775210084033614, "grad_norm": 11.846016703107276, "learning_rate": 9.527512646900832e-06, "loss": 2.4693167209625244, "step": 1290 }, { "epoch": 0.678046218487395, "grad_norm": 17.739386299001715, "learning_rate": 9.5262150054487e-06, "loss": 2.933748722076416, "step": 1291 }, { "epoch": 0.6785714285714286, "grad_norm": 6.279664041307915, "learning_rate": 9.524915673132584e-06, "loss": 2.602099895477295, "step": 1292 }, { "epoch": 0.6790966386554622, "grad_norm": 9.629477822347484, "learning_rate": 9.523614650437876e-06, "loss": 2.6288552284240723, "step": 1293 }, { "epoch": 0.6796218487394958, "grad_norm": 14.619570781758624, "learning_rate": 9.522311937850599e-06, "loss": 2.5347964763641357, "step": 1294 }, { "epoch": 0.6801470588235294, "grad_norm": 16.33077187580003, "learning_rate": 9.521007535857412e-06, "loss": 2.9023921489715576, "step": 1295 }, { "epoch": 0.680672268907563, "grad_norm": 12.231716396344995, "learning_rate": 9.5197014449456e-06, "loss": 2.02052640914917, "step": 1296 }, { "epoch": 0.6811974789915967, "grad_norm": 11.85750275712929, "learning_rate": 9.518393665603084e-06, "loss": 2.457960844039917, "step": 1297 }, { "epoch": 0.6817226890756303, "grad_norm": 45.000785607816304, "learning_rate": 9.517084198318408e-06, "loss": 2.775736093521118, "step": 1298 }, { "epoch": 0.6822478991596639, "grad_norm": 11.848366686860444, "learning_rate": 9.515773043580754e-06, "loss": 2.7515206336975098, "step": 1299 }, { "epoch": 0.6827731092436975, "grad_norm": 8.044014029627554, "learning_rate": 9.514460201879933e-06, "loss": 3.122994899749756, "step": 1300 }, { "epoch": 0.6832983193277311, "grad_norm": 7.709408715358316, "learning_rate": 9.513145673706383e-06, "loss": 2.763450860977173, "step": 1301 }, { "epoch": 0.6838235294117647, "grad_norm": 16.596621425114005, "learning_rate": 9.511829459551177e-06, "loss": 1.674150824546814, "step": 1302 }, { "epoch": 0.6843487394957983, "grad_norm": 16.944579906628196, "learning_rate": 9.51051155990601e-06, "loss": 3.0965023040771484, "step": 1303 }, { "epoch": 0.6848739495798319, "grad_norm": 29.684456904202968, "learning_rate": 9.509191975263214e-06, "loss": 3.5322749614715576, "step": 1304 }, { "epoch": 0.6853991596638656, "grad_norm": 20.17744729818729, "learning_rate": 9.507870706115749e-06, "loss": 4.938513278961182, "step": 1305 }, { "epoch": 0.6859243697478992, "grad_norm": 9.05047495142821, "learning_rate": 9.506547752957202e-06, "loss": 2.6075875759124756, "step": 1306 }, { "epoch": 0.6864495798319328, "grad_norm": 7.172005664837628, "learning_rate": 9.505223116281792e-06, "loss": 2.5574395656585693, "step": 1307 }, { "epoch": 0.6869747899159664, "grad_norm": 5.460805864901304, "learning_rate": 9.503896796584363e-06, "loss": 1.2071025371551514, "step": 1308 }, { "epoch": 0.6875, "grad_norm": 11.696722291676378, "learning_rate": 9.50256879436039e-06, "loss": 2.3375420570373535, "step": 1309 }, { "epoch": 0.6880252100840336, "grad_norm": 15.393794232695843, "learning_rate": 9.501239110105977e-06, "loss": 2.381495237350464, "step": 1310 }, { "epoch": 0.6885504201680672, "grad_norm": 8.985583895994047, "learning_rate": 9.499907744317857e-06, "loss": 2.9014463424682617, "step": 1311 }, { "epoch": 0.6890756302521008, "grad_norm": 10.51140471946597, "learning_rate": 9.49857469749339e-06, "loss": 2.4108285903930664, "step": 1312 }, { "epoch": 0.6896008403361344, "grad_norm": 23.733160801090765, "learning_rate": 9.497239970130561e-06, "loss": 2.544302225112915, "step": 1313 }, { "epoch": 0.6901260504201681, "grad_norm": 10.113600080830354, "learning_rate": 9.49590356272799e-06, "loss": 3.2044105529785156, "step": 1314 }, { "epoch": 0.6906512605042017, "grad_norm": 11.225461610266919, "learning_rate": 9.494565475784918e-06, "loss": 2.7142252922058105, "step": 1315 }, { "epoch": 0.6911764705882353, "grad_norm": 12.805029589555923, "learning_rate": 9.493225709801215e-06, "loss": 2.8982505798339844, "step": 1316 }, { "epoch": 0.6917016806722689, "grad_norm": 8.345505498879067, "learning_rate": 9.491884265277383e-06, "loss": 2.6981797218322754, "step": 1317 }, { "epoch": 0.6922268907563025, "grad_norm": 12.937650980741134, "learning_rate": 9.490541142714542e-06, "loss": 2.7911643981933594, "step": 1318 }, { "epoch": 0.6927521008403361, "grad_norm": 22.116933179052825, "learning_rate": 9.489196342614447e-06, "loss": 2.021183967590332, "step": 1319 }, { "epoch": 0.6932773109243697, "grad_norm": 14.357157701259643, "learning_rate": 9.487849865479477e-06, "loss": 2.7909629344940186, "step": 1320 }, { "epoch": 0.6938025210084033, "grad_norm": 10.90785986875978, "learning_rate": 9.486501711812637e-06, "loss": 2.944089889526367, "step": 1321 }, { "epoch": 0.694327731092437, "grad_norm": 15.35413406540934, "learning_rate": 9.485151882117556e-06, "loss": 3.075840950012207, "step": 1322 }, { "epoch": 0.6948529411764706, "grad_norm": 10.921413024324112, "learning_rate": 9.483800376898496e-06, "loss": 2.9666800498962402, "step": 1323 }, { "epoch": 0.6953781512605042, "grad_norm": 6.931937472194988, "learning_rate": 9.482447196660338e-06, "loss": 2.744246006011963, "step": 1324 }, { "epoch": 0.6959033613445378, "grad_norm": 22.815648595154784, "learning_rate": 9.481092341908591e-06, "loss": 2.2377512454986572, "step": 1325 }, { "epoch": 0.6964285714285714, "grad_norm": 33.745252950177466, "learning_rate": 9.47973581314939e-06, "loss": 3.381443500518799, "step": 1326 }, { "epoch": 0.696953781512605, "grad_norm": 13.747870396917808, "learning_rate": 9.478377610889495e-06, "loss": 3.339777708053589, "step": 1327 }, { "epoch": 0.6974789915966386, "grad_norm": 14.754600586020432, "learning_rate": 9.477017735636294e-06, "loss": 2.6213040351867676, "step": 1328 }, { "epoch": 0.6980042016806722, "grad_norm": 14.243546829313178, "learning_rate": 9.475656187897794e-06, "loss": 2.7175559997558594, "step": 1329 }, { "epoch": 0.6985294117647058, "grad_norm": 17.71429842481506, "learning_rate": 9.47429296818263e-06, "loss": 2.6168465614318848, "step": 1330 }, { "epoch": 0.6990546218487395, "grad_norm": 10.504564201612084, "learning_rate": 9.472928077000066e-06, "loss": 2.1036758422851562, "step": 1331 }, { "epoch": 0.6995798319327731, "grad_norm": 12.309961034642136, "learning_rate": 9.47156151485998e-06, "loss": 2.7158048152923584, "step": 1332 }, { "epoch": 0.7001050420168067, "grad_norm": 7.9598002812863164, "learning_rate": 9.470193282272886e-06, "loss": 2.008297920227051, "step": 1333 }, { "epoch": 0.7006302521008403, "grad_norm": 12.860207803140918, "learning_rate": 9.468823379749915e-06, "loss": 2.714076519012451, "step": 1334 }, { "epoch": 0.7011554621848739, "grad_norm": 17.560070558918074, "learning_rate": 9.467451807802821e-06, "loss": 2.544034481048584, "step": 1335 }, { "epoch": 0.7016806722689075, "grad_norm": 17.977807037833866, "learning_rate": 9.466078566943985e-06, "loss": 2.744539737701416, "step": 1336 }, { "epoch": 0.7022058823529411, "grad_norm": 18.045075243022165, "learning_rate": 9.464703657686412e-06, "loss": 2.770423412322998, "step": 1337 }, { "epoch": 0.7027310924369747, "grad_norm": 8.796807059905332, "learning_rate": 9.463327080543726e-06, "loss": 2.601559638977051, "step": 1338 }, { "epoch": 0.7032563025210085, "grad_norm": 9.770323235779015, "learning_rate": 9.461948836030179e-06, "loss": 3.0226902961730957, "step": 1339 }, { "epoch": 0.7037815126050421, "grad_norm": 9.670867365270533, "learning_rate": 9.460568924660642e-06, "loss": 2.2374281883239746, "step": 1340 }, { "epoch": 0.7043067226890757, "grad_norm": 11.038223135908881, "learning_rate": 9.45918734695061e-06, "loss": 2.7864830493927, "step": 1341 }, { "epoch": 0.7048319327731093, "grad_norm": 16.745596707482306, "learning_rate": 9.457804103416201e-06, "loss": 2.5133237838745117, "step": 1342 }, { "epoch": 0.7053571428571429, "grad_norm": 11.036184780862653, "learning_rate": 9.456419194574158e-06, "loss": 2.864564895629883, "step": 1343 }, { "epoch": 0.7058823529411765, "grad_norm": 19.094604602634462, "learning_rate": 9.45503262094184e-06, "loss": 3.54667067527771, "step": 1344 }, { "epoch": 0.7064075630252101, "grad_norm": 12.94870110586718, "learning_rate": 9.453644383037232e-06, "loss": 2.3254120349884033, "step": 1345 }, { "epoch": 0.7069327731092437, "grad_norm": 14.861646673492746, "learning_rate": 9.45225448137894e-06, "loss": 2.5209128856658936, "step": 1346 }, { "epoch": 0.7074579831932774, "grad_norm": 13.398996249782893, "learning_rate": 9.45086291648619e-06, "loss": 2.752837657928467, "step": 1347 }, { "epoch": 0.707983193277311, "grad_norm": 11.404093547038189, "learning_rate": 9.449469688878833e-06, "loss": 2.8514773845672607, "step": 1348 }, { "epoch": 0.7085084033613446, "grad_norm": 7.661170507555201, "learning_rate": 9.448074799077337e-06, "loss": 2.347379684448242, "step": 1349 }, { "epoch": 0.7090336134453782, "grad_norm": 12.638915089631368, "learning_rate": 9.446678247602794e-06, "loss": 2.823115348815918, "step": 1350 }, { "epoch": 0.7095588235294118, "grad_norm": 18.12805299575099, "learning_rate": 9.445280034976916e-06, "loss": 2.608738422393799, "step": 1351 }, { "epoch": 0.7100840336134454, "grad_norm": 8.642232967922208, "learning_rate": 9.443880161722032e-06, "loss": 3.023998737335205, "step": 1352 }, { "epoch": 0.710609243697479, "grad_norm": 19.50477675862717, "learning_rate": 9.442478628361098e-06, "loss": 2.913578748703003, "step": 1353 }, { "epoch": 0.7111344537815126, "grad_norm": 21.706233749621184, "learning_rate": 9.441075435417683e-06, "loss": 3.0246872901916504, "step": 1354 }, { "epoch": 0.7116596638655462, "grad_norm": 17.67378038551585, "learning_rate": 9.439670583415984e-06, "loss": 3.0222532749176025, "step": 1355 }, { "epoch": 0.7121848739495799, "grad_norm": 8.837601410314631, "learning_rate": 9.438264072880811e-06, "loss": 2.9659714698791504, "step": 1356 }, { "epoch": 0.7127100840336135, "grad_norm": 15.012524158066952, "learning_rate": 9.436855904337596e-06, "loss": 2.4705705642700195, "step": 1357 }, { "epoch": 0.7132352941176471, "grad_norm": 27.40170647298477, "learning_rate": 9.435446078312389e-06, "loss": 2.984248161315918, "step": 1358 }, { "epoch": 0.7137605042016807, "grad_norm": 13.497646711735237, "learning_rate": 9.434034595331863e-06, "loss": 3.160773515701294, "step": 1359 }, { "epoch": 0.7142857142857143, "grad_norm": 9.35232386887726, "learning_rate": 9.432621455923308e-06, "loss": 2.279153347015381, "step": 1360 }, { "epoch": 0.7148109243697479, "grad_norm": 12.189984058563963, "learning_rate": 9.43120666061463e-06, "loss": 2.089343547821045, "step": 1361 }, { "epoch": 0.7153361344537815, "grad_norm": 5.050789962910078, "learning_rate": 9.429790209934355e-06, "loss": 2.4052462577819824, "step": 1362 }, { "epoch": 0.7158613445378151, "grad_norm": 8.971997861991584, "learning_rate": 9.428372104411632e-06, "loss": 2.5842559337615967, "step": 1363 }, { "epoch": 0.7163865546218487, "grad_norm": 12.103027223944814, "learning_rate": 9.426952344576222e-06, "loss": 2.938389301300049, "step": 1364 }, { "epoch": 0.7169117647058824, "grad_norm": 11.86887475831686, "learning_rate": 9.425530930958507e-06, "loss": 3.1199917793273926, "step": 1365 }, { "epoch": 0.717436974789916, "grad_norm": 6.708273631905648, "learning_rate": 9.424107864089485e-06, "loss": 2.9766764640808105, "step": 1366 }, { "epoch": 0.7179621848739496, "grad_norm": 8.621264404336527, "learning_rate": 9.422683144500775e-06, "loss": 2.8513436317443848, "step": 1367 }, { "epoch": 0.7184873949579832, "grad_norm": 15.458155449734308, "learning_rate": 9.421256772724612e-06, "loss": 3.151705503463745, "step": 1368 }, { "epoch": 0.7190126050420168, "grad_norm": 14.290268675960716, "learning_rate": 9.419828749293845e-06, "loss": 2.5013177394866943, "step": 1369 }, { "epoch": 0.7195378151260504, "grad_norm": 18.682865499597426, "learning_rate": 9.418399074741943e-06, "loss": 2.460247755050659, "step": 1370 }, { "epoch": 0.720063025210084, "grad_norm": 24.451169695375615, "learning_rate": 9.416967749602996e-06, "loss": 2.5819268226623535, "step": 1371 }, { "epoch": 0.7205882352941176, "grad_norm": 10.02373184540919, "learning_rate": 9.4155347744117e-06, "loss": 2.1001505851745605, "step": 1372 }, { "epoch": 0.7211134453781513, "grad_norm": 8.777736770119411, "learning_rate": 9.414100149703373e-06, "loss": 2.957872152328491, "step": 1373 }, { "epoch": 0.7216386554621849, "grad_norm": 8.456445117374907, "learning_rate": 9.412663876013954e-06, "loss": 2.311070442199707, "step": 1374 }, { "epoch": 0.7221638655462185, "grad_norm": 11.263860119699258, "learning_rate": 9.411225953879993e-06, "loss": 2.642958641052246, "step": 1375 }, { "epoch": 0.7226890756302521, "grad_norm": 12.662610939522205, "learning_rate": 9.409786383838653e-06, "loss": 3.2040657997131348, "step": 1376 }, { "epoch": 0.7232142857142857, "grad_norm": 9.783216528684383, "learning_rate": 9.40834516642772e-06, "loss": 2.6354026794433594, "step": 1377 }, { "epoch": 0.7237394957983193, "grad_norm": 13.818754228405075, "learning_rate": 9.406902302185587e-06, "loss": 2.6107289791107178, "step": 1378 }, { "epoch": 0.7242647058823529, "grad_norm": 15.054292525217258, "learning_rate": 9.405457791651272e-06, "loss": 2.8895740509033203, "step": 1379 }, { "epoch": 0.7247899159663865, "grad_norm": 11.901605027729738, "learning_rate": 9.4040116353644e-06, "loss": 2.9344370365142822, "step": 1380 }, { "epoch": 0.7253151260504201, "grad_norm": 10.969129039598197, "learning_rate": 9.402563833865213e-06, "loss": 2.6658058166503906, "step": 1381 }, { "epoch": 0.7258403361344538, "grad_norm": 12.989795592487805, "learning_rate": 9.401114387694568e-06, "loss": 3.1829166412353516, "step": 1382 }, { "epoch": 0.7263655462184874, "grad_norm": 16.549151471583183, "learning_rate": 9.399663297393937e-06, "loss": 3.0087943077087402, "step": 1383 }, { "epoch": 0.726890756302521, "grad_norm": 10.953146233522348, "learning_rate": 9.398210563505405e-06, "loss": 2.890185832977295, "step": 1384 }, { "epoch": 0.7274159663865546, "grad_norm": 33.01528017378158, "learning_rate": 9.396756186571672e-06, "loss": 1.9646806716918945, "step": 1385 }, { "epoch": 0.7279411764705882, "grad_norm": 11.884339298915373, "learning_rate": 9.395300167136055e-06, "loss": 2.821507453918457, "step": 1386 }, { "epoch": 0.7284663865546218, "grad_norm": 32.47387416330914, "learning_rate": 9.393842505742474e-06, "loss": 2.535316228866577, "step": 1387 }, { "epoch": 0.7289915966386554, "grad_norm": 9.075975777726198, "learning_rate": 9.392383202935476e-06, "loss": 2.6755433082580566, "step": 1388 }, { "epoch": 0.729516806722689, "grad_norm": 12.860652796524104, "learning_rate": 9.39092225926021e-06, "loss": 2.2261745929718018, "step": 1389 }, { "epoch": 0.7300420168067226, "grad_norm": 8.64320308642805, "learning_rate": 9.389459675262446e-06, "loss": 2.6248598098754883, "step": 1390 }, { "epoch": 0.7305672268907563, "grad_norm": 10.380043046434807, "learning_rate": 9.387995451488561e-06, "loss": 3.0444443225860596, "step": 1391 }, { "epoch": 0.7310924369747899, "grad_norm": 7.138907418937429, "learning_rate": 9.386529588485549e-06, "loss": 3.195737361907959, "step": 1392 }, { "epoch": 0.7316176470588235, "grad_norm": 28.015383111308655, "learning_rate": 9.385062086801013e-06, "loss": 2.8851213455200195, "step": 1393 }, { "epoch": 0.7321428571428571, "grad_norm": 10.214467168943983, "learning_rate": 9.38359294698317e-06, "loss": 2.9917964935302734, "step": 1394 }, { "epoch": 0.7326680672268907, "grad_norm": 17.77067206107327, "learning_rate": 9.382122169580848e-06, "loss": 2.2778615951538086, "step": 1395 }, { "epoch": 0.7331932773109243, "grad_norm": 7.571215813251031, "learning_rate": 9.380649755143488e-06, "loss": 2.527552366256714, "step": 1396 }, { "epoch": 0.7337184873949579, "grad_norm": 7.717230317008868, "learning_rate": 9.379175704221139e-06, "loss": 1.7584331035614014, "step": 1397 }, { "epoch": 0.7342436974789915, "grad_norm": 11.353245551534629, "learning_rate": 9.377700017364469e-06, "loss": 1.99480402469635, "step": 1398 }, { "epoch": 0.7347689075630253, "grad_norm": 8.640384284021444, "learning_rate": 9.376222695124748e-06, "loss": 2.5055298805236816, "step": 1399 }, { "epoch": 0.7352941176470589, "grad_norm": 13.5071496145152, "learning_rate": 9.374743738053862e-06, "loss": 2.9797515869140625, "step": 1400 }, { "epoch": 0.7358193277310925, "grad_norm": 15.081587837232423, "learning_rate": 9.37326314670431e-06, "loss": 2.2903127670288086, "step": 1401 }, { "epoch": 0.7363445378151261, "grad_norm": 10.918947750645197, "learning_rate": 9.371780921629195e-06, "loss": 2.863656520843506, "step": 1402 }, { "epoch": 0.7368697478991597, "grad_norm": 10.670799315171976, "learning_rate": 9.370297063382235e-06, "loss": 2.4964540004730225, "step": 1403 }, { "epoch": 0.7373949579831933, "grad_norm": 9.520859840288391, "learning_rate": 9.368811572517756e-06, "loss": 2.362823724746704, "step": 1404 }, { "epoch": 0.7379201680672269, "grad_norm": 6.835728974180949, "learning_rate": 9.367324449590694e-06, "loss": 3.0204315185546875, "step": 1405 }, { "epoch": 0.7384453781512605, "grad_norm": 14.863406548725411, "learning_rate": 9.3658356951566e-06, "loss": 2.7434098720550537, "step": 1406 }, { "epoch": 0.7389705882352942, "grad_norm": 12.858452907659908, "learning_rate": 9.364345309771624e-06, "loss": 2.655700445175171, "step": 1407 }, { "epoch": 0.7394957983193278, "grad_norm": 14.65548674711664, "learning_rate": 9.362853293992535e-06, "loss": 2.302583694458008, "step": 1408 }, { "epoch": 0.7400210084033614, "grad_norm": 8.260087260299908, "learning_rate": 9.361359648376707e-06, "loss": 3.3217902183532715, "step": 1409 }, { "epoch": 0.740546218487395, "grad_norm": 15.076035124946063, "learning_rate": 9.359864373482122e-06, "loss": 3.1235344409942627, "step": 1410 }, { "epoch": 0.7410714285714286, "grad_norm": 10.722930307754114, "learning_rate": 9.358367469867372e-06, "loss": 2.8075900077819824, "step": 1411 }, { "epoch": 0.7415966386554622, "grad_norm": 12.983006343518566, "learning_rate": 9.356868938091655e-06, "loss": 2.9995510578155518, "step": 1412 }, { "epoch": 0.7421218487394958, "grad_norm": 9.379237597493933, "learning_rate": 9.355368778714784e-06, "loss": 2.6002068519592285, "step": 1413 }, { "epoch": 0.7426470588235294, "grad_norm": 26.27711212533193, "learning_rate": 9.353866992297174e-06, "loss": 2.375796318054199, "step": 1414 }, { "epoch": 0.743172268907563, "grad_norm": 11.222766076275274, "learning_rate": 9.352363579399846e-06, "loss": 2.8118574619293213, "step": 1415 }, { "epoch": 0.7436974789915967, "grad_norm": 13.51524891050891, "learning_rate": 9.350858540584437e-06, "loss": 3.0930118560791016, "step": 1416 }, { "epoch": 0.7442226890756303, "grad_norm": 13.005699518462123, "learning_rate": 9.349351876413181e-06, "loss": 3.2080953121185303, "step": 1417 }, { "epoch": 0.7447478991596639, "grad_norm": 9.71357191671031, "learning_rate": 9.347843587448931e-06, "loss": 2.5146827697753906, "step": 1418 }, { "epoch": 0.7452731092436975, "grad_norm": 8.728947305262524, "learning_rate": 9.346333674255132e-06, "loss": 2.7985026836395264, "step": 1419 }, { "epoch": 0.7457983193277311, "grad_norm": 12.299315161082067, "learning_rate": 9.344822137395853e-06, "loss": 2.5758912563323975, "step": 1420 }, { "epoch": 0.7463235294117647, "grad_norm": 17.672666297977287, "learning_rate": 9.343308977435754e-06, "loss": 2.5825915336608887, "step": 1421 }, { "epoch": 0.7468487394957983, "grad_norm": 12.718365767124839, "learning_rate": 9.34179419494011e-06, "loss": 2.511111259460449, "step": 1422 }, { "epoch": 0.7473739495798319, "grad_norm": 14.565970508358097, "learning_rate": 9.340277790474804e-06, "loss": 2.6805567741394043, "step": 1423 }, { "epoch": 0.7478991596638656, "grad_norm": 11.315382129257491, "learning_rate": 9.338759764606318e-06, "loss": 2.303293228149414, "step": 1424 }, { "epoch": 0.7484243697478992, "grad_norm": 8.386154954984145, "learning_rate": 9.337240117901742e-06, "loss": 2.525210380554199, "step": 1425 }, { "epoch": 0.7489495798319328, "grad_norm": 15.996249867978909, "learning_rate": 9.335718850928772e-06, "loss": 2.724491834640503, "step": 1426 }, { "epoch": 0.7494747899159664, "grad_norm": 13.992497994363099, "learning_rate": 9.334195964255713e-06, "loss": 2.6540184020996094, "step": 1427 }, { "epoch": 0.75, "grad_norm": 9.7380823568118, "learning_rate": 9.33267145845147e-06, "loss": 3.3706037998199463, "step": 1428 }, { "epoch": 0.7505252100840336, "grad_norm": 12.893009723038702, "learning_rate": 9.331145334085554e-06, "loss": 3.2180933952331543, "step": 1429 }, { "epoch": 0.7510504201680672, "grad_norm": 10.33314172045796, "learning_rate": 9.32961759172808e-06, "loss": 2.725083827972412, "step": 1430 }, { "epoch": 0.7515756302521008, "grad_norm": 12.770106629086934, "learning_rate": 9.328088231949773e-06, "loss": 2.0833425521850586, "step": 1431 }, { "epoch": 0.7521008403361344, "grad_norm": 10.541104698964073, "learning_rate": 9.326557255321955e-06, "loss": 2.642286539077759, "step": 1432 }, { "epoch": 0.7526260504201681, "grad_norm": 10.584788013068913, "learning_rate": 9.325024662416553e-06, "loss": 3.445159435272217, "step": 1433 }, { "epoch": 0.7531512605042017, "grad_norm": 11.674629230911592, "learning_rate": 9.323490453806105e-06, "loss": 3.027078628540039, "step": 1434 }, { "epoch": 0.7536764705882353, "grad_norm": 13.392299254395775, "learning_rate": 9.321954630063742e-06, "loss": 3.040900468826294, "step": 1435 }, { "epoch": 0.7542016806722689, "grad_norm": 8.216103167302693, "learning_rate": 9.320417191763207e-06, "loss": 2.767831563949585, "step": 1436 }, { "epoch": 0.7547268907563025, "grad_norm": 13.229314420547931, "learning_rate": 9.318878139478842e-06, "loss": 2.596747398376465, "step": 1437 }, { "epoch": 0.7552521008403361, "grad_norm": 10.64212348650333, "learning_rate": 9.31733747378559e-06, "loss": 3.714005947113037, "step": 1438 }, { "epoch": 0.7557773109243697, "grad_norm": 15.23119062631491, "learning_rate": 9.315795195259003e-06, "loss": 3.0364737510681152, "step": 1439 }, { "epoch": 0.7563025210084033, "grad_norm": 17.52760220774232, "learning_rate": 9.314251304475233e-06, "loss": 3.9018704891204834, "step": 1440 }, { "epoch": 0.756827731092437, "grad_norm": 14.535598647006452, "learning_rate": 9.312705802011029e-06, "loss": 2.916362762451172, "step": 1441 }, { "epoch": 0.7573529411764706, "grad_norm": 15.613857086491242, "learning_rate": 9.31115868844375e-06, "loss": 2.829233169555664, "step": 1442 }, { "epoch": 0.7578781512605042, "grad_norm": 18.91393207585664, "learning_rate": 9.30960996435135e-06, "loss": 3.149411678314209, "step": 1443 }, { "epoch": 0.7584033613445378, "grad_norm": 20.954511727143665, "learning_rate": 9.308059630312391e-06, "loss": 2.4108259677886963, "step": 1444 }, { "epoch": 0.7589285714285714, "grad_norm": 13.117738732374297, "learning_rate": 9.306507686906033e-06, "loss": 3.152261257171631, "step": 1445 }, { "epoch": 0.759453781512605, "grad_norm": 11.940719191817243, "learning_rate": 9.304954134712034e-06, "loss": 2.6755151748657227, "step": 1446 }, { "epoch": 0.7599789915966386, "grad_norm": 6.49025082292054, "learning_rate": 9.30339897431076e-06, "loss": 2.705385446548462, "step": 1447 }, { "epoch": 0.7605042016806722, "grad_norm": 12.99172427229987, "learning_rate": 9.301842206283173e-06, "loss": 2.8926711082458496, "step": 1448 }, { "epoch": 0.7610294117647058, "grad_norm": 12.412417581013589, "learning_rate": 9.300283831210838e-06, "loss": 2.6391053199768066, "step": 1449 }, { "epoch": 0.7615546218487395, "grad_norm": 5.9448044218751, "learning_rate": 9.298723849675916e-06, "loss": 2.8540122509002686, "step": 1450 }, { "epoch": 0.7620798319327731, "grad_norm": 10.00337964899393, "learning_rate": 9.297162262261174e-06, "loss": 2.409593343734741, "step": 1451 }, { "epoch": 0.7626050420168067, "grad_norm": 11.179943644593255, "learning_rate": 9.295599069549977e-06, "loss": 2.9136595726013184, "step": 1452 }, { "epoch": 0.7631302521008403, "grad_norm": 12.333575441933972, "learning_rate": 9.294034272126286e-06, "loss": 3.040872097015381, "step": 1453 }, { "epoch": 0.7636554621848739, "grad_norm": 7.27507806736491, "learning_rate": 9.292467870574669e-06, "loss": 2.6332709789276123, "step": 1454 }, { "epoch": 0.7641806722689075, "grad_norm": 6.542470785682123, "learning_rate": 9.290899865480283e-06, "loss": 3.02650785446167, "step": 1455 }, { "epoch": 0.7647058823529411, "grad_norm": 9.093817469440369, "learning_rate": 9.289330257428894e-06, "loss": 2.564438819885254, "step": 1456 }, { "epoch": 0.7652310924369747, "grad_norm": 13.11724678279817, "learning_rate": 9.28775904700686e-06, "loss": 2.877781629562378, "step": 1457 }, { "epoch": 0.7657563025210085, "grad_norm": 7.9570471475054845, "learning_rate": 9.286186234801142e-06, "loss": 3.0255775451660156, "step": 1458 }, { "epoch": 0.7662815126050421, "grad_norm": 12.617675806442744, "learning_rate": 9.284611821399295e-06, "loss": 3.3234949111938477, "step": 1459 }, { "epoch": 0.7668067226890757, "grad_norm": 7.22554471917852, "learning_rate": 9.283035807389477e-06, "loss": 2.3798437118530273, "step": 1460 }, { "epoch": 0.7673319327731093, "grad_norm": 10.132569658780335, "learning_rate": 9.281458193360442e-06, "loss": 2.7522311210632324, "step": 1461 }, { "epoch": 0.7678571428571429, "grad_norm": 12.685584791423695, "learning_rate": 9.279878979901538e-06, "loss": 2.3602542877197266, "step": 1462 }, { "epoch": 0.7683823529411765, "grad_norm": 21.579192126219223, "learning_rate": 9.278298167602716e-06, "loss": 3.0811705589294434, "step": 1463 }, { "epoch": 0.7689075630252101, "grad_norm": 16.272382663327445, "learning_rate": 9.276715757054523e-06, "loss": 2.832975387573242, "step": 1464 }, { "epoch": 0.7694327731092437, "grad_norm": 11.503856614346121, "learning_rate": 9.2751317488481e-06, "loss": 2.9317173957824707, "step": 1465 }, { "epoch": 0.7699579831932774, "grad_norm": 11.352763244351618, "learning_rate": 9.27354614357519e-06, "loss": 2.6808085441589355, "step": 1466 }, { "epoch": 0.770483193277311, "grad_norm": 13.73363768149438, "learning_rate": 9.271958941828125e-06, "loss": 3.0309529304504395, "step": 1467 }, { "epoch": 0.7710084033613446, "grad_norm": 11.346693259463262, "learning_rate": 9.270370144199843e-06, "loss": 3.280457019805908, "step": 1468 }, { "epoch": 0.7715336134453782, "grad_norm": 9.832419580041483, "learning_rate": 9.26877975128387e-06, "loss": 2.869136333465576, "step": 1469 }, { "epoch": 0.7720588235294118, "grad_norm": 8.297634210710237, "learning_rate": 9.267187763674332e-06, "loss": 2.3520267009735107, "step": 1470 }, { "epoch": 0.7725840336134454, "grad_norm": 9.492541847936554, "learning_rate": 9.26559418196595e-06, "loss": 2.6950676441192627, "step": 1471 }, { "epoch": 0.773109243697479, "grad_norm": 7.0219316314305305, "learning_rate": 9.263999006754041e-06, "loss": 2.259110927581787, "step": 1472 }, { "epoch": 0.7736344537815126, "grad_norm": 17.116096402526342, "learning_rate": 9.262402238634514e-06, "loss": 3.1345925331115723, "step": 1473 }, { "epoch": 0.7741596638655462, "grad_norm": 13.56774431196218, "learning_rate": 9.26080387820388e-06, "loss": 2.7827768325805664, "step": 1474 }, { "epoch": 0.7746848739495799, "grad_norm": 14.308155016498903, "learning_rate": 9.259203926059237e-06, "loss": 2.0597972869873047, "step": 1475 }, { "epoch": 0.7752100840336135, "grad_norm": 8.428954987828398, "learning_rate": 9.257602382798283e-06, "loss": 3.4040629863739014, "step": 1476 }, { "epoch": 0.7757352941176471, "grad_norm": 28.692532317833248, "learning_rate": 9.255999249019307e-06, "loss": 2.8586981296539307, "step": 1477 }, { "epoch": 0.7762605042016807, "grad_norm": 8.195887413422152, "learning_rate": 9.254394525321195e-06, "loss": 3.0691356658935547, "step": 1478 }, { "epoch": 0.7767857142857143, "grad_norm": 9.057303498856644, "learning_rate": 9.252788212303426e-06, "loss": 2.7064573764801025, "step": 1479 }, { "epoch": 0.7773109243697479, "grad_norm": 7.786703582739036, "learning_rate": 9.251180310566073e-06, "loss": 2.972693681716919, "step": 1480 }, { "epoch": 0.7778361344537815, "grad_norm": 12.046110411363257, "learning_rate": 9.2495708207098e-06, "loss": 3.323434591293335, "step": 1481 }, { "epoch": 0.7783613445378151, "grad_norm": 13.599940686488399, "learning_rate": 9.247959743335865e-06, "loss": 2.790578842163086, "step": 1482 }, { "epoch": 0.7788865546218487, "grad_norm": 20.60611191528932, "learning_rate": 9.246347079046124e-06, "loss": 2.6960320472717285, "step": 1483 }, { "epoch": 0.7794117647058824, "grad_norm": 10.075570960784976, "learning_rate": 9.244732828443021e-06, "loss": 2.6370906829833984, "step": 1484 }, { "epoch": 0.779936974789916, "grad_norm": 18.088967248656378, "learning_rate": 9.243116992129593e-06, "loss": 3.318084716796875, "step": 1485 }, { "epoch": 0.7804621848739496, "grad_norm": 10.461301597730223, "learning_rate": 9.241499570709468e-06, "loss": 3.348841428756714, "step": 1486 }, { "epoch": 0.7809873949579832, "grad_norm": 10.365807099386126, "learning_rate": 9.239880564786871e-06, "loss": 2.163569211959839, "step": 1487 }, { "epoch": 0.7815126050420168, "grad_norm": 8.677495079018977, "learning_rate": 9.238259974966617e-06, "loss": 1.077256441116333, "step": 1488 }, { "epoch": 0.7820378151260504, "grad_norm": 6.116014149170227, "learning_rate": 9.23663780185411e-06, "loss": 2.9423060417175293, "step": 1489 }, { "epoch": 0.782563025210084, "grad_norm": 12.069884185295985, "learning_rate": 9.235014046055347e-06, "loss": 2.5108654499053955, "step": 1490 }, { "epoch": 0.7830882352941176, "grad_norm": 18.854346897658274, "learning_rate": 9.233388708176918e-06, "loss": 2.888367176055908, "step": 1491 }, { "epoch": 0.7836134453781513, "grad_norm": 10.104713621849985, "learning_rate": 9.231761788826e-06, "loss": 2.9687886238098145, "step": 1492 }, { "epoch": 0.7841386554621849, "grad_norm": 15.259579316561895, "learning_rate": 9.230133288610366e-06, "loss": 2.781655788421631, "step": 1493 }, { "epoch": 0.7846638655462185, "grad_norm": 9.22616047144127, "learning_rate": 9.228503208138377e-06, "loss": 2.727569818496704, "step": 1494 }, { "epoch": 0.7851890756302521, "grad_norm": 12.794092005035468, "learning_rate": 9.226871548018982e-06, "loss": 2.737361431121826, "step": 1495 }, { "epoch": 0.7857142857142857, "grad_norm": 14.958976430762513, "learning_rate": 9.225238308861723e-06, "loss": 3.536166191101074, "step": 1496 }, { "epoch": 0.7862394957983193, "grad_norm": 14.319205644234918, "learning_rate": 9.223603491276733e-06, "loss": 3.352311611175537, "step": 1497 }, { "epoch": 0.7867647058823529, "grad_norm": 8.920169385437928, "learning_rate": 9.221967095874733e-06, "loss": 2.385847806930542, "step": 1498 }, { "epoch": 0.7872899159663865, "grad_norm": 7.281576255543728, "learning_rate": 9.220329123267031e-06, "loss": 2.830510377883911, "step": 1499 }, { "epoch": 0.7878151260504201, "grad_norm": 10.422748287727703, "learning_rate": 9.218689574065526e-06, "loss": 2.962968111038208, "step": 1500 }, { "epoch": 0.7883403361344538, "grad_norm": 9.245955210912829, "learning_rate": 9.217048448882711e-06, "loss": 2.6180567741394043, "step": 1501 }, { "epoch": 0.7888655462184874, "grad_norm": 13.38146211504871, "learning_rate": 9.21540574833166e-06, "loss": 2.6950042247772217, "step": 1502 }, { "epoch": 0.789390756302521, "grad_norm": 8.810541645626765, "learning_rate": 9.213761473026039e-06, "loss": 3.007662773132324, "step": 1503 }, { "epoch": 0.7899159663865546, "grad_norm": 24.004316030977126, "learning_rate": 9.212115623580101e-06, "loss": 3.302671432495117, "step": 1504 }, { "epoch": 0.7904411764705882, "grad_norm": 7.767136488283772, "learning_rate": 9.210468200608691e-06, "loss": 2.2067933082580566, "step": 1505 }, { "epoch": 0.7909663865546218, "grad_norm": 25.218202744013663, "learning_rate": 9.208819204727236e-06, "loss": 1.941161036491394, "step": 1506 }, { "epoch": 0.7914915966386554, "grad_norm": 7.890959883648676, "learning_rate": 9.207168636551755e-06, "loss": 2.6079719066619873, "step": 1507 }, { "epoch": 0.792016806722689, "grad_norm": 9.973775377953485, "learning_rate": 9.205516496698854e-06, "loss": 2.7024857997894287, "step": 1508 }, { "epoch": 0.7925420168067226, "grad_norm": 6.297199469233552, "learning_rate": 9.203862785785724e-06, "loss": 2.8793561458587646, "step": 1509 }, { "epoch": 0.7930672268907563, "grad_norm": 10.631547124866534, "learning_rate": 9.202207504430142e-06, "loss": 3.013770580291748, "step": 1510 }, { "epoch": 0.7935924369747899, "grad_norm": 13.987690774179669, "learning_rate": 9.200550653250477e-06, "loss": 2.1392877101898193, "step": 1511 }, { "epoch": 0.7941176470588235, "grad_norm": 7.284021811211223, "learning_rate": 9.198892232865677e-06, "loss": 2.730876922607422, "step": 1512 }, { "epoch": 0.7946428571428571, "grad_norm": 18.10239370273535, "learning_rate": 9.197232243895285e-06, "loss": 2.7417426109313965, "step": 1513 }, { "epoch": 0.7951680672268907, "grad_norm": 9.887471795104913, "learning_rate": 9.195570686959421e-06, "loss": 2.0390450954437256, "step": 1514 }, { "epoch": 0.7956932773109243, "grad_norm": 29.397270516471195, "learning_rate": 9.193907562678797e-06, "loss": 3.2995693683624268, "step": 1515 }, { "epoch": 0.7962184873949579, "grad_norm": 13.347163496386369, "learning_rate": 9.192242871674708e-06, "loss": 2.5495872497558594, "step": 1516 }, { "epoch": 0.7967436974789915, "grad_norm": 11.775924365812614, "learning_rate": 9.190576614569035e-06, "loss": 3.3346171379089355, "step": 1517 }, { "epoch": 0.7972689075630253, "grad_norm": 17.052818914961268, "learning_rate": 9.188908791984245e-06, "loss": 3.410426616668701, "step": 1518 }, { "epoch": 0.7977941176470589, "grad_norm": 14.978064758373597, "learning_rate": 9.187239404543387e-06, "loss": 3.411181926727295, "step": 1519 }, { "epoch": 0.7983193277310925, "grad_norm": 7.920223723765901, "learning_rate": 9.185568452870097e-06, "loss": 2.756586790084839, "step": 1520 }, { "epoch": 0.7988445378151261, "grad_norm": 6.622689475441205, "learning_rate": 9.183895937588594e-06, "loss": 2.8143153190612793, "step": 1521 }, { "epoch": 0.7993697478991597, "grad_norm": 12.922525913327187, "learning_rate": 9.18222185932368e-06, "loss": 2.860788106918335, "step": 1522 }, { "epoch": 0.7998949579831933, "grad_norm": 7.11749063393575, "learning_rate": 9.180546218700748e-06, "loss": 2.6753156185150146, "step": 1523 }, { "epoch": 0.8004201680672269, "grad_norm": 11.026417179632636, "learning_rate": 9.178869016345764e-06, "loss": 2.7370190620422363, "step": 1524 }, { "epoch": 0.8009453781512605, "grad_norm": 9.530852095755332, "learning_rate": 9.177190252885285e-06, "loss": 2.4391965866088867, "step": 1525 }, { "epoch": 0.8014705882352942, "grad_norm": 17.67156318578249, "learning_rate": 9.175509928946446e-06, "loss": 4.128862380981445, "step": 1526 }, { "epoch": 0.8019957983193278, "grad_norm": 6.481976886450993, "learning_rate": 9.173828045156971e-06, "loss": 2.7322418689727783, "step": 1527 }, { "epoch": 0.8025210084033614, "grad_norm": 7.48871876982714, "learning_rate": 9.172144602145165e-06, "loss": 3.071397066116333, "step": 1528 }, { "epoch": 0.803046218487395, "grad_norm": 10.492401140740409, "learning_rate": 9.17045960053991e-06, "loss": 2.743645668029785, "step": 1529 }, { "epoch": 0.8035714285714286, "grad_norm": 9.789274538067815, "learning_rate": 9.168773040970676e-06, "loss": 3.117504835128784, "step": 1530 }, { "epoch": 0.8040966386554622, "grad_norm": 12.287467666758925, "learning_rate": 9.167084924067511e-06, "loss": 2.697328805923462, "step": 1531 }, { "epoch": 0.8046218487394958, "grad_norm": 7.365104896606811, "learning_rate": 9.165395250461051e-06, "loss": 2.556335210800171, "step": 1532 }, { "epoch": 0.8051470588235294, "grad_norm": 11.367552528006652, "learning_rate": 9.163704020782507e-06, "loss": 2.795506477355957, "step": 1533 }, { "epoch": 0.805672268907563, "grad_norm": 11.154260289316959, "learning_rate": 9.162011235663673e-06, "loss": 3.1348161697387695, "step": 1534 }, { "epoch": 0.8061974789915967, "grad_norm": 8.879832471703603, "learning_rate": 9.16031689573693e-06, "loss": 2.4550201892852783, "step": 1535 }, { "epoch": 0.8067226890756303, "grad_norm": 14.317067424861127, "learning_rate": 9.158621001635227e-06, "loss": 2.69781494140625, "step": 1536 }, { "epoch": 0.8072478991596639, "grad_norm": 10.35837871977168, "learning_rate": 9.156923553992107e-06, "loss": 2.9104769229888916, "step": 1537 }, { "epoch": 0.8077731092436975, "grad_norm": 9.992905004281724, "learning_rate": 9.155224553441686e-06, "loss": 2.8530702590942383, "step": 1538 }, { "epoch": 0.8082983193277311, "grad_norm": 13.675190289591084, "learning_rate": 9.153524000618663e-06, "loss": 3.2071030139923096, "step": 1539 }, { "epoch": 0.8088235294117647, "grad_norm": 26.7946812082005, "learning_rate": 9.151821896158314e-06, "loss": 2.7262039184570312, "step": 1540 }, { "epoch": 0.8093487394957983, "grad_norm": 7.218018270970506, "learning_rate": 9.150118240696497e-06, "loss": 2.899076461791992, "step": 1541 }, { "epoch": 0.8098739495798319, "grad_norm": 14.037592531540101, "learning_rate": 9.148413034869647e-06, "loss": 3.045046329498291, "step": 1542 }, { "epoch": 0.8103991596638656, "grad_norm": 10.852758138254117, "learning_rate": 9.146706279314786e-06, "loss": 2.4369218349456787, "step": 1543 }, { "epoch": 0.8109243697478992, "grad_norm": 24.53162774572155, "learning_rate": 9.144997974669501e-06, "loss": 2.9881112575531006, "step": 1544 }, { "epoch": 0.8114495798319328, "grad_norm": 8.110464226350164, "learning_rate": 9.14328812157197e-06, "loss": 2.4895076751708984, "step": 1545 }, { "epoch": 0.8119747899159664, "grad_norm": 19.66295977199353, "learning_rate": 9.141576720660946e-06, "loss": 3.2586450576782227, "step": 1546 }, { "epoch": 0.8125, "grad_norm": 13.08501660759478, "learning_rate": 9.139863772575755e-06, "loss": 3.2728970050811768, "step": 1547 }, { "epoch": 0.8130252100840336, "grad_norm": 20.743029134853124, "learning_rate": 9.138149277956307e-06, "loss": 3.0937044620513916, "step": 1548 }, { "epoch": 0.8135504201680672, "grad_norm": 14.957686663631023, "learning_rate": 9.136433237443093e-06, "loss": 2.2607643604278564, "step": 1549 }, { "epoch": 0.8140756302521008, "grad_norm": 12.977473079137107, "learning_rate": 9.134715651677168e-06, "loss": 2.7575249671936035, "step": 1550 }, { "epoch": 0.8146008403361344, "grad_norm": 8.057678458393218, "learning_rate": 9.132996521300178e-06, "loss": 2.641611337661743, "step": 1551 }, { "epoch": 0.8151260504201681, "grad_norm": 6.814595006980323, "learning_rate": 9.13127584695434e-06, "loss": 2.5166702270507812, "step": 1552 }, { "epoch": 0.8156512605042017, "grad_norm": 12.068766752736664, "learning_rate": 9.129553629282448e-06, "loss": 2.5141143798828125, "step": 1553 }, { "epoch": 0.8161764705882353, "grad_norm": 11.878379327278529, "learning_rate": 9.127829868927873e-06, "loss": 2.772965431213379, "step": 1554 }, { "epoch": 0.8167016806722689, "grad_norm": 8.460163189018663, "learning_rate": 9.126104566534565e-06, "loss": 2.6576623916625977, "step": 1555 }, { "epoch": 0.8172268907563025, "grad_norm": 9.554429558763722, "learning_rate": 9.124377722747041e-06, "loss": 2.7791483402252197, "step": 1556 }, { "epoch": 0.8177521008403361, "grad_norm": 12.88122105903733, "learning_rate": 9.122649338210407e-06, "loss": 2.890918731689453, "step": 1557 }, { "epoch": 0.8182773109243697, "grad_norm": 11.189030906316328, "learning_rate": 9.120919413570335e-06, "loss": 2.879544734954834, "step": 1558 }, { "epoch": 0.8188025210084033, "grad_norm": 6.702927071024383, "learning_rate": 9.119187949473075e-06, "loss": 3.0606887340545654, "step": 1559 }, { "epoch": 0.819327731092437, "grad_norm": 11.643616072444471, "learning_rate": 9.117454946565452e-06, "loss": 3.1395931243896484, "step": 1560 }, { "epoch": 0.8198529411764706, "grad_norm": 8.0768184982513, "learning_rate": 9.115720405494868e-06, "loss": 2.4597113132476807, "step": 1561 }, { "epoch": 0.8203781512605042, "grad_norm": 14.236125147142554, "learning_rate": 9.113984326909295e-06, "loss": 2.946138381958008, "step": 1562 }, { "epoch": 0.8209033613445378, "grad_norm": 16.782998241316438, "learning_rate": 9.112246711457284e-06, "loss": 3.0810306072235107, "step": 1563 }, { "epoch": 0.8214285714285714, "grad_norm": 17.928544530056907, "learning_rate": 9.110507559787959e-06, "loss": 2.6110334396362305, "step": 1564 }, { "epoch": 0.821953781512605, "grad_norm": 9.778722921509674, "learning_rate": 9.108766872551016e-06, "loss": 2.1677751541137695, "step": 1565 }, { "epoch": 0.8224789915966386, "grad_norm": 17.178141544970295, "learning_rate": 9.107024650396725e-06, "loss": 2.8547592163085938, "step": 1566 }, { "epoch": 0.8230042016806722, "grad_norm": 9.291618929323715, "learning_rate": 9.105280893975931e-06, "loss": 2.4209301471710205, "step": 1567 }, { "epoch": 0.8235294117647058, "grad_norm": 10.98518228802169, "learning_rate": 9.10353560394005e-06, "loss": 2.8176021575927734, "step": 1568 }, { "epoch": 0.8240546218487395, "grad_norm": 18.370639315206148, "learning_rate": 9.101788780941076e-06, "loss": 2.550027370452881, "step": 1569 }, { "epoch": 0.8245798319327731, "grad_norm": 6.883194881280978, "learning_rate": 9.10004042563157e-06, "loss": 2.9312143325805664, "step": 1570 }, { "epoch": 0.8251050420168067, "grad_norm": 8.575683307401519, "learning_rate": 9.098290538664665e-06, "loss": 3.1522021293640137, "step": 1571 }, { "epoch": 0.8256302521008403, "grad_norm": 9.918808821214965, "learning_rate": 9.09653912069407e-06, "loss": 2.621941328048706, "step": 1572 }, { "epoch": 0.8261554621848739, "grad_norm": 5.701315798905515, "learning_rate": 9.094786172374066e-06, "loss": 2.666314125061035, "step": 1573 }, { "epoch": 0.8266806722689075, "grad_norm": 9.470981150117249, "learning_rate": 9.093031694359503e-06, "loss": 2.6916842460632324, "step": 1574 }, { "epoch": 0.8272058823529411, "grad_norm": 10.999721066285408, "learning_rate": 9.091275687305804e-06, "loss": 3.0902199745178223, "step": 1575 }, { "epoch": 0.8277310924369747, "grad_norm": 10.424283783181096, "learning_rate": 9.089518151868961e-06, "loss": 2.500002861022949, "step": 1576 }, { "epoch": 0.8282563025210085, "grad_norm": 18.008637670608827, "learning_rate": 9.087759088705541e-06, "loss": 2.706355094909668, "step": 1577 }, { "epoch": 0.8287815126050421, "grad_norm": 17.672786450639272, "learning_rate": 9.08599849847268e-06, "loss": 3.002948760986328, "step": 1578 }, { "epoch": 0.8293067226890757, "grad_norm": 12.34897547508807, "learning_rate": 9.08423638182808e-06, "loss": 2.652674674987793, "step": 1579 }, { "epoch": 0.8298319327731093, "grad_norm": 19.342554527070632, "learning_rate": 9.082472739430022e-06, "loss": 3.3123912811279297, "step": 1580 }, { "epoch": 0.8303571428571429, "grad_norm": 13.128261899532248, "learning_rate": 9.08070757193735e-06, "loss": 2.7712619304656982, "step": 1581 }, { "epoch": 0.8308823529411765, "grad_norm": 23.14870368573147, "learning_rate": 9.07894088000948e-06, "loss": 2.1335580348968506, "step": 1582 }, { "epoch": 0.8314075630252101, "grad_norm": 18.28553629166134, "learning_rate": 9.077172664306396e-06, "loss": 2.875230312347412, "step": 1583 }, { "epoch": 0.8319327731092437, "grad_norm": 10.018573350157663, "learning_rate": 9.075402925488654e-06, "loss": 3.035533905029297, "step": 1584 }, { "epoch": 0.8324579831932774, "grad_norm": 14.89725356913745, "learning_rate": 9.07363166421738e-06, "loss": 2.926954507827759, "step": 1585 }, { "epoch": 0.832983193277311, "grad_norm": 7.394915894853549, "learning_rate": 9.071858881154262e-06, "loss": 2.3858561515808105, "step": 1586 }, { "epoch": 0.8335084033613446, "grad_norm": 7.845961783757571, "learning_rate": 9.070084576961563e-06, "loss": 2.6644482612609863, "step": 1587 }, { "epoch": 0.8340336134453782, "grad_norm": 19.23898419687951, "learning_rate": 9.068308752302113e-06, "loss": 2.29180908203125, "step": 1588 }, { "epoch": 0.8345588235294118, "grad_norm": 9.90657725166347, "learning_rate": 9.066531407839307e-06, "loss": 2.924722671508789, "step": 1589 }, { "epoch": 0.8350840336134454, "grad_norm": 11.93098730535124, "learning_rate": 9.06475254423711e-06, "loss": 2.1523523330688477, "step": 1590 }, { "epoch": 0.835609243697479, "grad_norm": 14.862881541887209, "learning_rate": 9.062972162160058e-06, "loss": 3.141295909881592, "step": 1591 }, { "epoch": 0.8361344537815126, "grad_norm": 13.99486401192248, "learning_rate": 9.061190262273245e-06, "loss": 2.4941205978393555, "step": 1592 }, { "epoch": 0.8366596638655462, "grad_norm": 7.333693688632184, "learning_rate": 9.059406845242343e-06, "loss": 2.8340518474578857, "step": 1593 }, { "epoch": 0.8371848739495799, "grad_norm": 9.769839112158232, "learning_rate": 9.057621911733581e-06, "loss": 2.4519026279449463, "step": 1594 }, { "epoch": 0.8377100840336135, "grad_norm": 20.089062803616674, "learning_rate": 9.055835462413763e-06, "loss": 2.875101089477539, "step": 1595 }, { "epoch": 0.8382352941176471, "grad_norm": 10.346942444729434, "learning_rate": 9.05404749795025e-06, "loss": 2.7726926803588867, "step": 1596 }, { "epoch": 0.8387605042016807, "grad_norm": 16.022987427837723, "learning_rate": 9.05225801901098e-06, "loss": 2.792201519012451, "step": 1597 }, { "epoch": 0.8392857142857143, "grad_norm": 14.344387140750028, "learning_rate": 9.050467026264448e-06, "loss": 2.3667960166931152, "step": 1598 }, { "epoch": 0.8398109243697479, "grad_norm": 23.523762614195864, "learning_rate": 9.048674520379715e-06, "loss": 2.5323946475982666, "step": 1599 }, { "epoch": 0.8403361344537815, "grad_norm": 10.0204642840144, "learning_rate": 9.046880502026414e-06, "loss": 2.6077938079833984, "step": 1600 }, { "epoch": 0.8408613445378151, "grad_norm": 11.639795625878955, "learning_rate": 9.045084971874738e-06, "loss": 2.2000503540039062, "step": 1601 }, { "epoch": 0.8413865546218487, "grad_norm": 11.649591327935388, "learning_rate": 9.043287930595444e-06, "loss": 3.045988082885742, "step": 1602 }, { "epoch": 0.8419117647058824, "grad_norm": 7.682753411560617, "learning_rate": 9.041489378859856e-06, "loss": 2.8375630378723145, "step": 1603 }, { "epoch": 0.842436974789916, "grad_norm": 14.816083596152376, "learning_rate": 9.039689317339861e-06, "loss": 2.669459581375122, "step": 1604 }, { "epoch": 0.8429621848739496, "grad_norm": 10.175232572354465, "learning_rate": 9.03788774670791e-06, "loss": 2.890617609024048, "step": 1605 }, { "epoch": 0.8434873949579832, "grad_norm": 10.31357183244063, "learning_rate": 9.036084667637018e-06, "loss": 2.981811285018921, "step": 1606 }, { "epoch": 0.8440126050420168, "grad_norm": 9.441625823542742, "learning_rate": 9.034280080800764e-06, "loss": 2.2013072967529297, "step": 1607 }, { "epoch": 0.8445378151260504, "grad_norm": 12.058196662308841, "learning_rate": 9.03247398687329e-06, "loss": 2.2728679180145264, "step": 1608 }, { "epoch": 0.845063025210084, "grad_norm": 8.613017389365172, "learning_rate": 9.030666386529303e-06, "loss": 2.5344839096069336, "step": 1609 }, { "epoch": 0.8455882352941176, "grad_norm": 19.199649351118968, "learning_rate": 9.028857280444066e-06, "loss": 3.035724639892578, "step": 1610 }, { "epoch": 0.8461134453781513, "grad_norm": 14.928108605529136, "learning_rate": 9.027046669293411e-06, "loss": 3.178629159927368, "step": 1611 }, { "epoch": 0.8466386554621849, "grad_norm": 11.755087123322417, "learning_rate": 9.025234553753733e-06, "loss": 2.9528074264526367, "step": 1612 }, { "epoch": 0.8471638655462185, "grad_norm": 6.5512554572117025, "learning_rate": 9.023420934501981e-06, "loss": 2.4574341773986816, "step": 1613 }, { "epoch": 0.8476890756302521, "grad_norm": 6.431330019113703, "learning_rate": 9.021605812215675e-06, "loss": 3.0148768424987793, "step": 1614 }, { "epoch": 0.8482142857142857, "grad_norm": 12.369231388742493, "learning_rate": 9.019789187572891e-06, "loss": 2.9979090690612793, "step": 1615 }, { "epoch": 0.8487394957983193, "grad_norm": 12.324555670892899, "learning_rate": 9.01797106125227e-06, "loss": 2.5081629753112793, "step": 1616 }, { "epoch": 0.8492647058823529, "grad_norm": 6.567047520562435, "learning_rate": 9.01615143393301e-06, "loss": 2.941706418991089, "step": 1617 }, { "epoch": 0.8497899159663865, "grad_norm": 6.613055280987097, "learning_rate": 9.014330306294872e-06, "loss": 2.133114814758301, "step": 1618 }, { "epoch": 0.8503151260504201, "grad_norm": 17.371922489063806, "learning_rate": 9.012507679018177e-06, "loss": 2.7291688919067383, "step": 1619 }, { "epoch": 0.8508403361344538, "grad_norm": 8.979174214683121, "learning_rate": 9.010683552783805e-06, "loss": 2.460345506668091, "step": 1620 }, { "epoch": 0.8513655462184874, "grad_norm": 13.877560062602784, "learning_rate": 9.008857928273199e-06, "loss": 2.723776340484619, "step": 1621 }, { "epoch": 0.851890756302521, "grad_norm": 19.243310596262656, "learning_rate": 9.00703080616836e-06, "loss": 2.996762275695801, "step": 1622 }, { "epoch": 0.8524159663865546, "grad_norm": 7.832357435651682, "learning_rate": 9.005202187151845e-06, "loss": 2.9319770336151123, "step": 1623 }, { "epoch": 0.8529411764705882, "grad_norm": 7.262182086236559, "learning_rate": 9.003372071906778e-06, "loss": 2.6601133346557617, "step": 1624 }, { "epoch": 0.8534663865546218, "grad_norm": 8.113081596555123, "learning_rate": 9.001540461116835e-06, "loss": 2.4604804515838623, "step": 1625 }, { "epoch": 0.8539915966386554, "grad_norm": 9.882974896597206, "learning_rate": 8.999707355466254e-06, "loss": 2.5082545280456543, "step": 1626 }, { "epoch": 0.854516806722689, "grad_norm": 22.703642831626503, "learning_rate": 8.997872755639831e-06, "loss": 2.6613025665283203, "step": 1627 }, { "epoch": 0.8550420168067226, "grad_norm": 13.242193210134968, "learning_rate": 8.996036662322917e-06, "loss": 3.289824962615967, "step": 1628 }, { "epoch": 0.8555672268907563, "grad_norm": 12.04027646676417, "learning_rate": 8.994199076201428e-06, "loss": 3.009115219116211, "step": 1629 }, { "epoch": 0.8560924369747899, "grad_norm": 36.66965855685249, "learning_rate": 8.992359997961833e-06, "loss": 2.9523236751556396, "step": 1630 }, { "epoch": 0.8566176470588235, "grad_norm": 11.801502784635273, "learning_rate": 8.990519428291156e-06, "loss": 2.5500600337982178, "step": 1631 }, { "epoch": 0.8571428571428571, "grad_norm": 7.449739602478073, "learning_rate": 8.988677367876981e-06, "loss": 3.536142349243164, "step": 1632 }, { "epoch": 0.8576680672268907, "grad_norm": 7.682464339065975, "learning_rate": 8.98683381740745e-06, "loss": 2.782010316848755, "step": 1633 }, { "epoch": 0.8581932773109243, "grad_norm": 6.836296703770633, "learning_rate": 8.984988777571262e-06, "loss": 2.6691298484802246, "step": 1634 }, { "epoch": 0.8587184873949579, "grad_norm": 13.059193020003558, "learning_rate": 8.98314224905767e-06, "loss": 3.201601266860962, "step": 1635 }, { "epoch": 0.8592436974789915, "grad_norm": 6.560365363362825, "learning_rate": 8.981294232556484e-06, "loss": 2.8478498458862305, "step": 1636 }, { "epoch": 0.8597689075630253, "grad_norm": 12.339699916876029, "learning_rate": 8.979444728758067e-06, "loss": 2.8949522972106934, "step": 1637 }, { "epoch": 0.8602941176470589, "grad_norm": 11.011868694472776, "learning_rate": 8.977593738353346e-06, "loss": 2.8054542541503906, "step": 1638 }, { "epoch": 0.8608193277310925, "grad_norm": 8.218158324670545, "learning_rate": 8.975741262033793e-06, "loss": 2.6899096965789795, "step": 1639 }, { "epoch": 0.8613445378151261, "grad_norm": 8.696709895043137, "learning_rate": 8.973887300491442e-06, "loss": 3.0374200344085693, "step": 1640 }, { "epoch": 0.8618697478991597, "grad_norm": 6.921135339435363, "learning_rate": 8.97203185441888e-06, "loss": 2.734715700149536, "step": 1641 }, { "epoch": 0.8623949579831933, "grad_norm": 12.337835404848839, "learning_rate": 8.970174924509247e-06, "loss": 2.811793327331543, "step": 1642 }, { "epoch": 0.8629201680672269, "grad_norm": 21.3494362008051, "learning_rate": 8.968316511456241e-06, "loss": 2.6970551013946533, "step": 1643 }, { "epoch": 0.8634453781512605, "grad_norm": 11.8210308738897, "learning_rate": 8.966456615954112e-06, "loss": 2.7946279048919678, "step": 1644 }, { "epoch": 0.8639705882352942, "grad_norm": 23.76275781703291, "learning_rate": 8.964595238697659e-06, "loss": 2.2927374839782715, "step": 1645 }, { "epoch": 0.8644957983193278, "grad_norm": 18.33915813125871, "learning_rate": 8.962732380382246e-06, "loss": 2.313857316970825, "step": 1646 }, { "epoch": 0.8650210084033614, "grad_norm": 9.160965929064266, "learning_rate": 8.960868041703777e-06, "loss": 2.387964963912964, "step": 1647 }, { "epoch": 0.865546218487395, "grad_norm": 8.598635522741514, "learning_rate": 8.95900222335872e-06, "loss": 2.843458890914917, "step": 1648 }, { "epoch": 0.8660714285714286, "grad_norm": 9.765346338635439, "learning_rate": 8.957134926044088e-06, "loss": 2.3733465671539307, "step": 1649 }, { "epoch": 0.8665966386554622, "grad_norm": 13.556767115272129, "learning_rate": 8.955266150457452e-06, "loss": 2.921518087387085, "step": 1650 }, { "epoch": 0.8671218487394958, "grad_norm": 14.685458691050979, "learning_rate": 8.953395897296929e-06, "loss": 2.679130792617798, "step": 1651 }, { "epoch": 0.8676470588235294, "grad_norm": 11.97178655513145, "learning_rate": 8.951524167261197e-06, "loss": 2.4244470596313477, "step": 1652 }, { "epoch": 0.868172268907563, "grad_norm": 8.201418695356, "learning_rate": 8.949650961049479e-06, "loss": 2.051436185836792, "step": 1653 }, { "epoch": 0.8686974789915967, "grad_norm": 14.017589373766244, "learning_rate": 8.94777627936155e-06, "loss": 2.378939151763916, "step": 1654 }, { "epoch": 0.8692226890756303, "grad_norm": 18.71807707716699, "learning_rate": 8.945900122897735e-06, "loss": 3.1834826469421387, "step": 1655 }, { "epoch": 0.8697478991596639, "grad_norm": 17.128359264542194, "learning_rate": 8.944022492358917e-06, "loss": 3.1574788093566895, "step": 1656 }, { "epoch": 0.8702731092436975, "grad_norm": 6.959753715832457, "learning_rate": 8.942143388446522e-06, "loss": 2.0516068935394287, "step": 1657 }, { "epoch": 0.8707983193277311, "grad_norm": 19.255530601752262, "learning_rate": 8.94026281186253e-06, "loss": 2.8824527263641357, "step": 1658 }, { "epoch": 0.8713235294117647, "grad_norm": 15.055252679385482, "learning_rate": 8.93838076330947e-06, "loss": 2.9361789226531982, "step": 1659 }, { "epoch": 0.8718487394957983, "grad_norm": 12.447043391356829, "learning_rate": 8.936497243490422e-06, "loss": 2.820972442626953, "step": 1660 }, { "epoch": 0.8723739495798319, "grad_norm": 12.634858670537199, "learning_rate": 8.934612253109017e-06, "loss": 1.623253583908081, "step": 1661 }, { "epoch": 0.8728991596638656, "grad_norm": 18.08487720174843, "learning_rate": 8.932725792869427e-06, "loss": 2.883774757385254, "step": 1662 }, { "epoch": 0.8734243697478992, "grad_norm": 8.014012097683683, "learning_rate": 8.930837863476386e-06, "loss": 3.228044033050537, "step": 1663 }, { "epoch": 0.8739495798319328, "grad_norm": 14.519615184017779, "learning_rate": 8.928948465635168e-06, "loss": 2.461923360824585, "step": 1664 }, { "epoch": 0.8744747899159664, "grad_norm": 7.6317719677332425, "learning_rate": 8.927057600051594e-06, "loss": 3.0148236751556396, "step": 1665 }, { "epoch": 0.875, "grad_norm": 13.162314049139713, "learning_rate": 8.925165267432044e-06, "loss": 3.121001958847046, "step": 1666 }, { "epoch": 0.8755252100840336, "grad_norm": 17.913142458560134, "learning_rate": 8.923271468483434e-06, "loss": 2.5421323776245117, "step": 1667 }, { "epoch": 0.8760504201680672, "grad_norm": 8.57593747128098, "learning_rate": 8.921376203913235e-06, "loss": 2.5172932147979736, "step": 1668 }, { "epoch": 0.8765756302521008, "grad_norm": 15.23364369766554, "learning_rate": 8.919479474429462e-06, "loss": 3.1511287689208984, "step": 1669 }, { "epoch": 0.8771008403361344, "grad_norm": 7.184408074539215, "learning_rate": 8.917581280740678e-06, "loss": 2.1041531562805176, "step": 1670 }, { "epoch": 0.8776260504201681, "grad_norm": 13.539497022029291, "learning_rate": 8.915681623556e-06, "loss": 2.539247751235962, "step": 1671 }, { "epoch": 0.8781512605042017, "grad_norm": 10.215448042272689, "learning_rate": 8.913780503585076e-06, "loss": 2.619029998779297, "step": 1672 }, { "epoch": 0.8786764705882353, "grad_norm": 16.407774137063473, "learning_rate": 8.911877921538117e-06, "loss": 2.8678293228149414, "step": 1673 }, { "epoch": 0.8792016806722689, "grad_norm": 7.616181387849795, "learning_rate": 8.90997387812587e-06, "loss": 2.9257354736328125, "step": 1674 }, { "epoch": 0.8797268907563025, "grad_norm": 11.510999061577067, "learning_rate": 8.90806837405963e-06, "loss": 2.6372413635253906, "step": 1675 }, { "epoch": 0.8802521008403361, "grad_norm": 10.154170848148102, "learning_rate": 8.906161410051243e-06, "loss": 2.90159273147583, "step": 1676 }, { "epoch": 0.8807773109243697, "grad_norm": 23.0698419512271, "learning_rate": 8.904252986813091e-06, "loss": 3.2704274654388428, "step": 1677 }, { "epoch": 0.8813025210084033, "grad_norm": 8.452106369556175, "learning_rate": 8.902343105058108e-06, "loss": 2.906270742416382, "step": 1678 }, { "epoch": 0.881827731092437, "grad_norm": 19.2658822183441, "learning_rate": 8.900431765499773e-06, "loss": 2.535334348678589, "step": 1679 }, { "epoch": 0.8823529411764706, "grad_norm": 13.918139299702606, "learning_rate": 8.898518968852106e-06, "loss": 2.8137190341949463, "step": 1680 }, { "epoch": 0.8828781512605042, "grad_norm": 17.074616182965027, "learning_rate": 8.896604715829671e-06, "loss": 2.617612361907959, "step": 1681 }, { "epoch": 0.8834033613445378, "grad_norm": 13.439043201251245, "learning_rate": 8.89468900714758e-06, "loss": 2.9933958053588867, "step": 1682 }, { "epoch": 0.8839285714285714, "grad_norm": 9.45811011094876, "learning_rate": 8.892771843521487e-06, "loss": 2.933568239212036, "step": 1683 }, { "epoch": 0.884453781512605, "grad_norm": 10.572139165361671, "learning_rate": 8.890853225667588e-06, "loss": 3.162017822265625, "step": 1684 }, { "epoch": 0.8849789915966386, "grad_norm": 45.81132027610318, "learning_rate": 8.888933154302626e-06, "loss": 2.919490098953247, "step": 1685 }, { "epoch": 0.8855042016806722, "grad_norm": 9.79457843340495, "learning_rate": 8.887011630143881e-06, "loss": 3.1591439247131348, "step": 1686 }, { "epoch": 0.8860294117647058, "grad_norm": 32.897605463053544, "learning_rate": 8.885088653909182e-06, "loss": 3.183682918548584, "step": 1687 }, { "epoch": 0.8865546218487395, "grad_norm": 8.911375505162102, "learning_rate": 8.883164226316897e-06, "loss": 2.6541295051574707, "step": 1688 }, { "epoch": 0.8870798319327731, "grad_norm": 15.724794674305066, "learning_rate": 8.881238348085936e-06, "loss": 2.4864635467529297, "step": 1689 }, { "epoch": 0.8876050420168067, "grad_norm": 10.486028943660472, "learning_rate": 8.879311019935752e-06, "loss": 2.7954046726226807, "step": 1690 }, { "epoch": 0.8881302521008403, "grad_norm": 11.23597454548207, "learning_rate": 8.877382242586341e-06, "loss": 2.1166534423828125, "step": 1691 }, { "epoch": 0.8886554621848739, "grad_norm": 21.30826530801347, "learning_rate": 8.875452016758239e-06, "loss": 2.9630651473999023, "step": 1692 }, { "epoch": 0.8891806722689075, "grad_norm": 13.404603348241913, "learning_rate": 8.87352034317252e-06, "loss": 2.4266562461853027, "step": 1693 }, { "epoch": 0.8897058823529411, "grad_norm": 6.502915576741012, "learning_rate": 8.871587222550805e-06, "loss": 2.5795416831970215, "step": 1694 }, { "epoch": 0.8902310924369747, "grad_norm": 11.119334651662573, "learning_rate": 8.86965265561525e-06, "loss": 2.777190685272217, "step": 1695 }, { "epoch": 0.8907563025210085, "grad_norm": 17.86791152944123, "learning_rate": 8.867716643088556e-06, "loss": 3.3912010192871094, "step": 1696 }, { "epoch": 0.8912815126050421, "grad_norm": 17.06221085051761, "learning_rate": 8.865779185693957e-06, "loss": 2.7764787673950195, "step": 1697 }, { "epoch": 0.8918067226890757, "grad_norm": 14.97933346471872, "learning_rate": 8.863840284155238e-06, "loss": 2.9894399642944336, "step": 1698 }, { "epoch": 0.8923319327731093, "grad_norm": 22.168720702326574, "learning_rate": 8.861899939196713e-06, "loss": 2.7285947799682617, "step": 1699 }, { "epoch": 0.8928571428571429, "grad_norm": 9.30645480741557, "learning_rate": 8.85995815154324e-06, "loss": 2.4073543548583984, "step": 1700 }, { "epoch": 0.8933823529411765, "grad_norm": 24.347057275120157, "learning_rate": 8.858014921920215e-06, "loss": 2.8383591175079346, "step": 1701 }, { "epoch": 0.8939075630252101, "grad_norm": 8.858399302051355, "learning_rate": 8.856070251053572e-06, "loss": 2.7161450386047363, "step": 1702 }, { "epoch": 0.8944327731092437, "grad_norm": 9.047301836053926, "learning_rate": 8.854124139669786e-06, "loss": 3.419163465499878, "step": 1703 }, { "epoch": 0.8949579831932774, "grad_norm": 13.228462933779975, "learning_rate": 8.852176588495867e-06, "loss": 3.4164323806762695, "step": 1704 }, { "epoch": 0.895483193277311, "grad_norm": 12.974830432527467, "learning_rate": 8.850227598259365e-06, "loss": 2.6973154544830322, "step": 1705 }, { "epoch": 0.8960084033613446, "grad_norm": 11.184728469837097, "learning_rate": 8.848277169688367e-06, "loss": 2.7660531997680664, "step": 1706 }, { "epoch": 0.8965336134453782, "grad_norm": 14.59336775482168, "learning_rate": 8.846325303511496e-06, "loss": 2.886784076690674, "step": 1707 }, { "epoch": 0.8970588235294118, "grad_norm": 12.71634831979801, "learning_rate": 8.844372000457912e-06, "loss": 2.6247849464416504, "step": 1708 }, { "epoch": 0.8975840336134454, "grad_norm": 8.224274295081706, "learning_rate": 8.842417261257316e-06, "loss": 2.5844714641571045, "step": 1709 }, { "epoch": 0.898109243697479, "grad_norm": 11.057156827735009, "learning_rate": 8.840461086639942e-06, "loss": 2.1547021865844727, "step": 1710 }, { "epoch": 0.8986344537815126, "grad_norm": 6.008911221601693, "learning_rate": 8.83850347733656e-06, "loss": 2.193338394165039, "step": 1711 }, { "epoch": 0.8991596638655462, "grad_norm": 9.797789057581019, "learning_rate": 8.836544434078473e-06, "loss": 2.0880467891693115, "step": 1712 }, { "epoch": 0.8996848739495799, "grad_norm": 8.385600858999325, "learning_rate": 8.83458395759753e-06, "loss": 2.9709341526031494, "step": 1713 }, { "epoch": 0.9002100840336135, "grad_norm": 11.02729520920482, "learning_rate": 8.832622048626104e-06, "loss": 2.6536970138549805, "step": 1714 }, { "epoch": 0.9007352941176471, "grad_norm": 6.5241815816256095, "learning_rate": 8.830658707897111e-06, "loss": 2.5592041015625, "step": 1715 }, { "epoch": 0.9012605042016807, "grad_norm": 8.289439200032332, "learning_rate": 8.828693936143995e-06, "loss": 2.1298305988311768, "step": 1716 }, { "epoch": 0.9017857142857143, "grad_norm": 9.131984002984787, "learning_rate": 8.826727734100742e-06, "loss": 2.1312835216522217, "step": 1717 }, { "epoch": 0.9023109243697479, "grad_norm": 9.74110691329254, "learning_rate": 8.824760102501865e-06, "loss": 2.2512333393096924, "step": 1718 }, { "epoch": 0.9028361344537815, "grad_norm": 19.930437776451477, "learning_rate": 8.82279104208242e-06, "loss": 2.780616521835327, "step": 1719 }, { "epoch": 0.9033613445378151, "grad_norm": 15.732270777879222, "learning_rate": 8.820820553577985e-06, "loss": 2.844557046890259, "step": 1720 }, { "epoch": 0.9038865546218487, "grad_norm": 11.653246048171543, "learning_rate": 8.818848637724681e-06, "loss": 2.871880054473877, "step": 1721 }, { "epoch": 0.9044117647058824, "grad_norm": 22.783994398798196, "learning_rate": 8.816875295259162e-06, "loss": 2.7712903022766113, "step": 1722 }, { "epoch": 0.904936974789916, "grad_norm": 14.66931092739854, "learning_rate": 8.814900526918608e-06, "loss": 2.664705753326416, "step": 1723 }, { "epoch": 0.9054621848739496, "grad_norm": 13.507599300840212, "learning_rate": 8.812924333440736e-06, "loss": 2.4884164333343506, "step": 1724 }, { "epoch": 0.9059873949579832, "grad_norm": 8.016607953026961, "learning_rate": 8.810946715563798e-06, "loss": 2.5067238807678223, "step": 1725 }, { "epoch": 0.9065126050420168, "grad_norm": 9.126188841154358, "learning_rate": 8.808967674026572e-06, "loss": 2.2832911014556885, "step": 1726 }, { "epoch": 0.9070378151260504, "grad_norm": 9.858863483209218, "learning_rate": 8.806987209568372e-06, "loss": 2.6985623836517334, "step": 1727 }, { "epoch": 0.907563025210084, "grad_norm": 10.771258049018208, "learning_rate": 8.805005322929046e-06, "loss": 1.8074414730072021, "step": 1728 }, { "epoch": 0.9080882352941176, "grad_norm": 21.656788517948772, "learning_rate": 8.803022014848966e-06, "loss": 2.817106246948242, "step": 1729 }, { "epoch": 0.9086134453781513, "grad_norm": 14.257204034945724, "learning_rate": 8.80103728606904e-06, "loss": 2.712790012359619, "step": 1730 }, { "epoch": 0.9091386554621849, "grad_norm": 16.62495737178655, "learning_rate": 8.799051137330705e-06, "loss": 3.398012161254883, "step": 1731 }, { "epoch": 0.9096638655462185, "grad_norm": 13.059757449688918, "learning_rate": 8.79706356937593e-06, "loss": 3.0172595977783203, "step": 1732 }, { "epoch": 0.9101890756302521, "grad_norm": 10.693082411575082, "learning_rate": 8.795074582947214e-06, "loss": 2.965136766433716, "step": 1733 }, { "epoch": 0.9107142857142857, "grad_norm": 9.40965957844002, "learning_rate": 8.793084178787586e-06, "loss": 2.7919960021972656, "step": 1734 }, { "epoch": 0.9112394957983193, "grad_norm": 13.491129075692312, "learning_rate": 8.7910923576406e-06, "loss": 2.837684392929077, "step": 1735 }, { "epoch": 0.9117647058823529, "grad_norm": 8.610810495422943, "learning_rate": 8.789099120250346e-06, "loss": 2.698582172393799, "step": 1736 }, { "epoch": 0.9122899159663865, "grad_norm": 14.92563382940718, "learning_rate": 8.787104467361442e-06, "loss": 2.46994686126709, "step": 1737 }, { "epoch": 0.9128151260504201, "grad_norm": 18.577083028769557, "learning_rate": 8.785108399719029e-06, "loss": 2.800351619720459, "step": 1738 }, { "epoch": 0.9133403361344538, "grad_norm": 13.499774219775034, "learning_rate": 8.783110918068784e-06, "loss": 2.3970227241516113, "step": 1739 }, { "epoch": 0.9138655462184874, "grad_norm": 5.99452123879766, "learning_rate": 8.781112023156906e-06, "loss": 2.9113054275512695, "step": 1740 }, { "epoch": 0.914390756302521, "grad_norm": 8.66218967429379, "learning_rate": 8.779111715730127e-06, "loss": 2.531561851501465, "step": 1741 }, { "epoch": 0.9149159663865546, "grad_norm": 12.395263930738281, "learning_rate": 8.777109996535701e-06, "loss": 2.794752597808838, "step": 1742 }, { "epoch": 0.9154411764705882, "grad_norm": 16.55794421118958, "learning_rate": 8.775106866321419e-06, "loss": 2.6315114498138428, "step": 1743 }, { "epoch": 0.9159663865546218, "grad_norm": 12.322031152831002, "learning_rate": 8.773102325835587e-06, "loss": 3.066227674484253, "step": 1744 }, { "epoch": 0.9164915966386554, "grad_norm": 14.013293060084008, "learning_rate": 8.771096375827047e-06, "loss": 3.158912181854248, "step": 1745 }, { "epoch": 0.917016806722689, "grad_norm": 9.40695823246611, "learning_rate": 8.769089017045163e-06, "loss": 3.091387987136841, "step": 1746 }, { "epoch": 0.9175420168067226, "grad_norm": 7.715469404017814, "learning_rate": 8.767080250239826e-06, "loss": 2.7237539291381836, "step": 1747 }, { "epoch": 0.9180672268907563, "grad_norm": 14.007152391202004, "learning_rate": 8.765070076161457e-06, "loss": 2.5782065391540527, "step": 1748 }, { "epoch": 0.9185924369747899, "grad_norm": 16.994065017834338, "learning_rate": 8.763058495560994e-06, "loss": 2.9540176391601562, "step": 1749 }, { "epoch": 0.9191176470588235, "grad_norm": 11.371718126473317, "learning_rate": 8.761045509189912e-06, "loss": 3.109292507171631, "step": 1750 }, { "epoch": 0.9196428571428571, "grad_norm": 17.264147344626977, "learning_rate": 8.7590311178002e-06, "loss": 3.1204617023468018, "step": 1751 }, { "epoch": 0.9201680672268907, "grad_norm": 6.1149940045501525, "learning_rate": 8.757015322144377e-06, "loss": 2.7700257301330566, "step": 1752 }, { "epoch": 0.9206932773109243, "grad_norm": 21.35446981291788, "learning_rate": 8.754998122975489e-06, "loss": 2.498889684677124, "step": 1753 }, { "epoch": 0.9212184873949579, "grad_norm": 13.79860645915577, "learning_rate": 8.752979521047103e-06, "loss": 2.9995710849761963, "step": 1754 }, { "epoch": 0.9217436974789915, "grad_norm": 16.40276401841837, "learning_rate": 8.750959517113309e-06, "loss": 2.588589668273926, "step": 1755 }, { "epoch": 0.9222689075630253, "grad_norm": 15.780013826258518, "learning_rate": 8.748938111928723e-06, "loss": 2.793461322784424, "step": 1756 }, { "epoch": 0.9227941176470589, "grad_norm": 14.078165717018452, "learning_rate": 8.746915306248488e-06, "loss": 2.1291205883026123, "step": 1757 }, { "epoch": 0.9233193277310925, "grad_norm": 17.000730244844867, "learning_rate": 8.744891100828261e-06, "loss": 3.3476784229278564, "step": 1758 }, { "epoch": 0.9238445378151261, "grad_norm": 21.73777257059286, "learning_rate": 8.742865496424228e-06, "loss": 3.958864212036133, "step": 1759 }, { "epoch": 0.9243697478991597, "grad_norm": 8.904155981796297, "learning_rate": 8.740838493793098e-06, "loss": 2.3914618492126465, "step": 1760 }, { "epoch": 0.9248949579831933, "grad_norm": 8.28742029991206, "learning_rate": 8.7388100936921e-06, "loss": 2.660982847213745, "step": 1761 }, { "epoch": 0.9254201680672269, "grad_norm": 8.878507569356453, "learning_rate": 8.736780296878988e-06, "loss": 2.3615212440490723, "step": 1762 }, { "epoch": 0.9259453781512605, "grad_norm": 11.62026842833458, "learning_rate": 8.734749104112032e-06, "loss": 3.145231008529663, "step": 1763 }, { "epoch": 0.9264705882352942, "grad_norm": 8.896661371514389, "learning_rate": 8.732716516150032e-06, "loss": 2.599561929702759, "step": 1764 }, { "epoch": 0.9269957983193278, "grad_norm": 10.29556865582247, "learning_rate": 8.730682533752301e-06, "loss": 2.501940965652466, "step": 1765 }, { "epoch": 0.9275210084033614, "grad_norm": 10.101323750608474, "learning_rate": 8.728647157678678e-06, "loss": 2.709500789642334, "step": 1766 }, { "epoch": 0.928046218487395, "grad_norm": 8.644139455011867, "learning_rate": 8.72661038868952e-06, "loss": 2.2276365756988525, "step": 1767 }, { "epoch": 0.9285714285714286, "grad_norm": 6.485331202539536, "learning_rate": 8.724572227545707e-06, "loss": 2.9125149250030518, "step": 1768 }, { "epoch": 0.9290966386554622, "grad_norm": 11.062385891642846, "learning_rate": 8.722532675008635e-06, "loss": 2.6444571018218994, "step": 1769 }, { "epoch": 0.9296218487394958, "grad_norm": 8.506277266884535, "learning_rate": 8.720491731840223e-06, "loss": 2.7320895195007324, "step": 1770 }, { "epoch": 0.9301470588235294, "grad_norm": 20.332919090775746, "learning_rate": 8.718449398802914e-06, "loss": 2.454099416732788, "step": 1771 }, { "epoch": 0.930672268907563, "grad_norm": 11.21722087123936, "learning_rate": 8.716405676659656e-06, "loss": 3.093299388885498, "step": 1772 }, { "epoch": 0.9311974789915967, "grad_norm": 13.852780182447871, "learning_rate": 8.714360566173932e-06, "loss": 2.987112045288086, "step": 1773 }, { "epoch": 0.9317226890756303, "grad_norm": 8.142773274599534, "learning_rate": 8.712314068109732e-06, "loss": 3.0356099605560303, "step": 1774 }, { "epoch": 0.9322478991596639, "grad_norm": 15.95072720515174, "learning_rate": 8.710266183231574e-06, "loss": 3.1858644485473633, "step": 1775 }, { "epoch": 0.9327731092436975, "grad_norm": 35.115395441844655, "learning_rate": 8.708216912304484e-06, "loss": 2.502962827682495, "step": 1776 }, { "epoch": 0.9332983193277311, "grad_norm": 13.547142890784473, "learning_rate": 8.706166256094013e-06, "loss": 3.0994725227355957, "step": 1777 }, { "epoch": 0.9338235294117647, "grad_norm": 14.441962799616894, "learning_rate": 8.704114215366228e-06, "loss": 2.9976320266723633, "step": 1778 }, { "epoch": 0.9343487394957983, "grad_norm": 11.323843563316196, "learning_rate": 8.70206079088771e-06, "loss": 2.6001384258270264, "step": 1779 }, { "epoch": 0.9348739495798319, "grad_norm": 14.418821458631701, "learning_rate": 8.700005983425562e-06, "loss": 2.52199649810791, "step": 1780 }, { "epoch": 0.9353991596638656, "grad_norm": 15.704931815547031, "learning_rate": 8.6979497937474e-06, "loss": 2.6769728660583496, "step": 1781 }, { "epoch": 0.9359243697478992, "grad_norm": 10.456791049827286, "learning_rate": 8.695892222621359e-06, "loss": 2.4125683307647705, "step": 1782 }, { "epoch": 0.9364495798319328, "grad_norm": 19.293268901498354, "learning_rate": 8.693833270816083e-06, "loss": 2.435770034790039, "step": 1783 }, { "epoch": 0.9369747899159664, "grad_norm": 9.749255660128235, "learning_rate": 8.691772939100745e-06, "loss": 2.727022409439087, "step": 1784 }, { "epoch": 0.9375, "grad_norm": 12.592670289014633, "learning_rate": 8.689711228245021e-06, "loss": 2.8270492553710938, "step": 1785 }, { "epoch": 0.9380252100840336, "grad_norm": 10.547352093485244, "learning_rate": 8.687648139019107e-06, "loss": 2.904310464859009, "step": 1786 }, { "epoch": 0.9385504201680672, "grad_norm": 7.256107660315933, "learning_rate": 8.685583672193716e-06, "loss": 2.8405511379241943, "step": 1787 }, { "epoch": 0.9390756302521008, "grad_norm": 38.87294542436356, "learning_rate": 8.683517828540074e-06, "loss": 2.3819591999053955, "step": 1788 }, { "epoch": 0.9396008403361344, "grad_norm": 8.689467624525195, "learning_rate": 8.681450608829916e-06, "loss": 2.5236239433288574, "step": 1789 }, { "epoch": 0.9401260504201681, "grad_norm": 14.207355681187238, "learning_rate": 8.679382013835502e-06, "loss": 2.255479097366333, "step": 1790 }, { "epoch": 0.9406512605042017, "grad_norm": 9.98703450472335, "learning_rate": 8.677312044329595e-06, "loss": 2.883563756942749, "step": 1791 }, { "epoch": 0.9411764705882353, "grad_norm": 8.386518263299669, "learning_rate": 8.675240701085481e-06, "loss": 2.463327407836914, "step": 1792 }, { "epoch": 0.9417016806722689, "grad_norm": 12.831345621892083, "learning_rate": 8.67316798487695e-06, "loss": 3.131563663482666, "step": 1793 }, { "epoch": 0.9422268907563025, "grad_norm": 13.046653678099416, "learning_rate": 8.67109389647831e-06, "loss": 3.0421652793884277, "step": 1794 }, { "epoch": 0.9427521008403361, "grad_norm": 17.84912520763003, "learning_rate": 8.669018436664382e-06, "loss": 2.6702780723571777, "step": 1795 }, { "epoch": 0.9432773109243697, "grad_norm": 7.9071114232742215, "learning_rate": 8.6669416062105e-06, "loss": 2.7219574451446533, "step": 1796 }, { "epoch": 0.9438025210084033, "grad_norm": 15.392730609681045, "learning_rate": 8.664863405892506e-06, "loss": 2.210427761077881, "step": 1797 }, { "epoch": 0.944327731092437, "grad_norm": 7.982180317670834, "learning_rate": 8.662783836486751e-06, "loss": 2.505744457244873, "step": 1798 }, { "epoch": 0.9448529411764706, "grad_norm": 7.536379874194175, "learning_rate": 8.660702898770113e-06, "loss": 2.801051378250122, "step": 1799 }, { "epoch": 0.9453781512605042, "grad_norm": 13.524087297376317, "learning_rate": 8.658620593519964e-06, "loss": 3.414421796798706, "step": 1800 }, { "epoch": 0.9459033613445378, "grad_norm": 12.612533189965147, "learning_rate": 8.656536921514195e-06, "loss": 2.69132924079895, "step": 1801 }, { "epoch": 0.9464285714285714, "grad_norm": 36.07657582307637, "learning_rate": 8.654451883531205e-06, "loss": 2.282496690750122, "step": 1802 }, { "epoch": 0.946953781512605, "grad_norm": 11.902700522909452, "learning_rate": 8.652365480349904e-06, "loss": 2.824908971786499, "step": 1803 }, { "epoch": 0.9474789915966386, "grad_norm": 35.3183446640842, "learning_rate": 8.650277712749715e-06, "loss": 3.2178330421447754, "step": 1804 }, { "epoch": 0.9480042016806722, "grad_norm": 8.091882541854254, "learning_rate": 8.648188581510567e-06, "loss": 2.548964023590088, "step": 1805 }, { "epoch": 0.9485294117647058, "grad_norm": 10.806709211954933, "learning_rate": 8.646098087412897e-06, "loss": 2.827819347381592, "step": 1806 }, { "epoch": 0.9490546218487395, "grad_norm": 14.81539601197576, "learning_rate": 8.644006231237655e-06, "loss": 2.643984317779541, "step": 1807 }, { "epoch": 0.9495798319327731, "grad_norm": 11.05488678125289, "learning_rate": 8.641913013766301e-06, "loss": 2.810927629470825, "step": 1808 }, { "epoch": 0.9501050420168067, "grad_norm": 16.88559157091052, "learning_rate": 8.639818435780797e-06, "loss": 2.7036876678466797, "step": 1809 }, { "epoch": 0.9506302521008403, "grad_norm": 12.332844750129505, "learning_rate": 8.637722498063619e-06, "loss": 3.2361364364624023, "step": 1810 }, { "epoch": 0.9511554621848739, "grad_norm": 9.568169116029864, "learning_rate": 8.63562520139775e-06, "loss": 2.702150821685791, "step": 1811 }, { "epoch": 0.9516806722689075, "grad_norm": 14.354870471053436, "learning_rate": 8.63352654656668e-06, "loss": 3.1212148666381836, "step": 1812 }, { "epoch": 0.9522058823529411, "grad_norm": 14.82901426364795, "learning_rate": 8.631426534354404e-06, "loss": 3.0510215759277344, "step": 1813 }, { "epoch": 0.9527310924369747, "grad_norm": 7.052700426308374, "learning_rate": 8.629325165545426e-06, "loss": 2.7621378898620605, "step": 1814 }, { "epoch": 0.9532563025210085, "grad_norm": 10.181466227394393, "learning_rate": 8.62722244092476e-06, "loss": 2.6987380981445312, "step": 1815 }, { "epoch": 0.9537815126050421, "grad_norm": 7.379642276446604, "learning_rate": 8.625118361277921e-06, "loss": 2.4137468338012695, "step": 1816 }, { "epoch": 0.9543067226890757, "grad_norm": 9.966511020909078, "learning_rate": 8.623012927390936e-06, "loss": 2.141328811645508, "step": 1817 }, { "epoch": 0.9548319327731093, "grad_norm": 18.90858550355101, "learning_rate": 8.620906140050332e-06, "loss": 2.9326627254486084, "step": 1818 }, { "epoch": 0.9553571428571429, "grad_norm": 9.905485378163771, "learning_rate": 8.618798000043142e-06, "loss": 3.3081917762756348, "step": 1819 }, { "epoch": 0.9558823529411765, "grad_norm": 8.826804118335973, "learning_rate": 8.616688508156912e-06, "loss": 2.9522790908813477, "step": 1820 }, { "epoch": 0.9564075630252101, "grad_norm": 27.27844035411373, "learning_rate": 8.614577665179684e-06, "loss": 2.997037887573242, "step": 1821 }, { "epoch": 0.9569327731092437, "grad_norm": 9.101041023188024, "learning_rate": 8.61246547190001e-06, "loss": 2.8007779121398926, "step": 1822 }, { "epoch": 0.9574579831932774, "grad_norm": 5.045936465679974, "learning_rate": 8.610351929106944e-06, "loss": 2.9660260677337646, "step": 1823 }, { "epoch": 0.957983193277311, "grad_norm": 9.841308592528314, "learning_rate": 8.608237037590044e-06, "loss": 2.7075157165527344, "step": 1824 }, { "epoch": 0.9585084033613446, "grad_norm": 13.276096738014182, "learning_rate": 8.606120798139375e-06, "loss": 2.6712965965270996, "step": 1825 }, { "epoch": 0.9590336134453782, "grad_norm": 15.612695105516615, "learning_rate": 8.6040032115455e-06, "loss": 2.821986198425293, "step": 1826 }, { "epoch": 0.9595588235294118, "grad_norm": 13.353716065276132, "learning_rate": 8.601884278599493e-06, "loss": 2.9493420124053955, "step": 1827 }, { "epoch": 0.9600840336134454, "grad_norm": 7.912159385957208, "learning_rate": 8.599764000092921e-06, "loss": 2.0210976600646973, "step": 1828 }, { "epoch": 0.960609243697479, "grad_norm": 8.04791893105041, "learning_rate": 8.597642376817865e-06, "loss": 2.616459846496582, "step": 1829 }, { "epoch": 0.9611344537815126, "grad_norm": 18.59918151216922, "learning_rate": 8.5955194095669e-06, "loss": 2.24114990234375, "step": 1830 }, { "epoch": 0.9616596638655462, "grad_norm": 7.616087818821263, "learning_rate": 8.593395099133103e-06, "loss": 2.514791488647461, "step": 1831 }, { "epoch": 0.9621848739495799, "grad_norm": 11.382891052560675, "learning_rate": 8.59126944631006e-06, "loss": 2.959714651107788, "step": 1832 }, { "epoch": 0.9627100840336135, "grad_norm": 16.938581937412387, "learning_rate": 8.589142451891849e-06, "loss": 2.572869300842285, "step": 1833 }, { "epoch": 0.9632352941176471, "grad_norm": 12.795502342015439, "learning_rate": 8.58701411667306e-06, "loss": 2.787219524383545, "step": 1834 }, { "epoch": 0.9637605042016807, "grad_norm": 6.498791139312737, "learning_rate": 8.584884441448774e-06, "loss": 2.691793203353882, "step": 1835 }, { "epoch": 0.9642857142857143, "grad_norm": 9.821994066379503, "learning_rate": 8.582753427014576e-06, "loss": 2.721123695373535, "step": 1836 }, { "epoch": 0.9648109243697479, "grad_norm": 42.870174084996364, "learning_rate": 8.580621074166553e-06, "loss": 3.4184532165527344, "step": 1837 }, { "epoch": 0.9653361344537815, "grad_norm": 6.494893254965669, "learning_rate": 8.57848738370129e-06, "loss": 2.405518054962158, "step": 1838 }, { "epoch": 0.9658613445378151, "grad_norm": 6.284528323440992, "learning_rate": 8.576352356415876e-06, "loss": 2.6421608924865723, "step": 1839 }, { "epoch": 0.9663865546218487, "grad_norm": 7.965027483222569, "learning_rate": 8.574215993107892e-06, "loss": 2.546196460723877, "step": 1840 }, { "epoch": 0.9669117647058824, "grad_norm": 13.46452882920774, "learning_rate": 8.572078294575423e-06, "loss": 3.115414619445801, "step": 1841 }, { "epoch": 0.967436974789916, "grad_norm": 13.818032339998968, "learning_rate": 8.569939261617052e-06, "loss": 2.332549571990967, "step": 1842 }, { "epoch": 0.9679621848739496, "grad_norm": 12.092965501437327, "learning_rate": 8.56779889503186e-06, "loss": 2.978212594985962, "step": 1843 }, { "epoch": 0.9684873949579832, "grad_norm": 5.963088828749137, "learning_rate": 8.565657195619427e-06, "loss": 2.8651227951049805, "step": 1844 }, { "epoch": 0.9690126050420168, "grad_norm": 14.825238184496126, "learning_rate": 8.56351416417983e-06, "loss": 2.586548089981079, "step": 1845 }, { "epoch": 0.9695378151260504, "grad_norm": 10.80124055660655, "learning_rate": 8.561369801513647e-06, "loss": 2.636096715927124, "step": 1846 }, { "epoch": 0.970063025210084, "grad_norm": 9.325636596277027, "learning_rate": 8.559224108421943e-06, "loss": 3.1254000663757324, "step": 1847 }, { "epoch": 0.9705882352941176, "grad_norm": 12.346374533301125, "learning_rate": 8.557077085706295e-06, "loss": 2.6923446655273438, "step": 1848 }, { "epoch": 0.9711134453781513, "grad_norm": 12.07670082141277, "learning_rate": 8.554928734168767e-06, "loss": 2.7790634632110596, "step": 1849 }, { "epoch": 0.9716386554621849, "grad_norm": 12.531202975668133, "learning_rate": 8.552779054611917e-06, "loss": 2.7498366832733154, "step": 1850 }, { "epoch": 0.9721638655462185, "grad_norm": 12.654364630281512, "learning_rate": 8.550628047838809e-06, "loss": 3.2891016006469727, "step": 1851 }, { "epoch": 0.9726890756302521, "grad_norm": 30.21970287149626, "learning_rate": 8.548475714652997e-06, "loss": 3.007063388824463, "step": 1852 }, { "epoch": 0.9732142857142857, "grad_norm": 19.707323009878657, "learning_rate": 8.546322055858526e-06, "loss": 2.864422082901001, "step": 1853 }, { "epoch": 0.9737394957983193, "grad_norm": 12.231623007554356, "learning_rate": 8.544167072259947e-06, "loss": 2.7920737266540527, "step": 1854 }, { "epoch": 0.9742647058823529, "grad_norm": 16.267777169718915, "learning_rate": 8.542010764662296e-06, "loss": 3.611020088195801, "step": 1855 }, { "epoch": 0.9747899159663865, "grad_norm": 12.559867699834967, "learning_rate": 8.53985313387111e-06, "loss": 2.9485042095184326, "step": 1856 }, { "epoch": 0.9753151260504201, "grad_norm": 9.655644767699465, "learning_rate": 8.537694180692416e-06, "loss": 2.939473867416382, "step": 1857 }, { "epoch": 0.9758403361344538, "grad_norm": 9.236413649563215, "learning_rate": 8.535533905932739e-06, "loss": 2.4414329528808594, "step": 1858 }, { "epoch": 0.9763655462184874, "grad_norm": 12.901378568621737, "learning_rate": 8.533372310399093e-06, "loss": 2.878749132156372, "step": 1859 }, { "epoch": 0.976890756302521, "grad_norm": 16.44025528982554, "learning_rate": 8.53120939489899e-06, "loss": 2.847072124481201, "step": 1860 }, { "epoch": 0.9774159663865546, "grad_norm": 10.90208461014904, "learning_rate": 8.529045160240433e-06, "loss": 3.040494918823242, "step": 1861 }, { "epoch": 0.9779411764705882, "grad_norm": 15.379610602130203, "learning_rate": 8.526879607231917e-06, "loss": 2.0407490730285645, "step": 1862 }, { "epoch": 0.9784663865546218, "grad_norm": 10.067268557400256, "learning_rate": 8.524712736682433e-06, "loss": 2.9385571479797363, "step": 1863 }, { "epoch": 0.9789915966386554, "grad_norm": 22.4557381152531, "learning_rate": 8.522544549401457e-06, "loss": 3.2371599674224854, "step": 1864 }, { "epoch": 0.979516806722689, "grad_norm": 10.521846568231593, "learning_rate": 8.520375046198965e-06, "loss": 2.6711273193359375, "step": 1865 }, { "epoch": 0.9800420168067226, "grad_norm": 10.941139523626964, "learning_rate": 8.518204227885422e-06, "loss": 2.5425753593444824, "step": 1866 }, { "epoch": 0.9805672268907563, "grad_norm": 11.628589322299094, "learning_rate": 8.51603209527178e-06, "loss": 3.3113784790039062, "step": 1867 }, { "epoch": 0.9810924369747899, "grad_norm": 16.58473894220331, "learning_rate": 8.513858649169488e-06, "loss": 3.3791017532348633, "step": 1868 }, { "epoch": 0.9816176470588235, "grad_norm": 21.68638848570282, "learning_rate": 8.51168389039048e-06, "loss": 2.6426897048950195, "step": 1869 }, { "epoch": 0.9821428571428571, "grad_norm": 8.14627549839803, "learning_rate": 8.50950781974719e-06, "loss": 2.903141736984253, "step": 1870 }, { "epoch": 0.9826680672268907, "grad_norm": 10.422338501355247, "learning_rate": 8.507330438052527e-06, "loss": 2.8371524810791016, "step": 1871 }, { "epoch": 0.9831932773109243, "grad_norm": 9.94067313746645, "learning_rate": 8.505151746119904e-06, "loss": 3.0765295028686523, "step": 1872 }, { "epoch": 0.9837184873949579, "grad_norm": 11.063333204420756, "learning_rate": 8.502971744763216e-06, "loss": 2.120239019393921, "step": 1873 }, { "epoch": 0.9842436974789915, "grad_norm": 11.041822515498248, "learning_rate": 8.500790434796848e-06, "loss": 2.575545310974121, "step": 1874 }, { "epoch": 0.9847689075630253, "grad_norm": 11.183813240011762, "learning_rate": 8.498607817035678e-06, "loss": 3.2164344787597656, "step": 1875 }, { "epoch": 0.9852941176470589, "grad_norm": 9.146282145534625, "learning_rate": 8.496423892295066e-06, "loss": 2.193946361541748, "step": 1876 }, { "epoch": 0.9858193277310925, "grad_norm": 17.41842698593735, "learning_rate": 8.494238661390865e-06, "loss": 2.7824902534484863, "step": 1877 }, { "epoch": 0.9863445378151261, "grad_norm": 8.544143325985905, "learning_rate": 8.492052125139414e-06, "loss": 2.2539215087890625, "step": 1878 }, { "epoch": 0.9868697478991597, "grad_norm": 17.54889547377129, "learning_rate": 8.48986428435754e-06, "loss": 3.429238796234131, "step": 1879 }, { "epoch": 0.9873949579831933, "grad_norm": 16.245562989276348, "learning_rate": 8.48767513986256e-06, "loss": 3.189175844192505, "step": 1880 }, { "epoch": 0.9879201680672269, "grad_norm": 10.627532102304057, "learning_rate": 8.485484692472272e-06, "loss": 2.395397663116455, "step": 1881 }, { "epoch": 0.9884453781512605, "grad_norm": 17.536174472863458, "learning_rate": 8.483292943004965e-06, "loss": 2.467043399810791, "step": 1882 }, { "epoch": 0.9889705882352942, "grad_norm": 20.09310626654821, "learning_rate": 8.481099892279418e-06, "loss": 3.0610265731811523, "step": 1883 }, { "epoch": 0.9894957983193278, "grad_norm": 8.928325296380594, "learning_rate": 8.478905541114886e-06, "loss": 3.007861852645874, "step": 1884 }, { "epoch": 0.9900210084033614, "grad_norm": 10.393411495930849, "learning_rate": 8.476709890331116e-06, "loss": 2.348640203475952, "step": 1885 }, { "epoch": 0.990546218487395, "grad_norm": 14.615631412612505, "learning_rate": 8.474512940748345e-06, "loss": 2.0474324226379395, "step": 1886 }, { "epoch": 0.9910714285714286, "grad_norm": 12.706284887783626, "learning_rate": 8.472314693187285e-06, "loss": 3.3878824710845947, "step": 1887 }, { "epoch": 0.9915966386554622, "grad_norm": 12.225951914697768, "learning_rate": 8.470115148469143e-06, "loss": 3.540670156478882, "step": 1888 }, { "epoch": 0.9921218487394958, "grad_norm": 7.85398971720135, "learning_rate": 8.467914307415601e-06, "loss": 2.438688278198242, "step": 1889 }, { "epoch": 0.9926470588235294, "grad_norm": 12.665499039931298, "learning_rate": 8.465712170848833e-06, "loss": 2.5243587493896484, "step": 1890 }, { "epoch": 0.993172268907563, "grad_norm": 6.031427653850509, "learning_rate": 8.463508739591493e-06, "loss": 2.1889450550079346, "step": 1891 }, { "epoch": 0.9936974789915967, "grad_norm": 9.432235666771582, "learning_rate": 8.46130401446672e-06, "loss": 2.3379998207092285, "step": 1892 }, { "epoch": 0.9942226890756303, "grad_norm": 9.771243724944751, "learning_rate": 8.459097996298137e-06, "loss": 2.6740798950195312, "step": 1893 }, { "epoch": 0.9947478991596639, "grad_norm": 8.199428256110528, "learning_rate": 8.456890685909847e-06, "loss": 2.6622955799102783, "step": 1894 }, { "epoch": 0.9952731092436975, "grad_norm": 8.559301862167205, "learning_rate": 8.45468208412644e-06, "loss": 2.982351303100586, "step": 1895 }, { "epoch": 0.9957983193277311, "grad_norm": 17.70681612153687, "learning_rate": 8.452472191772983e-06, "loss": 2.457289695739746, "step": 1896 }, { "epoch": 0.9963235294117647, "grad_norm": 5.673999815291132, "learning_rate": 8.45026100967503e-06, "loss": 2.7809672355651855, "step": 1897 }, { "epoch": 0.9968487394957983, "grad_norm": 8.730094395146992, "learning_rate": 8.448048538658618e-06, "loss": 2.8093996047973633, "step": 1898 }, { "epoch": 0.9973739495798319, "grad_norm": 16.419111158636518, "learning_rate": 8.445834779550257e-06, "loss": 2.8095481395721436, "step": 1899 }, { "epoch": 0.9978991596638656, "grad_norm": 12.025154461810699, "learning_rate": 8.443619733176949e-06, "loss": 2.9833385944366455, "step": 1900 }, { "epoch": 0.9984243697478992, "grad_norm": 10.744489455236222, "learning_rate": 8.441403400366169e-06, "loss": 2.7518649101257324, "step": 1901 }, { "epoch": 0.9989495798319328, "grad_norm": 12.951882859844048, "learning_rate": 8.439185781945878e-06, "loss": 2.776477813720703, "step": 1902 }, { "epoch": 0.9994747899159664, "grad_norm": 20.019310686502983, "learning_rate": 8.43696687874451e-06, "loss": 3.0205705165863037, "step": 1903 }, { "epoch": 1.0, "grad_norm": 13.33322983646556, "learning_rate": 8.434746691590987e-06, "loss": 2.7565271854400635, "step": 1904 }, { "epoch": 1.0005252100840336, "grad_norm": 10.425103268717317, "learning_rate": 8.432525221314708e-06, "loss": 1.6493275165557861, "step": 1905 }, { "epoch": 1.0010504201680672, "grad_norm": 12.788701715265818, "learning_rate": 8.430302468745546e-06, "loss": 2.2459113597869873, "step": 1906 }, { "epoch": 1.0015756302521008, "grad_norm": 11.940067647697864, "learning_rate": 8.428078434713863e-06, "loss": 1.8953219652175903, "step": 1907 }, { "epoch": 1.0021008403361344, "grad_norm": 11.252538127650912, "learning_rate": 8.42585312005049e-06, "loss": 1.8556398153305054, "step": 1908 }, { "epoch": 1.002626050420168, "grad_norm": 6.741969296382316, "learning_rate": 8.423626525586744e-06, "loss": 1.626365065574646, "step": 1909 }, { "epoch": 1.0031512605042017, "grad_norm": 6.525371213681585, "learning_rate": 8.421398652154412e-06, "loss": 1.3644397258758545, "step": 1910 }, { "epoch": 1.0036764705882353, "grad_norm": 10.261206768161827, "learning_rate": 8.41916950058577e-06, "loss": 1.3718510866165161, "step": 1911 }, { "epoch": 1.004201680672269, "grad_norm": 13.764872593626992, "learning_rate": 8.416939071713559e-06, "loss": 1.724814534187317, "step": 1912 }, { "epoch": 1.0047268907563025, "grad_norm": 9.236179682861412, "learning_rate": 8.414707366371006e-06, "loss": 1.509573221206665, "step": 1913 }, { "epoch": 1.0052521008403361, "grad_norm": 13.584490473887904, "learning_rate": 8.412474385391814e-06, "loss": 1.0250325202941895, "step": 1914 }, { "epoch": 1.0057773109243697, "grad_norm": 15.26791388338633, "learning_rate": 8.410240129610158e-06, "loss": 1.6212220191955566, "step": 1915 }, { "epoch": 1.0063025210084033, "grad_norm": 13.036920248131361, "learning_rate": 8.408004599860692e-06, "loss": 1.6451389789581299, "step": 1916 }, { "epoch": 1.006827731092437, "grad_norm": 11.095108355949035, "learning_rate": 8.405767796978546e-06, "loss": 1.5876981019973755, "step": 1917 }, { "epoch": 1.0073529411764706, "grad_norm": 25.35169379351488, "learning_rate": 8.403529721799325e-06, "loss": 2.09970760345459, "step": 1918 }, { "epoch": 1.0078781512605042, "grad_norm": 11.362918327895557, "learning_rate": 8.40129037515911e-06, "loss": 1.475602388381958, "step": 1919 }, { "epoch": 1.0084033613445378, "grad_norm": 9.621594393477524, "learning_rate": 8.399049757894457e-06, "loss": 2.0639808177948, "step": 1920 }, { "epoch": 1.0089285714285714, "grad_norm": 9.151211286125385, "learning_rate": 8.396807870842396e-06, "loss": 1.6148015260696411, "step": 1921 }, { "epoch": 1.009453781512605, "grad_norm": 10.874206209075602, "learning_rate": 8.394564714840433e-06, "loss": 1.5400185585021973, "step": 1922 }, { "epoch": 1.0099789915966386, "grad_norm": 12.736298604252928, "learning_rate": 8.392320290726543e-06, "loss": 1.2622665166854858, "step": 1923 }, { "epoch": 1.0105042016806722, "grad_norm": 16.957767382381217, "learning_rate": 8.390074599339182e-06, "loss": 2.001948356628418, "step": 1924 }, { "epoch": 1.0110294117647058, "grad_norm": 13.962094214283686, "learning_rate": 8.387827641517274e-06, "loss": 2.5753512382507324, "step": 1925 }, { "epoch": 1.0115546218487395, "grad_norm": 10.222847154104663, "learning_rate": 8.385579418100219e-06, "loss": 1.7223831415176392, "step": 1926 }, { "epoch": 1.012079831932773, "grad_norm": 11.457577280648373, "learning_rate": 8.383329929927888e-06, "loss": 1.5780894756317139, "step": 1927 }, { "epoch": 1.0126050420168067, "grad_norm": 12.046002924461916, "learning_rate": 8.381079177840625e-06, "loss": 1.4928408861160278, "step": 1928 }, { "epoch": 1.0131302521008403, "grad_norm": 10.956957577930083, "learning_rate": 8.378827162679248e-06, "loss": 1.533614158630371, "step": 1929 }, { "epoch": 1.013655462184874, "grad_norm": 9.119484179757583, "learning_rate": 8.376573885285041e-06, "loss": 1.5786470174789429, "step": 1930 }, { "epoch": 1.0141806722689075, "grad_norm": 9.904786233687716, "learning_rate": 8.37431934649977e-06, "loss": 2.3136680126190186, "step": 1931 }, { "epoch": 1.0147058823529411, "grad_norm": 10.186617550044655, "learning_rate": 8.37206354716566e-06, "loss": 1.5691616535186768, "step": 1932 }, { "epoch": 1.0152310924369747, "grad_norm": 8.189996147531692, "learning_rate": 8.369806488125418e-06, "loss": 1.6489810943603516, "step": 1933 }, { "epoch": 1.0157563025210083, "grad_norm": 9.965136678330182, "learning_rate": 8.367548170222213e-06, "loss": 0.9397400617599487, "step": 1934 }, { "epoch": 1.016281512605042, "grad_norm": 13.698451996841913, "learning_rate": 8.365288594299688e-06, "loss": 2.7507967948913574, "step": 1935 }, { "epoch": 1.0168067226890756, "grad_norm": 14.243010083824592, "learning_rate": 8.363027761201957e-06, "loss": 1.343361735343933, "step": 1936 }, { "epoch": 1.0173319327731092, "grad_norm": 17.856114600383805, "learning_rate": 8.360765671773603e-06, "loss": 1.3550803661346436, "step": 1937 }, { "epoch": 1.0178571428571428, "grad_norm": 9.674800205447667, "learning_rate": 8.358502326859674e-06, "loss": 1.7225271463394165, "step": 1938 }, { "epoch": 1.0183823529411764, "grad_norm": 11.045762867972751, "learning_rate": 8.356237727305695e-06, "loss": 2.1100683212280273, "step": 1939 }, { "epoch": 1.01890756302521, "grad_norm": 11.14608259258499, "learning_rate": 8.353971873957652e-06, "loss": 1.6201719045639038, "step": 1940 }, { "epoch": 1.0194327731092436, "grad_norm": 10.454592897256216, "learning_rate": 8.351704767662005e-06, "loss": 1.4395060539245605, "step": 1941 }, { "epoch": 1.0199579831932772, "grad_norm": 8.00835615271533, "learning_rate": 8.34943640926568e-06, "loss": 1.489414095878601, "step": 1942 }, { "epoch": 1.0204831932773109, "grad_norm": 16.38390854750751, "learning_rate": 8.347166799616069e-06, "loss": 1.636405110359192, "step": 1943 }, { "epoch": 1.0210084033613445, "grad_norm": 11.206054027613732, "learning_rate": 8.344895939561034e-06, "loss": 1.1762261390686035, "step": 1944 }, { "epoch": 1.021533613445378, "grad_norm": 10.214458044906083, "learning_rate": 8.3426238299489e-06, "loss": 1.4226603507995605, "step": 1945 }, { "epoch": 1.0220588235294117, "grad_norm": 12.432188175217707, "learning_rate": 8.340350471628469e-06, "loss": 1.2234781980514526, "step": 1946 }, { "epoch": 1.0225840336134453, "grad_norm": 16.250235673576167, "learning_rate": 8.338075865448998e-06, "loss": 1.6221668720245361, "step": 1947 }, { "epoch": 1.023109243697479, "grad_norm": 12.816687210569661, "learning_rate": 8.335800012260211e-06, "loss": 1.376111388206482, "step": 1948 }, { "epoch": 1.0236344537815125, "grad_norm": 10.225470219359712, "learning_rate": 8.333522912912308e-06, "loss": 1.4632911682128906, "step": 1949 }, { "epoch": 1.0241596638655461, "grad_norm": 12.56264744409334, "learning_rate": 8.331244568255944e-06, "loss": 1.10945463180542, "step": 1950 }, { "epoch": 1.0246848739495797, "grad_norm": 12.459697458279704, "learning_rate": 8.328964979142244e-06, "loss": 1.5290939807891846, "step": 1951 }, { "epoch": 1.0252100840336134, "grad_norm": 11.715959432334296, "learning_rate": 8.326684146422798e-06, "loss": 1.2233198881149292, "step": 1952 }, { "epoch": 1.025735294117647, "grad_norm": 17.91977213508758, "learning_rate": 8.324402070949658e-06, "loss": 2.126434326171875, "step": 1953 }, { "epoch": 1.0262605042016806, "grad_norm": 13.220952963877636, "learning_rate": 8.322118753575344e-06, "loss": 1.3733046054840088, "step": 1954 }, { "epoch": 1.0267857142857142, "grad_norm": 10.49379912086789, "learning_rate": 8.319834195152836e-06, "loss": 1.7667503356933594, "step": 1955 }, { "epoch": 1.0273109243697478, "grad_norm": 11.748923411034493, "learning_rate": 8.31754839653558e-06, "loss": 1.35880708694458, "step": 1956 }, { "epoch": 1.0278361344537814, "grad_norm": 8.975299039977969, "learning_rate": 8.315261358577485e-06, "loss": 1.5697741508483887, "step": 1957 }, { "epoch": 1.028361344537815, "grad_norm": 12.443218320042634, "learning_rate": 8.312973082132922e-06, "loss": 1.6423313617706299, "step": 1958 }, { "epoch": 1.0288865546218486, "grad_norm": 10.609728892527682, "learning_rate": 8.310683568056725e-06, "loss": 1.6356289386749268, "step": 1959 }, { "epoch": 1.0294117647058822, "grad_norm": 11.683745668844416, "learning_rate": 8.308392817204194e-06, "loss": 1.2431647777557373, "step": 1960 }, { "epoch": 1.0299369747899159, "grad_norm": 8.590326202928376, "learning_rate": 8.306100830431085e-06, "loss": 1.5427310466766357, "step": 1961 }, { "epoch": 1.0304621848739495, "grad_norm": 15.523140519350019, "learning_rate": 8.303807608593617e-06, "loss": 1.4608049392700195, "step": 1962 }, { "epoch": 1.0309873949579833, "grad_norm": 10.23039884136331, "learning_rate": 8.301513152548474e-06, "loss": 1.7733904123306274, "step": 1963 }, { "epoch": 1.0315126050420167, "grad_norm": 11.219338154828353, "learning_rate": 8.2992174631528e-06, "loss": 1.797380805015564, "step": 1964 }, { "epoch": 1.0320378151260505, "grad_norm": 12.126234321232678, "learning_rate": 8.296920541264197e-06, "loss": 1.1642725467681885, "step": 1965 }, { "epoch": 1.0325630252100841, "grad_norm": 15.32514566454394, "learning_rate": 8.294622387740728e-06, "loss": 1.3890987634658813, "step": 1966 }, { "epoch": 1.0330882352941178, "grad_norm": 12.50659440628141, "learning_rate": 8.292323003440919e-06, "loss": 1.7690242528915405, "step": 1967 }, { "epoch": 1.0336134453781514, "grad_norm": 10.479752133577465, "learning_rate": 8.290022389223754e-06, "loss": 2.1003386974334717, "step": 1968 }, { "epoch": 1.034138655462185, "grad_norm": 30.658755350352227, "learning_rate": 8.287720545948676e-06, "loss": 1.3065478801727295, "step": 1969 }, { "epoch": 1.0346638655462186, "grad_norm": 8.558415688264631, "learning_rate": 8.285417474475587e-06, "loss": 1.1889278888702393, "step": 1970 }, { "epoch": 1.0351890756302522, "grad_norm": 17.685929104524476, "learning_rate": 8.28311317566485e-06, "loss": 1.2149498462677002, "step": 1971 }, { "epoch": 1.0357142857142858, "grad_norm": 12.117931530133028, "learning_rate": 8.28080765037728e-06, "loss": 1.1469614505767822, "step": 1972 }, { "epoch": 1.0362394957983194, "grad_norm": 15.88119938961799, "learning_rate": 8.278500899474162e-06, "loss": 2.375556468963623, "step": 1973 }, { "epoch": 1.036764705882353, "grad_norm": 11.486225001536914, "learning_rate": 8.27619292381723e-06, "loss": 1.814281940460205, "step": 1974 }, { "epoch": 1.0372899159663866, "grad_norm": 10.01479759517437, "learning_rate": 8.273883724268672e-06, "loss": 1.0509192943572998, "step": 1975 }, { "epoch": 1.0378151260504203, "grad_norm": 10.090578344197827, "learning_rate": 8.271573301691145e-06, "loss": 1.9715211391448975, "step": 1976 }, { "epoch": 1.0383403361344539, "grad_norm": 16.65268537151143, "learning_rate": 8.269261656947755e-06, "loss": 2.5270819664001465, "step": 1977 }, { "epoch": 1.0388655462184875, "grad_norm": 11.988105008300673, "learning_rate": 8.266948790902064e-06, "loss": 2.2483301162719727, "step": 1978 }, { "epoch": 1.039390756302521, "grad_norm": 11.726136781315917, "learning_rate": 8.264634704418095e-06, "loss": 1.6729965209960938, "step": 1979 }, { "epoch": 1.0399159663865547, "grad_norm": 9.88970599842327, "learning_rate": 8.262319398360323e-06, "loss": 2.0312275886535645, "step": 1980 }, { "epoch": 1.0404411764705883, "grad_norm": 6.994579754001975, "learning_rate": 8.260002873593679e-06, "loss": 1.3279500007629395, "step": 1981 }, { "epoch": 1.040966386554622, "grad_norm": 7.711351392387537, "learning_rate": 8.257685130983552e-06, "loss": 1.3498008251190186, "step": 1982 }, { "epoch": 1.0414915966386555, "grad_norm": 11.63951195762544, "learning_rate": 8.255366171395783e-06, "loss": 1.81411612033844, "step": 1983 }, { "epoch": 1.0420168067226891, "grad_norm": 6.9665214970757185, "learning_rate": 8.253045995696669e-06, "loss": 1.441640853881836, "step": 1984 }, { "epoch": 1.0425420168067228, "grad_norm": 11.84377264488808, "learning_rate": 8.25072460475296e-06, "loss": 2.0769429206848145, "step": 1985 }, { "epoch": 1.0430672268907564, "grad_norm": 7.8559847090079495, "learning_rate": 8.248401999431864e-06, "loss": 1.1951301097869873, "step": 1986 }, { "epoch": 1.04359243697479, "grad_norm": 17.04903626381307, "learning_rate": 8.246078180601035e-06, "loss": 1.5911346673965454, "step": 1987 }, { "epoch": 1.0441176470588236, "grad_norm": 9.627284924411768, "learning_rate": 8.243753149128589e-06, "loss": 1.5778251886367798, "step": 1988 }, { "epoch": 1.0446428571428572, "grad_norm": 9.970954874040544, "learning_rate": 8.24142690588309e-06, "loss": 1.6873408555984497, "step": 1989 }, { "epoch": 1.0451680672268908, "grad_norm": 10.305284208499565, "learning_rate": 8.239099451733555e-06, "loss": 1.4405624866485596, "step": 1990 }, { "epoch": 1.0456932773109244, "grad_norm": 10.200434591389094, "learning_rate": 8.236770787549456e-06, "loss": 1.347198724746704, "step": 1991 }, { "epoch": 1.046218487394958, "grad_norm": 8.571961955733093, "learning_rate": 8.23444091420071e-06, "loss": 0.9504708051681519, "step": 1992 }, { "epoch": 1.0467436974789917, "grad_norm": 10.923911768087208, "learning_rate": 8.232109832557696e-06, "loss": 1.4706783294677734, "step": 1993 }, { "epoch": 1.0472689075630253, "grad_norm": 9.433031076323532, "learning_rate": 8.229777543491238e-06, "loss": 1.5270686149597168, "step": 1994 }, { "epoch": 1.0477941176470589, "grad_norm": 18.548511555963298, "learning_rate": 8.227444047872612e-06, "loss": 1.6383750438690186, "step": 1995 }, { "epoch": 1.0483193277310925, "grad_norm": 9.778095972387245, "learning_rate": 8.225109346573544e-06, "loss": 1.8342719078063965, "step": 1996 }, { "epoch": 1.048844537815126, "grad_norm": 17.06380107436487, "learning_rate": 8.222773440466213e-06, "loss": 1.628516674041748, "step": 1997 }, { "epoch": 1.0493697478991597, "grad_norm": 9.864098374870848, "learning_rate": 8.220436330423243e-06, "loss": 0.9155079126358032, "step": 1998 }, { "epoch": 1.0498949579831933, "grad_norm": 10.115094244055996, "learning_rate": 8.218098017317715e-06, "loss": 2.3998050689697266, "step": 1999 }, { "epoch": 1.050420168067227, "grad_norm": 13.536007676797151, "learning_rate": 8.215758502023157e-06, "loss": 1.2778449058532715, "step": 2000 }, { "epoch": 1.0509453781512605, "grad_norm": 12.233788990512272, "learning_rate": 8.213417785413538e-06, "loss": 1.4833780527114868, "step": 2001 }, { "epoch": 1.0514705882352942, "grad_norm": 13.862242589295523, "learning_rate": 8.21107586836329e-06, "loss": 1.400753140449524, "step": 2002 }, { "epoch": 1.0519957983193278, "grad_norm": 14.330524845983492, "learning_rate": 8.208732751747281e-06, "loss": 2.317253589630127, "step": 2003 }, { "epoch": 1.0525210084033614, "grad_norm": 14.704479467473012, "learning_rate": 8.206388436440833e-06, "loss": 2.202234983444214, "step": 2004 }, { "epoch": 1.053046218487395, "grad_norm": 7.315855181709781, "learning_rate": 8.204042923319717e-06, "loss": 1.7417163848876953, "step": 2005 }, { "epoch": 1.0535714285714286, "grad_norm": 10.912332897356237, "learning_rate": 8.201696213260149e-06, "loss": 1.5026731491088867, "step": 2006 }, { "epoch": 1.0540966386554622, "grad_norm": 55.81202535331172, "learning_rate": 8.19934830713879e-06, "loss": 5.182719707489014, "step": 2007 }, { "epoch": 1.0546218487394958, "grad_norm": 12.774663008107998, "learning_rate": 8.196999205832752e-06, "loss": 1.316359519958496, "step": 2008 }, { "epoch": 1.0551470588235294, "grad_norm": 15.585678671547443, "learning_rate": 8.19464891021959e-06, "loss": 2.2544400691986084, "step": 2009 }, { "epoch": 1.055672268907563, "grad_norm": 9.725154249611599, "learning_rate": 8.19229742117731e-06, "loss": 1.4268792867660522, "step": 2010 }, { "epoch": 1.0561974789915967, "grad_norm": 18.92807162103923, "learning_rate": 8.189944739584361e-06, "loss": 1.7833478450775146, "step": 2011 }, { "epoch": 1.0567226890756303, "grad_norm": 10.257885875688178, "learning_rate": 8.187590866319635e-06, "loss": 1.4846409559249878, "step": 2012 }, { "epoch": 1.0572478991596639, "grad_norm": 11.497196233579782, "learning_rate": 8.18523580226247e-06, "loss": 1.2766668796539307, "step": 2013 }, { "epoch": 1.0577731092436975, "grad_norm": 10.755765096146325, "learning_rate": 8.182879548292655e-06, "loss": 1.36566162109375, "step": 2014 }, { "epoch": 1.058298319327731, "grad_norm": 8.729101870569439, "learning_rate": 8.180522105290414e-06, "loss": 1.9430431127548218, "step": 2015 }, { "epoch": 1.0588235294117647, "grad_norm": 12.708760338325707, "learning_rate": 8.178163474136424e-06, "loss": 1.4622400999069214, "step": 2016 }, { "epoch": 1.0593487394957983, "grad_norm": 7.533207633814089, "learning_rate": 8.1758036557118e-06, "loss": 1.449235200881958, "step": 2017 }, { "epoch": 1.059873949579832, "grad_norm": 11.715678520399887, "learning_rate": 8.173442650898103e-06, "loss": 1.6451451778411865, "step": 2018 }, { "epoch": 1.0603991596638656, "grad_norm": 15.151606191516144, "learning_rate": 8.171080460577337e-06, "loss": 1.9938178062438965, "step": 2019 }, { "epoch": 1.0609243697478992, "grad_norm": 35.448718866794074, "learning_rate": 8.168717085631946e-06, "loss": 1.7531225681304932, "step": 2020 }, { "epoch": 1.0614495798319328, "grad_norm": 55.681063416463836, "learning_rate": 8.166352526944821e-06, "loss": 1.7547500133514404, "step": 2021 }, { "epoch": 1.0619747899159664, "grad_norm": 7.3171440156645575, "learning_rate": 8.163986785399295e-06, "loss": 1.7646377086639404, "step": 2022 }, { "epoch": 1.0625, "grad_norm": 15.786531214305574, "learning_rate": 8.161619861879136e-06, "loss": 2.4931797981262207, "step": 2023 }, { "epoch": 1.0630252100840336, "grad_norm": 22.610717897909964, "learning_rate": 8.159251757268566e-06, "loss": 1.6910812854766846, "step": 2024 }, { "epoch": 1.0635504201680672, "grad_norm": 40.578679605306704, "learning_rate": 8.156882472452232e-06, "loss": 1.2922301292419434, "step": 2025 }, { "epoch": 1.0640756302521008, "grad_norm": 15.988684800481444, "learning_rate": 8.154512008315239e-06, "loss": 1.3810336589813232, "step": 2026 }, { "epoch": 1.0646008403361344, "grad_norm": 10.736692668762364, "learning_rate": 8.15214036574312e-06, "loss": 1.039983868598938, "step": 2027 }, { "epoch": 1.065126050420168, "grad_norm": 12.908950532378157, "learning_rate": 8.149767545621852e-06, "loss": 1.4156992435455322, "step": 2028 }, { "epoch": 1.0656512605042017, "grad_norm": 7.119310520679408, "learning_rate": 8.147393548837856e-06, "loss": 1.9925599098205566, "step": 2029 }, { "epoch": 1.0661764705882353, "grad_norm": 8.347532632013307, "learning_rate": 8.145018376277987e-06, "loss": 1.4140294790267944, "step": 2030 }, { "epoch": 1.066701680672269, "grad_norm": 17.027405986487327, "learning_rate": 8.14264202882954e-06, "loss": 1.6106376647949219, "step": 2031 }, { "epoch": 1.0672268907563025, "grad_norm": 19.674325053254293, "learning_rate": 8.14026450738025e-06, "loss": 1.544736623764038, "step": 2032 }, { "epoch": 1.0677521008403361, "grad_norm": 12.161085828150632, "learning_rate": 8.137885812818296e-06, "loss": 1.5386223793029785, "step": 2033 }, { "epoch": 1.0682773109243697, "grad_norm": 12.707797474192757, "learning_rate": 8.135505946032285e-06, "loss": 1.6691786050796509, "step": 2034 }, { "epoch": 1.0688025210084033, "grad_norm": 12.529033411521157, "learning_rate": 8.133124907911268e-06, "loss": 1.5454782247543335, "step": 2035 }, { "epoch": 1.069327731092437, "grad_norm": 13.596070634937583, "learning_rate": 8.130742699344731e-06, "loss": 1.7887153625488281, "step": 2036 }, { "epoch": 1.0698529411764706, "grad_norm": 18.118361638657046, "learning_rate": 8.128359321222601e-06, "loss": 1.795508623123169, "step": 2037 }, { "epoch": 1.0703781512605042, "grad_norm": 9.370203496553357, "learning_rate": 8.12597477443524e-06, "loss": 1.3259203433990479, "step": 2038 }, { "epoch": 1.0709033613445378, "grad_norm": 11.834875007050861, "learning_rate": 8.123589059873445e-06, "loss": 1.6646111011505127, "step": 2039 }, { "epoch": 1.0714285714285714, "grad_norm": 10.787634958856607, "learning_rate": 8.121202178428449e-06, "loss": 2.476722240447998, "step": 2040 }, { "epoch": 1.071953781512605, "grad_norm": 9.031927869789378, "learning_rate": 8.118814130991925e-06, "loss": 1.5974336862564087, "step": 2041 }, { "epoch": 1.0724789915966386, "grad_norm": 15.376387669285181, "learning_rate": 8.116424918455978e-06, "loss": 1.0614888668060303, "step": 2042 }, { "epoch": 1.0730042016806722, "grad_norm": 11.152342039453462, "learning_rate": 8.114034541713152e-06, "loss": 1.2790303230285645, "step": 2043 }, { "epoch": 1.0735294117647058, "grad_norm": 14.691697775097392, "learning_rate": 8.111643001656417e-06, "loss": 1.645697832107544, "step": 2044 }, { "epoch": 1.0740546218487395, "grad_norm": 9.121862327883473, "learning_rate": 8.109250299179188e-06, "loss": 1.6840964555740356, "step": 2045 }, { "epoch": 1.074579831932773, "grad_norm": 10.47633908025849, "learning_rate": 8.10685643517531e-06, "loss": 1.9050471782684326, "step": 2046 }, { "epoch": 1.0751050420168067, "grad_norm": 9.80912174008259, "learning_rate": 8.10446141053906e-06, "loss": 2.5355887413024902, "step": 2047 }, { "epoch": 1.0756302521008403, "grad_norm": 13.492053149298671, "learning_rate": 8.10206522616515e-06, "loss": 1.5174318552017212, "step": 2048 }, { "epoch": 1.076155462184874, "grad_norm": 9.565188460047532, "learning_rate": 8.09966788294873e-06, "loss": 1.2498542070388794, "step": 2049 }, { "epoch": 1.0766806722689075, "grad_norm": 15.641539869504182, "learning_rate": 8.097269381785373e-06, "loss": 2.404829978942871, "step": 2050 }, { "epoch": 1.0772058823529411, "grad_norm": 7.470558374854489, "learning_rate": 8.094869723571093e-06, "loss": 1.439483880996704, "step": 2051 }, { "epoch": 1.0777310924369747, "grad_norm": 9.086649858544119, "learning_rate": 8.092468909202335e-06, "loss": 1.3623019456863403, "step": 2052 }, { "epoch": 1.0782563025210083, "grad_norm": 12.676821930614663, "learning_rate": 8.090066939575972e-06, "loss": 1.33333158493042, "step": 2053 }, { "epoch": 1.078781512605042, "grad_norm": 12.452525963389826, "learning_rate": 8.08766381558931e-06, "loss": 1.772153377532959, "step": 2054 }, { "epoch": 1.0793067226890756, "grad_norm": 9.747770629837545, "learning_rate": 8.08525953814009e-06, "loss": 1.0967566967010498, "step": 2055 }, { "epoch": 1.0798319327731092, "grad_norm": 7.561380657211976, "learning_rate": 8.08285410812648e-06, "loss": 1.398902177810669, "step": 2056 }, { "epoch": 1.0803571428571428, "grad_norm": 12.324074414733897, "learning_rate": 8.080447526447079e-06, "loss": 1.1446821689605713, "step": 2057 }, { "epoch": 1.0808823529411764, "grad_norm": 11.217338342744727, "learning_rate": 8.078039794000915e-06, "loss": 1.4770509004592896, "step": 2058 }, { "epoch": 1.08140756302521, "grad_norm": 13.735683293515555, "learning_rate": 8.075630911687451e-06, "loss": 2.7962405681610107, "step": 2059 }, { "epoch": 1.0819327731092436, "grad_norm": 12.48060899717405, "learning_rate": 8.073220880406576e-06, "loss": 1.7615755796432495, "step": 2060 }, { "epoch": 1.0824579831932772, "grad_norm": 11.783077342575183, "learning_rate": 8.070809701058606e-06, "loss": 0.9123398661613464, "step": 2061 }, { "epoch": 1.0829831932773109, "grad_norm": 11.414839447073165, "learning_rate": 8.068397374544292e-06, "loss": 2.6187493801116943, "step": 2062 }, { "epoch": 1.0835084033613445, "grad_norm": 8.039078127914273, "learning_rate": 8.065983901764807e-06, "loss": 1.6874656677246094, "step": 2063 }, { "epoch": 1.084033613445378, "grad_norm": 9.701868706948737, "learning_rate": 8.063569283621754e-06, "loss": 2.1172051429748535, "step": 2064 }, { "epoch": 1.0845588235294117, "grad_norm": 11.175470061764607, "learning_rate": 8.061153521017169e-06, "loss": 1.5343923568725586, "step": 2065 }, { "epoch": 1.0850840336134453, "grad_norm": 13.997563887917403, "learning_rate": 8.05873661485351e-06, "loss": 1.7000303268432617, "step": 2066 }, { "epoch": 1.085609243697479, "grad_norm": 28.031097332273816, "learning_rate": 8.056318566033664e-06, "loss": 1.5794883966445923, "step": 2067 }, { "epoch": 1.0861344537815125, "grad_norm": 12.183818861348108, "learning_rate": 8.05389937546094e-06, "loss": 1.4774911403656006, "step": 2068 }, { "epoch": 1.0866596638655461, "grad_norm": 10.483688768681391, "learning_rate": 8.051479044039086e-06, "loss": 1.605708122253418, "step": 2069 }, { "epoch": 1.0871848739495797, "grad_norm": 27.820678027151825, "learning_rate": 8.049057572672263e-06, "loss": 1.5847560167312622, "step": 2070 }, { "epoch": 1.0877100840336134, "grad_norm": 12.4251757392418, "learning_rate": 8.046634962265064e-06, "loss": 1.5098514556884766, "step": 2071 }, { "epoch": 1.088235294117647, "grad_norm": 36.490382393341974, "learning_rate": 8.044211213722508e-06, "loss": 2.929486036300659, "step": 2072 }, { "epoch": 1.0887605042016806, "grad_norm": 20.939520114666106, "learning_rate": 8.041786327950037e-06, "loss": 1.748311161994934, "step": 2073 }, { "epoch": 1.0892857142857142, "grad_norm": 15.906718423276466, "learning_rate": 8.039360305853518e-06, "loss": 1.479958415031433, "step": 2074 }, { "epoch": 1.0898109243697478, "grad_norm": 11.035678490765639, "learning_rate": 8.036933148339246e-06, "loss": 1.3642189502716064, "step": 2075 }, { "epoch": 1.0903361344537814, "grad_norm": 12.328561544618518, "learning_rate": 8.034504856313935e-06, "loss": 1.1948144435882568, "step": 2076 }, { "epoch": 1.090861344537815, "grad_norm": 9.035793402326892, "learning_rate": 8.032075430684724e-06, "loss": 1.5175752639770508, "step": 2077 }, { "epoch": 1.0913865546218486, "grad_norm": 11.951260351842103, "learning_rate": 8.029644872359182e-06, "loss": 1.1992278099060059, "step": 2078 }, { "epoch": 1.0919117647058822, "grad_norm": 12.288571411765174, "learning_rate": 8.027213182245289e-06, "loss": 1.6592459678649902, "step": 2079 }, { "epoch": 1.092436974789916, "grad_norm": 8.202280335864222, "learning_rate": 8.024780361251458e-06, "loss": 1.628820776939392, "step": 2080 }, { "epoch": 1.0929621848739495, "grad_norm": 10.919147602699114, "learning_rate": 8.02234641028652e-06, "loss": 1.2624070644378662, "step": 2081 }, { "epoch": 1.0934873949579833, "grad_norm": 22.946780337331695, "learning_rate": 8.019911330259733e-06, "loss": 2.224916458129883, "step": 2082 }, { "epoch": 1.0940126050420167, "grad_norm": 9.588830757244613, "learning_rate": 8.017475122080767e-06, "loss": 1.2903892993927002, "step": 2083 }, { "epoch": 1.0945378151260505, "grad_norm": 12.260829010012472, "learning_rate": 8.015037786659725e-06, "loss": 2.123779058456421, "step": 2084 }, { "epoch": 1.095063025210084, "grad_norm": 13.832226936795907, "learning_rate": 8.012599324907121e-06, "loss": 1.6024169921875, "step": 2085 }, { "epoch": 1.0955882352941178, "grad_norm": 7.579076367169775, "learning_rate": 8.010159737733897e-06, "loss": 1.3724197149276733, "step": 2086 }, { "epoch": 1.0961134453781514, "grad_norm": 7.8693581818591625, "learning_rate": 8.007719026051413e-06, "loss": 1.2978019714355469, "step": 2087 }, { "epoch": 1.096638655462185, "grad_norm": 10.111588038676686, "learning_rate": 8.005277190771447e-06, "loss": 0.7179710865020752, "step": 2088 }, { "epoch": 1.0971638655462186, "grad_norm": 15.55957612031918, "learning_rate": 8.0028342328062e-06, "loss": 1.2068419456481934, "step": 2089 }, { "epoch": 1.0976890756302522, "grad_norm": 9.113015245034111, "learning_rate": 8.00039015306829e-06, "loss": 1.2838717699050903, "step": 2090 }, { "epoch": 1.0982142857142858, "grad_norm": 12.952906569925261, "learning_rate": 7.997944952470755e-06, "loss": 2.1330454349517822, "step": 2091 }, { "epoch": 1.0987394957983194, "grad_norm": 13.445457154747649, "learning_rate": 7.995498631927053e-06, "loss": 2.0122201442718506, "step": 2092 }, { "epoch": 1.099264705882353, "grad_norm": 11.051591329612487, "learning_rate": 7.993051192351056e-06, "loss": 2.080228090286255, "step": 2093 }, { "epoch": 1.0997899159663866, "grad_norm": 10.63558033130149, "learning_rate": 7.990602634657062e-06, "loss": 1.3373291492462158, "step": 2094 }, { "epoch": 1.1003151260504203, "grad_norm": 16.169468011674773, "learning_rate": 7.988152959759778e-06, "loss": 1.3824642896652222, "step": 2095 }, { "epoch": 1.1008403361344539, "grad_norm": 11.460722090488222, "learning_rate": 7.985702168574335e-06, "loss": 1.7380826473236084, "step": 2096 }, { "epoch": 1.1013655462184875, "grad_norm": 12.843943771247455, "learning_rate": 7.983250262016276e-06, "loss": 1.554076910018921, "step": 2097 }, { "epoch": 1.101890756302521, "grad_norm": 11.044942328970958, "learning_rate": 7.980797241001563e-06, "loss": 1.7797731161117554, "step": 2098 }, { "epoch": 1.1024159663865547, "grad_norm": 11.24677358803186, "learning_rate": 7.978343106446575e-06, "loss": 2.0195977687835693, "step": 2099 }, { "epoch": 1.1029411764705883, "grad_norm": 8.62059118420625, "learning_rate": 7.975887859268105e-06, "loss": 1.4654607772827148, "step": 2100 }, { "epoch": 1.103466386554622, "grad_norm": 8.437366913653893, "learning_rate": 7.973431500383366e-06, "loss": 2.535557508468628, "step": 2101 }, { "epoch": 1.1039915966386555, "grad_norm": 11.01283641772209, "learning_rate": 7.970974030709982e-06, "loss": 1.2387416362762451, "step": 2102 }, { "epoch": 1.1045168067226891, "grad_norm": 10.255202846207247, "learning_rate": 7.96851545116599e-06, "loss": 1.650892734527588, "step": 2103 }, { "epoch": 1.1050420168067228, "grad_norm": 13.853695395247032, "learning_rate": 7.966055762669846e-06, "loss": 2.867412567138672, "step": 2104 }, { "epoch": 1.1055672268907564, "grad_norm": 15.835028427878946, "learning_rate": 7.963594966140423e-06, "loss": 1.7530300617218018, "step": 2105 }, { "epoch": 1.10609243697479, "grad_norm": 8.282839586685641, "learning_rate": 7.961133062496999e-06, "loss": 2.3559670448303223, "step": 2106 }, { "epoch": 1.1066176470588236, "grad_norm": 9.570546417949428, "learning_rate": 7.958670052659274e-06, "loss": 1.6627740859985352, "step": 2107 }, { "epoch": 1.1071428571428572, "grad_norm": 7.740856763553868, "learning_rate": 7.956205937547354e-06, "loss": 1.9173784255981445, "step": 2108 }, { "epoch": 1.1076680672268908, "grad_norm": 9.773895398357354, "learning_rate": 7.953740718081765e-06, "loss": 2.2621545791625977, "step": 2109 }, { "epoch": 1.1081932773109244, "grad_norm": 9.583472556089905, "learning_rate": 7.951274395183442e-06, "loss": 1.1864567995071411, "step": 2110 }, { "epoch": 1.108718487394958, "grad_norm": 15.68589090768525, "learning_rate": 7.948806969773731e-06, "loss": 1.8923931121826172, "step": 2111 }, { "epoch": 1.1092436974789917, "grad_norm": 12.954378909740273, "learning_rate": 7.94633844277439e-06, "loss": 1.652284860610962, "step": 2112 }, { "epoch": 1.1097689075630253, "grad_norm": 7.644328375634613, "learning_rate": 7.943868815107594e-06, "loss": 1.6230323314666748, "step": 2113 }, { "epoch": 1.1102941176470589, "grad_norm": 11.226280315272911, "learning_rate": 7.941398087695923e-06, "loss": 2.466500997543335, "step": 2114 }, { "epoch": 1.1108193277310925, "grad_norm": 7.585851375413204, "learning_rate": 7.938926261462366e-06, "loss": 1.4757699966430664, "step": 2115 }, { "epoch": 1.111344537815126, "grad_norm": 7.776935588322972, "learning_rate": 7.936453337330332e-06, "loss": 1.6728451251983643, "step": 2116 }, { "epoch": 1.1118697478991597, "grad_norm": 21.108057869364007, "learning_rate": 7.933979316223632e-06, "loss": 0.9731060266494751, "step": 2117 }, { "epoch": 1.1123949579831933, "grad_norm": 9.702414396064892, "learning_rate": 7.931504199066491e-06, "loss": 1.139401912689209, "step": 2118 }, { "epoch": 1.112920168067227, "grad_norm": 10.092397912276617, "learning_rate": 7.929027986783538e-06, "loss": 1.5177682638168335, "step": 2119 }, { "epoch": 1.1134453781512605, "grad_norm": 9.11765591697188, "learning_rate": 7.926550680299819e-06, "loss": 1.5064119100570679, "step": 2120 }, { "epoch": 1.1139705882352942, "grad_norm": 13.95785702803511, "learning_rate": 7.92407228054078e-06, "loss": 1.2225053310394287, "step": 2121 }, { "epoch": 1.1144957983193278, "grad_norm": 11.100026573328575, "learning_rate": 7.921592788432286e-06, "loss": 1.8686716556549072, "step": 2122 }, { "epoch": 1.1150210084033614, "grad_norm": 10.567833444457785, "learning_rate": 7.919112204900597e-06, "loss": 1.3062869310379028, "step": 2123 }, { "epoch": 1.115546218487395, "grad_norm": 16.474738009080117, "learning_rate": 7.916630530872394e-06, "loss": 1.540877342224121, "step": 2124 }, { "epoch": 1.1160714285714286, "grad_norm": 14.35074258298947, "learning_rate": 7.914147767274756e-06, "loss": 1.9422478675842285, "step": 2125 }, { "epoch": 1.1165966386554622, "grad_norm": 14.403776564618257, "learning_rate": 7.911663915035173e-06, "loss": 1.6253186464309692, "step": 2126 }, { "epoch": 1.1171218487394958, "grad_norm": 7.615524001765494, "learning_rate": 7.90917897508154e-06, "loss": 1.4136857986450195, "step": 2127 }, { "epoch": 1.1176470588235294, "grad_norm": 7.8717356840017265, "learning_rate": 7.90669294834216e-06, "loss": 1.2877161502838135, "step": 2128 }, { "epoch": 1.118172268907563, "grad_norm": 9.996219379659037, "learning_rate": 7.904205835745744e-06, "loss": 1.1908361911773682, "step": 2129 }, { "epoch": 1.1186974789915967, "grad_norm": 10.477496498628517, "learning_rate": 7.9017176382214e-06, "loss": 1.6736955642700195, "step": 2130 }, { "epoch": 1.1192226890756303, "grad_norm": 11.47395084321555, "learning_rate": 7.899228356698651e-06, "loss": 1.5920789241790771, "step": 2131 }, { "epoch": 1.1197478991596639, "grad_norm": 13.865603087171925, "learning_rate": 7.896737992107419e-06, "loss": 3.513488292694092, "step": 2132 }, { "epoch": 1.1202731092436975, "grad_norm": 11.555605178973293, "learning_rate": 7.894246545378037e-06, "loss": 1.7649219036102295, "step": 2133 }, { "epoch": 1.120798319327731, "grad_norm": 10.22602902533883, "learning_rate": 7.891754017441234e-06, "loss": 1.7826954126358032, "step": 2134 }, { "epoch": 1.1213235294117647, "grad_norm": 9.059720444354182, "learning_rate": 7.889260409228146e-06, "loss": 1.7290338277816772, "step": 2135 }, { "epoch": 1.1218487394957983, "grad_norm": 15.914026506393606, "learning_rate": 7.886765721670316e-06, "loss": 1.7057852745056152, "step": 2136 }, { "epoch": 1.122373949579832, "grad_norm": 11.616104242752517, "learning_rate": 7.884269955699689e-06, "loss": 1.070406198501587, "step": 2137 }, { "epoch": 1.1228991596638656, "grad_norm": 16.155071107074644, "learning_rate": 7.881773112248607e-06, "loss": 1.948705792427063, "step": 2138 }, { "epoch": 1.1234243697478992, "grad_norm": 12.81143105369926, "learning_rate": 7.879275192249822e-06, "loss": 1.8320515155792236, "step": 2139 }, { "epoch": 1.1239495798319328, "grad_norm": 13.145916077419596, "learning_rate": 7.876776196636486e-06, "loss": 1.7804217338562012, "step": 2140 }, { "epoch": 1.1244747899159664, "grad_norm": 8.863979098179895, "learning_rate": 7.874276126342151e-06, "loss": 1.4772311449050903, "step": 2141 }, { "epoch": 1.125, "grad_norm": 9.3697796448481, "learning_rate": 7.87177498230077e-06, "loss": 1.59299635887146, "step": 2142 }, { "epoch": 1.1255252100840336, "grad_norm": 10.06456063773516, "learning_rate": 7.869272765446701e-06, "loss": 1.3145837783813477, "step": 2143 }, { "epoch": 1.1260504201680672, "grad_norm": 15.160868267450002, "learning_rate": 7.866769476714697e-06, "loss": 1.5451463460922241, "step": 2144 }, { "epoch": 1.1265756302521008, "grad_norm": 10.424644826790757, "learning_rate": 7.86426511703992e-06, "loss": 1.3202342987060547, "step": 2145 }, { "epoch": 1.1271008403361344, "grad_norm": 12.435631858650448, "learning_rate": 7.861759687357922e-06, "loss": 1.6770918369293213, "step": 2146 }, { "epoch": 1.127626050420168, "grad_norm": 9.180228584474863, "learning_rate": 7.859253188604662e-06, "loss": 1.8522690534591675, "step": 2147 }, { "epoch": 1.1281512605042017, "grad_norm": 14.472194230527451, "learning_rate": 7.856745621716495e-06, "loss": 1.361525297164917, "step": 2148 }, { "epoch": 1.1286764705882353, "grad_norm": 16.700491541017072, "learning_rate": 7.854236987630178e-06, "loss": 1.7216304540634155, "step": 2149 }, { "epoch": 1.129201680672269, "grad_norm": 40.94757163724544, "learning_rate": 7.851727287282863e-06, "loss": 2.517025947570801, "step": 2150 }, { "epoch": 1.1297268907563025, "grad_norm": 13.502250829995372, "learning_rate": 7.8492165216121e-06, "loss": 1.6959048509597778, "step": 2151 }, { "epoch": 1.1302521008403361, "grad_norm": 8.181785474013854, "learning_rate": 7.846704691555843e-06, "loss": 1.4638583660125732, "step": 2152 }, { "epoch": 1.1307773109243697, "grad_norm": 13.888894644745077, "learning_rate": 7.844191798052438e-06, "loss": 1.392757534980774, "step": 2153 }, { "epoch": 1.1313025210084033, "grad_norm": 10.720030748817441, "learning_rate": 7.841677842040628e-06, "loss": 1.1821082830429077, "step": 2154 }, { "epoch": 1.131827731092437, "grad_norm": 15.365251726266722, "learning_rate": 7.839162824459559e-06, "loss": 1.3670696020126343, "step": 2155 }, { "epoch": 1.1323529411764706, "grad_norm": 19.855347202280466, "learning_rate": 7.836646746248764e-06, "loss": 2.9130239486694336, "step": 2156 }, { "epoch": 1.1328781512605042, "grad_norm": 14.311094415567702, "learning_rate": 7.834129608348183e-06, "loss": 2.0681352615356445, "step": 2157 }, { "epoch": 1.1334033613445378, "grad_norm": 11.205340555282687, "learning_rate": 7.831611411698141e-06, "loss": 2.2508697509765625, "step": 2158 }, { "epoch": 1.1339285714285714, "grad_norm": 7.939795916520177, "learning_rate": 7.829092157239369e-06, "loss": 1.585735559463501, "step": 2159 }, { "epoch": 1.134453781512605, "grad_norm": 22.578910723135277, "learning_rate": 7.826571845912985e-06, "loss": 1.9568084478378296, "step": 2160 }, { "epoch": 1.1349789915966386, "grad_norm": 12.494036014927381, "learning_rate": 7.824050478660506e-06, "loss": 2.1708197593688965, "step": 2161 }, { "epoch": 1.1355042016806722, "grad_norm": 9.881004788162711, "learning_rate": 7.821528056423842e-06, "loss": 1.2551255226135254, "step": 2162 }, { "epoch": 1.1360294117647058, "grad_norm": 10.450550682617532, "learning_rate": 7.819004580145298e-06, "loss": 1.5346543788909912, "step": 2163 }, { "epoch": 1.1365546218487395, "grad_norm": 15.25330423969048, "learning_rate": 7.816480050767573e-06, "loss": 1.47426438331604, "step": 2164 }, { "epoch": 1.137079831932773, "grad_norm": 10.78464785219572, "learning_rate": 7.813954469233758e-06, "loss": 1.9535887241363525, "step": 2165 }, { "epoch": 1.1376050420168067, "grad_norm": 8.468384442509409, "learning_rate": 7.811427836487336e-06, "loss": 1.2346587181091309, "step": 2166 }, { "epoch": 1.1381302521008403, "grad_norm": 13.7764814417855, "learning_rate": 7.808900153472188e-06, "loss": 0.9352411031723022, "step": 2167 }, { "epoch": 1.138655462184874, "grad_norm": 9.760876736141405, "learning_rate": 7.80637142113258e-06, "loss": 1.6460061073303223, "step": 2168 }, { "epoch": 1.1391806722689075, "grad_norm": 13.153259347584816, "learning_rate": 7.803841640413177e-06, "loss": 0.9652544856071472, "step": 2169 }, { "epoch": 1.1397058823529411, "grad_norm": 14.051395771547853, "learning_rate": 7.801310812259031e-06, "loss": 1.7024542093276978, "step": 2170 }, { "epoch": 1.1402310924369747, "grad_norm": 12.165075438526586, "learning_rate": 7.798778937615586e-06, "loss": 1.4805266857147217, "step": 2171 }, { "epoch": 1.1407563025210083, "grad_norm": 9.555541474279284, "learning_rate": 7.79624601742868e-06, "loss": 1.645806074142456, "step": 2172 }, { "epoch": 1.141281512605042, "grad_norm": 16.181434082927396, "learning_rate": 7.793712052644535e-06, "loss": 1.834975004196167, "step": 2173 }, { "epoch": 1.1418067226890756, "grad_norm": 13.115189814615517, "learning_rate": 7.791177044209773e-06, "loss": 1.0914037227630615, "step": 2174 }, { "epoch": 1.1423319327731092, "grad_norm": 8.68477919041166, "learning_rate": 7.788640993071397e-06, "loss": 1.2928192615509033, "step": 2175 }, { "epoch": 1.1428571428571428, "grad_norm": 9.277072928900862, "learning_rate": 7.786103900176804e-06, "loss": 1.5239992141723633, "step": 2176 }, { "epoch": 1.1433823529411764, "grad_norm": 7.165041431498236, "learning_rate": 7.783565766473777e-06, "loss": 1.7771321535110474, "step": 2177 }, { "epoch": 1.14390756302521, "grad_norm": 10.426663279881508, "learning_rate": 7.781026592910493e-06, "loss": 2.5389771461486816, "step": 2178 }, { "epoch": 1.1444327731092436, "grad_norm": 11.924272441885787, "learning_rate": 7.778486380435512e-06, "loss": 1.434378743171692, "step": 2179 }, { "epoch": 1.1449579831932772, "grad_norm": 7.598280634840082, "learning_rate": 7.775945129997788e-06, "loss": 1.3532328605651855, "step": 2180 }, { "epoch": 1.1454831932773109, "grad_norm": 12.587811480765048, "learning_rate": 7.773402842546654e-06, "loss": 1.4346599578857422, "step": 2181 }, { "epoch": 1.1460084033613445, "grad_norm": 9.462944816682953, "learning_rate": 7.770859519031839e-06, "loss": 1.7618328332901, "step": 2182 }, { "epoch": 1.146533613445378, "grad_norm": 11.06245957229266, "learning_rate": 7.768315160403453e-06, "loss": 1.3378257751464844, "step": 2183 }, { "epoch": 1.1470588235294117, "grad_norm": 10.404480736254966, "learning_rate": 7.765769767611999e-06, "loss": 1.3406996726989746, "step": 2184 }, { "epoch": 1.1475840336134453, "grad_norm": 12.673317864361517, "learning_rate": 7.76322334160836e-06, "loss": 1.298325538635254, "step": 2185 }, { "epoch": 1.148109243697479, "grad_norm": 12.277135317633379, "learning_rate": 7.76067588334381e-06, "loss": 1.5648902654647827, "step": 2186 }, { "epoch": 1.1486344537815125, "grad_norm": 13.71327764667219, "learning_rate": 7.758127393770003e-06, "loss": 1.9965567588806152, "step": 2187 }, { "epoch": 1.1491596638655461, "grad_norm": 10.60434114484367, "learning_rate": 7.755577873838985e-06, "loss": 1.3782873153686523, "step": 2188 }, { "epoch": 1.1496848739495797, "grad_norm": 18.137455973454465, "learning_rate": 7.75302732450318e-06, "loss": 1.3186569213867188, "step": 2189 }, { "epoch": 1.1502100840336134, "grad_norm": 18.197280725243864, "learning_rate": 7.750475746715403e-06, "loss": 2.3285152912139893, "step": 2190 }, { "epoch": 1.150735294117647, "grad_norm": 10.493131901404077, "learning_rate": 7.747923141428848e-06, "loss": 2.311206340789795, "step": 2191 }, { "epoch": 1.1512605042016806, "grad_norm": 9.222473958897405, "learning_rate": 7.745369509597095e-06, "loss": 1.473219633102417, "step": 2192 }, { "epoch": 1.1517857142857142, "grad_norm": 16.959872297960754, "learning_rate": 7.742814852174112e-06, "loss": 2.3074145317077637, "step": 2193 }, { "epoch": 1.1523109243697478, "grad_norm": 8.303568265808774, "learning_rate": 7.740259170114239e-06, "loss": 2.263456106185913, "step": 2194 }, { "epoch": 1.1528361344537816, "grad_norm": 10.857625163027615, "learning_rate": 7.73770246437221e-06, "loss": 0.9129188060760498, "step": 2195 }, { "epoch": 1.153361344537815, "grad_norm": 8.814366667068162, "learning_rate": 7.735144735903136e-06, "loss": 1.7062675952911377, "step": 2196 }, { "epoch": 1.1538865546218489, "grad_norm": 19.082552999553837, "learning_rate": 7.73258598566251e-06, "loss": 2.3475735187530518, "step": 2197 }, { "epoch": 1.1544117647058822, "grad_norm": 9.13030709719154, "learning_rate": 7.730026214606207e-06, "loss": 1.606765866279602, "step": 2198 }, { "epoch": 1.154936974789916, "grad_norm": 9.45085825425194, "learning_rate": 7.727465423690487e-06, "loss": 1.6220197677612305, "step": 2199 }, { "epoch": 1.1554621848739495, "grad_norm": 7.260769089318586, "learning_rate": 7.724903613871986e-06, "loss": 1.2804646492004395, "step": 2200 }, { "epoch": 1.1559873949579833, "grad_norm": 16.98348371327778, "learning_rate": 7.72234078610772e-06, "loss": 1.0989782810211182, "step": 2201 }, { "epoch": 1.1565126050420167, "grad_norm": 13.348591794056514, "learning_rate": 7.719776941355093e-06, "loss": 1.6219122409820557, "step": 2202 }, { "epoch": 1.1570378151260505, "grad_norm": 14.205782907159508, "learning_rate": 7.71721208057188e-06, "loss": 1.6735048294067383, "step": 2203 }, { "epoch": 1.157563025210084, "grad_norm": 8.44877838656389, "learning_rate": 7.714646204716244e-06, "loss": 1.04374361038208, "step": 2204 }, { "epoch": 1.1580882352941178, "grad_norm": 12.324779542535993, "learning_rate": 7.712079314746716e-06, "loss": 1.0110960006713867, "step": 2205 }, { "epoch": 1.1586134453781511, "grad_norm": 7.798643108103532, "learning_rate": 7.709511411622216e-06, "loss": 1.518144130706787, "step": 2206 }, { "epoch": 1.159138655462185, "grad_norm": 9.864624868476044, "learning_rate": 7.706942496302039e-06, "loss": 1.8752518892288208, "step": 2207 }, { "epoch": 1.1596638655462184, "grad_norm": 8.867600811607666, "learning_rate": 7.704372569745857e-06, "loss": 1.336355209350586, "step": 2208 }, { "epoch": 1.1601890756302522, "grad_norm": 13.112751487090154, "learning_rate": 7.701801632913722e-06, "loss": 2.052809953689575, "step": 2209 }, { "epoch": 1.1607142857142858, "grad_norm": 11.693376323968234, "learning_rate": 7.69922968676606e-06, "loss": 1.4469162225723267, "step": 2210 }, { "epoch": 1.1612394957983194, "grad_norm": 10.919647757046446, "learning_rate": 7.69665673226368e-06, "loss": 1.6109741926193237, "step": 2211 }, { "epoch": 1.161764705882353, "grad_norm": 10.553175507569769, "learning_rate": 7.69408277036776e-06, "loss": 1.6837149858474731, "step": 2212 }, { "epoch": 1.1622899159663866, "grad_norm": 20.045312743210857, "learning_rate": 7.691507802039861e-06, "loss": 1.4205944538116455, "step": 2213 }, { "epoch": 1.1628151260504203, "grad_norm": 11.196244236622643, "learning_rate": 7.688931828241916e-06, "loss": 1.208276629447937, "step": 2214 }, { "epoch": 1.1633403361344539, "grad_norm": 11.563170174544473, "learning_rate": 7.686354849936235e-06, "loss": 1.4215166568756104, "step": 2215 }, { "epoch": 1.1638655462184875, "grad_norm": 11.349779155393156, "learning_rate": 7.683776868085502e-06, "loss": 0.8596599102020264, "step": 2216 }, { "epoch": 1.164390756302521, "grad_norm": 10.356674416924188, "learning_rate": 7.68119788365278e-06, "loss": 1.4210429191589355, "step": 2217 }, { "epoch": 1.1649159663865547, "grad_norm": 10.493863437026867, "learning_rate": 7.678617897601501e-06, "loss": 1.152337908744812, "step": 2218 }, { "epoch": 1.1654411764705883, "grad_norm": 8.61595920869213, "learning_rate": 7.676036910895475e-06, "loss": 1.4171079397201538, "step": 2219 }, { "epoch": 1.165966386554622, "grad_norm": 11.934352631186487, "learning_rate": 7.673454924498882e-06, "loss": 1.6628780364990234, "step": 2220 }, { "epoch": 1.1664915966386555, "grad_norm": 11.17731569518165, "learning_rate": 7.670871939376281e-06, "loss": 0.9881746768951416, "step": 2221 }, { "epoch": 1.1670168067226891, "grad_norm": 10.279571354553513, "learning_rate": 7.668287956492601e-06, "loss": 1.43239426612854, "step": 2222 }, { "epoch": 1.1675420168067228, "grad_norm": 9.813795062381661, "learning_rate": 7.665702976813142e-06, "loss": 1.773823857307434, "step": 2223 }, { "epoch": 1.1680672268907564, "grad_norm": 15.557281978419983, "learning_rate": 7.663117001303581e-06, "loss": 1.0854368209838867, "step": 2224 }, { "epoch": 1.16859243697479, "grad_norm": 16.850206546046667, "learning_rate": 7.660530030929961e-06, "loss": 1.9169096946716309, "step": 2225 }, { "epoch": 1.1691176470588236, "grad_norm": 8.202714341848928, "learning_rate": 7.657942066658701e-06, "loss": 1.2993048429489136, "step": 2226 }, { "epoch": 1.1696428571428572, "grad_norm": 10.278018449182921, "learning_rate": 7.65535310945659e-06, "loss": 1.7205994129180908, "step": 2227 }, { "epoch": 1.1701680672268908, "grad_norm": 14.789210296516265, "learning_rate": 7.65276316029079e-06, "loss": 1.8085758686065674, "step": 2228 }, { "epoch": 1.1706932773109244, "grad_norm": 16.328586878063675, "learning_rate": 7.650172220128828e-06, "loss": 1.6963579654693604, "step": 2229 }, { "epoch": 1.171218487394958, "grad_norm": 16.158694679158433, "learning_rate": 7.647580289938607e-06, "loss": 1.520153284072876, "step": 2230 }, { "epoch": 1.1717436974789917, "grad_norm": 14.191398364207553, "learning_rate": 7.644987370688399e-06, "loss": 1.472973108291626, "step": 2231 }, { "epoch": 1.1722689075630253, "grad_norm": 6.213348496719508, "learning_rate": 7.642393463346843e-06, "loss": 1.6185429096221924, "step": 2232 }, { "epoch": 1.1727941176470589, "grad_norm": 8.854404306709156, "learning_rate": 7.639798568882947e-06, "loss": 1.7556939125061035, "step": 2233 }, { "epoch": 1.1733193277310925, "grad_norm": 9.953899976846554, "learning_rate": 7.63720268826609e-06, "loss": 1.6055560111999512, "step": 2234 }, { "epoch": 1.173844537815126, "grad_norm": 10.277069630384608, "learning_rate": 7.634605822466022e-06, "loss": 1.6435586214065552, "step": 2235 }, { "epoch": 1.1743697478991597, "grad_norm": 8.86394125184648, "learning_rate": 7.632007972452851e-06, "loss": 1.2334827184677124, "step": 2236 }, { "epoch": 1.1748949579831933, "grad_norm": 7.773956370147138, "learning_rate": 7.629409139197063e-06, "loss": 1.8761959075927734, "step": 2237 }, { "epoch": 1.175420168067227, "grad_norm": 10.846228404668672, "learning_rate": 7.626809323669506e-06, "loss": 1.469628095626831, "step": 2238 }, { "epoch": 1.1759453781512605, "grad_norm": 13.28251458961807, "learning_rate": 7.624208526841399e-06, "loss": 1.7837494611740112, "step": 2239 }, { "epoch": 1.1764705882352942, "grad_norm": 7.602935698634584, "learning_rate": 7.621606749684323e-06, "loss": 0.9383059740066528, "step": 2240 }, { "epoch": 1.1769957983193278, "grad_norm": 8.484095831447153, "learning_rate": 7.619003993170226e-06, "loss": 1.2930907011032104, "step": 2241 }, { "epoch": 1.1775210084033614, "grad_norm": 16.7328276650519, "learning_rate": 7.616400258271426e-06, "loss": 1.5919756889343262, "step": 2242 }, { "epoch": 1.178046218487395, "grad_norm": 11.454158982382433, "learning_rate": 7.613795545960602e-06, "loss": 1.1217150688171387, "step": 2243 }, { "epoch": 1.1785714285714286, "grad_norm": 18.35984108956994, "learning_rate": 7.611189857210801e-06, "loss": 1.9384922981262207, "step": 2244 }, { "epoch": 1.1790966386554622, "grad_norm": 11.217826540787774, "learning_rate": 7.608583192995433e-06, "loss": 1.3692917823791504, "step": 2245 }, { "epoch": 1.1796218487394958, "grad_norm": 9.570086657338253, "learning_rate": 7.605975554288272e-06, "loss": 1.1451338529586792, "step": 2246 }, { "epoch": 1.1801470588235294, "grad_norm": 11.920033506330194, "learning_rate": 7.603366942063457e-06, "loss": 1.7379136085510254, "step": 2247 }, { "epoch": 1.180672268907563, "grad_norm": 9.928107310522863, "learning_rate": 7.60075735729549e-06, "loss": 1.9680445194244385, "step": 2248 }, { "epoch": 1.1811974789915967, "grad_norm": 10.095289179159145, "learning_rate": 7.598146800959238e-06, "loss": 1.7297587394714355, "step": 2249 }, { "epoch": 1.1817226890756303, "grad_norm": 17.006755815106438, "learning_rate": 7.595535274029933e-06, "loss": 1.7141847610473633, "step": 2250 }, { "epoch": 1.1822478991596639, "grad_norm": 15.035510539089135, "learning_rate": 7.592922777483162e-06, "loss": 1.4125642776489258, "step": 2251 }, { "epoch": 1.1827731092436975, "grad_norm": 9.331982226354048, "learning_rate": 7.590309312294879e-06, "loss": 1.4077017307281494, "step": 2252 }, { "epoch": 1.183298319327731, "grad_norm": 11.39061833765179, "learning_rate": 7.5876948794414015e-06, "loss": 1.595273494720459, "step": 2253 }, { "epoch": 1.1838235294117647, "grad_norm": 16.170756474309968, "learning_rate": 7.585079479899407e-06, "loss": 1.803749918937683, "step": 2254 }, { "epoch": 1.1843487394957983, "grad_norm": 11.416218779228032, "learning_rate": 7.58246311464593e-06, "loss": 1.7962976694107056, "step": 2255 }, { "epoch": 1.184873949579832, "grad_norm": 11.930061478371591, "learning_rate": 7.579845784658373e-06, "loss": 1.7797696590423584, "step": 2256 }, { "epoch": 1.1853991596638656, "grad_norm": 22.18371416276764, "learning_rate": 7.577227490914495e-06, "loss": 1.3999063968658447, "step": 2257 }, { "epoch": 1.1859243697478992, "grad_norm": 13.512564010610445, "learning_rate": 7.5746082343924146e-06, "loss": 1.5380282402038574, "step": 2258 }, { "epoch": 1.1864495798319328, "grad_norm": 14.31168018522443, "learning_rate": 7.571988016070611e-06, "loss": 1.2362651824951172, "step": 2259 }, { "epoch": 1.1869747899159664, "grad_norm": 7.639694621889757, "learning_rate": 7.569366836927921e-06, "loss": 1.8090265989303589, "step": 2260 }, { "epoch": 1.1875, "grad_norm": 8.50071725348861, "learning_rate": 7.5667446979435445e-06, "loss": 1.4643746614456177, "step": 2261 }, { "epoch": 1.1880252100840336, "grad_norm": 7.557482951144118, "learning_rate": 7.564121600097037e-06, "loss": 1.645832896232605, "step": 2262 }, { "epoch": 1.1885504201680672, "grad_norm": 12.676426625212807, "learning_rate": 7.561497544368309e-06, "loss": 1.6147786378860474, "step": 2263 }, { "epoch": 1.1890756302521008, "grad_norm": 16.807843359095575, "learning_rate": 7.558872531737635e-06, "loss": 1.0271517038345337, "step": 2264 }, { "epoch": 1.1896008403361344, "grad_norm": 7.801099310746753, "learning_rate": 7.556246563185648e-06, "loss": 1.6632678508758545, "step": 2265 }, { "epoch": 1.190126050420168, "grad_norm": 9.26191272675503, "learning_rate": 7.553619639693328e-06, "loss": 1.1168607473373413, "step": 2266 }, { "epoch": 1.1906512605042017, "grad_norm": 10.528482369183887, "learning_rate": 7.550991762242022e-06, "loss": 1.5746139287948608, "step": 2267 }, { "epoch": 1.1911764705882353, "grad_norm": 9.81673524549289, "learning_rate": 7.5483629318134285e-06, "loss": 2.125807762145996, "step": 2268 }, { "epoch": 1.191701680672269, "grad_norm": 10.24554407141191, "learning_rate": 7.545733149389605e-06, "loss": 1.7941548824310303, "step": 2269 }, { "epoch": 1.1922268907563025, "grad_norm": 17.2128839696117, "learning_rate": 7.5431024159529585e-06, "loss": 1.520341396331787, "step": 2270 }, { "epoch": 1.1927521008403361, "grad_norm": 7.006581734437614, "learning_rate": 7.540470732486258e-06, "loss": 1.9107754230499268, "step": 2271 }, { "epoch": 1.1932773109243697, "grad_norm": 8.43516927711525, "learning_rate": 7.537838099972628e-06, "loss": 1.8271441459655762, "step": 2272 }, { "epoch": 1.1938025210084033, "grad_norm": 9.194221379236254, "learning_rate": 7.535204519395538e-06, "loss": 1.4770328998565674, "step": 2273 }, { "epoch": 1.194327731092437, "grad_norm": 13.16455808693371, "learning_rate": 7.5325699917388226e-06, "loss": 2.21441912651062, "step": 2274 }, { "epoch": 1.1948529411764706, "grad_norm": 24.47272132447887, "learning_rate": 7.529934517986663e-06, "loss": 2.2615766525268555, "step": 2275 }, { "epoch": 1.1953781512605042, "grad_norm": 22.196531362086226, "learning_rate": 7.5272980991236015e-06, "loss": 1.48537015914917, "step": 2276 }, { "epoch": 1.1959033613445378, "grad_norm": 10.30229607018962, "learning_rate": 7.5246607361345215e-06, "loss": 0.6189466714859009, "step": 2277 }, { "epoch": 1.1964285714285714, "grad_norm": 11.89676089870916, "learning_rate": 7.522022430004672e-06, "loss": 1.6322544813156128, "step": 2278 }, { "epoch": 1.196953781512605, "grad_norm": 14.66246786517055, "learning_rate": 7.519383181719644e-06, "loss": 1.8301173448562622, "step": 2279 }, { "epoch": 1.1974789915966386, "grad_norm": 12.142503576275619, "learning_rate": 7.516742992265389e-06, "loss": 1.1744235754013062, "step": 2280 }, { "epoch": 1.1980042016806722, "grad_norm": 13.850062553650906, "learning_rate": 7.514101862628203e-06, "loss": 1.7453515529632568, "step": 2281 }, { "epoch": 1.1985294117647058, "grad_norm": 23.510428274436297, "learning_rate": 7.511459793794736e-06, "loss": 2.7380056381225586, "step": 2282 }, { "epoch": 1.1990546218487395, "grad_norm": 11.481471300399695, "learning_rate": 7.508816786751991e-06, "loss": 1.2900176048278809, "step": 2283 }, { "epoch": 1.199579831932773, "grad_norm": 8.807838434433554, "learning_rate": 7.506172842487321e-06, "loss": 1.7967376708984375, "step": 2284 }, { "epoch": 1.2001050420168067, "grad_norm": 11.144029341780646, "learning_rate": 7.503527961988422e-06, "loss": 1.5620723962783813, "step": 2285 }, { "epoch": 1.2006302521008403, "grad_norm": 13.161074191782578, "learning_rate": 7.500882146243349e-06, "loss": 1.4015578031539917, "step": 2286 }, { "epoch": 1.201155462184874, "grad_norm": 11.326606985210484, "learning_rate": 7.498235396240505e-06, "loss": 1.5541796684265137, "step": 2287 }, { "epoch": 1.2016806722689075, "grad_norm": 11.165684930086023, "learning_rate": 7.495587712968637e-06, "loss": 1.4527666568756104, "step": 2288 }, { "epoch": 1.2022058823529411, "grad_norm": 10.299875594492086, "learning_rate": 7.492939097416842e-06, "loss": 1.3378864526748657, "step": 2289 }, { "epoch": 1.2027310924369747, "grad_norm": 12.416826127313424, "learning_rate": 7.49028955057457e-06, "loss": 1.7873291969299316, "step": 2290 }, { "epoch": 1.2032563025210083, "grad_norm": 7.324940705463291, "learning_rate": 7.487639073431615e-06, "loss": 0.9052544832229614, "step": 2291 }, { "epoch": 1.203781512605042, "grad_norm": 11.160324785999983, "learning_rate": 7.4849876669781175e-06, "loss": 1.3971418142318726, "step": 2292 }, { "epoch": 1.2043067226890756, "grad_norm": 9.812802069170903, "learning_rate": 7.482335332204568e-06, "loss": 1.3695425987243652, "step": 2293 }, { "epoch": 1.2048319327731092, "grad_norm": 24.923735606326765, "learning_rate": 7.4796820701018025e-06, "loss": 1.7986787557601929, "step": 2294 }, { "epoch": 1.2053571428571428, "grad_norm": 25.29305918756146, "learning_rate": 7.477027881661003e-06, "loss": 1.5392940044403076, "step": 2295 }, { "epoch": 1.2058823529411764, "grad_norm": 11.143561320900542, "learning_rate": 7.4743727678737e-06, "loss": 1.5945121049880981, "step": 2296 }, { "epoch": 1.20640756302521, "grad_norm": 20.283242988904952, "learning_rate": 7.471716729731764e-06, "loss": 1.3238778114318848, "step": 2297 }, { "epoch": 1.2069327731092436, "grad_norm": 14.612581857378458, "learning_rate": 7.469059768227419e-06, "loss": 1.3909658193588257, "step": 2298 }, { "epoch": 1.2074579831932772, "grad_norm": 13.84989681165633, "learning_rate": 7.466401884353227e-06, "loss": 1.689344882965088, "step": 2299 }, { "epoch": 1.2079831932773109, "grad_norm": 15.509073178148029, "learning_rate": 7.4637430791020974e-06, "loss": 1.5234793424606323, "step": 2300 }, { "epoch": 1.2085084033613445, "grad_norm": 13.34196354051738, "learning_rate": 7.461083353467283e-06, "loss": 1.6402065753936768, "step": 2301 }, { "epoch": 1.209033613445378, "grad_norm": 15.28186026868191, "learning_rate": 7.458422708442382e-06, "loss": 1.3864027261734009, "step": 2302 }, { "epoch": 1.2095588235294117, "grad_norm": 11.622612511011315, "learning_rate": 7.455761145021335e-06, "loss": 1.594147801399231, "step": 2303 }, { "epoch": 1.2100840336134453, "grad_norm": 12.765662852247218, "learning_rate": 7.453098664198426e-06, "loss": 1.1258426904678345, "step": 2304 }, { "epoch": 1.210609243697479, "grad_norm": 14.58211207292688, "learning_rate": 7.450435266968279e-06, "loss": 1.759843349456787, "step": 2305 }, { "epoch": 1.2111344537815125, "grad_norm": 15.514594495150906, "learning_rate": 7.447770954325866e-06, "loss": 1.5733953714370728, "step": 2306 }, { "epoch": 1.2116596638655461, "grad_norm": 12.880778086443373, "learning_rate": 7.445105727266496e-06, "loss": 2.0581679344177246, "step": 2307 }, { "epoch": 1.2121848739495797, "grad_norm": 15.249229861220583, "learning_rate": 7.4424395867858224e-06, "loss": 1.3483026027679443, "step": 2308 }, { "epoch": 1.2127100840336134, "grad_norm": 10.515358810176897, "learning_rate": 7.4397725338798365e-06, "loss": 1.224669337272644, "step": 2309 }, { "epoch": 1.213235294117647, "grad_norm": 13.802263456031447, "learning_rate": 7.437104569544874e-06, "loss": 1.6141116619110107, "step": 2310 }, { "epoch": 1.2137605042016806, "grad_norm": 12.138170482009528, "learning_rate": 7.4344356947776106e-06, "loss": 1.521715760231018, "step": 2311 }, { "epoch": 1.2142857142857142, "grad_norm": 9.396927032693675, "learning_rate": 7.431765910575061e-06, "loss": 1.1617248058319092, "step": 2312 }, { "epoch": 1.2148109243697478, "grad_norm": 14.34054360977382, "learning_rate": 7.429095217934578e-06, "loss": 1.523795247077942, "step": 2313 }, { "epoch": 1.2153361344537816, "grad_norm": 9.817340736775819, "learning_rate": 7.426423617853858e-06, "loss": 1.5130796432495117, "step": 2314 }, { "epoch": 1.215861344537815, "grad_norm": 13.588005997380902, "learning_rate": 7.423751111330933e-06, "loss": 1.6209462881088257, "step": 2315 }, { "epoch": 1.2163865546218489, "grad_norm": 10.733814846402563, "learning_rate": 7.421077699364174e-06, "loss": 1.9656691551208496, "step": 2316 }, { "epoch": 1.2169117647058822, "grad_norm": 15.954637512455726, "learning_rate": 7.4184033829522935e-06, "loss": 1.733542561531067, "step": 2317 }, { "epoch": 1.217436974789916, "grad_norm": 20.652914552902658, "learning_rate": 7.415728163094338e-06, "loss": 2.0990822315216064, "step": 2318 }, { "epoch": 1.2179621848739495, "grad_norm": 18.289704594032273, "learning_rate": 7.413052040789692e-06, "loss": 2.069448709487915, "step": 2319 }, { "epoch": 1.2184873949579833, "grad_norm": 10.974487038682968, "learning_rate": 7.410375017038078e-06, "loss": 1.6104955673217773, "step": 2320 }, { "epoch": 1.2190126050420167, "grad_norm": 7.259922667918353, "learning_rate": 7.4076970928395565e-06, "loss": 1.2447270154953003, "step": 2321 }, { "epoch": 1.2195378151260505, "grad_norm": 12.022730022541118, "learning_rate": 7.405018269194522e-06, "loss": 1.1101574897766113, "step": 2322 }, { "epoch": 1.220063025210084, "grad_norm": 9.148123641674003, "learning_rate": 7.402338547103708e-06, "loss": 1.99159574508667, "step": 2323 }, { "epoch": 1.2205882352941178, "grad_norm": 11.43638906210396, "learning_rate": 7.399657927568178e-06, "loss": 1.485155701637268, "step": 2324 }, { "epoch": 1.2211134453781511, "grad_norm": 10.90216510145063, "learning_rate": 7.396976411589338e-06, "loss": 1.0308756828308105, "step": 2325 }, { "epoch": 1.221638655462185, "grad_norm": 12.655532474708437, "learning_rate": 7.3942940001689245e-06, "loss": 1.5401026010513306, "step": 2326 }, { "epoch": 1.2221638655462184, "grad_norm": 9.194874093328167, "learning_rate": 7.391610694309008e-06, "loss": 1.2197831869125366, "step": 2327 }, { "epoch": 1.2226890756302522, "grad_norm": 14.753605984197215, "learning_rate": 7.388926495011996e-06, "loss": 2.316397190093994, "step": 2328 }, { "epoch": 1.2232142857142858, "grad_norm": 9.772386958492579, "learning_rate": 7.386241403280629e-06, "loss": 1.372429609298706, "step": 2329 }, { "epoch": 1.2237394957983194, "grad_norm": 10.631382394346844, "learning_rate": 7.3835554201179785e-06, "loss": 1.5177174806594849, "step": 2330 }, { "epoch": 1.224264705882353, "grad_norm": 10.98341862924508, "learning_rate": 7.380868546527449e-06, "loss": 1.1611489057540894, "step": 2331 }, { "epoch": 1.2247899159663866, "grad_norm": 12.532395756792724, "learning_rate": 7.378180783512784e-06, "loss": 1.8947821855545044, "step": 2332 }, { "epoch": 1.2253151260504203, "grad_norm": 12.089171708399894, "learning_rate": 7.375492132078051e-06, "loss": 1.257406234741211, "step": 2333 }, { "epoch": 1.2258403361344539, "grad_norm": 13.513579765433992, "learning_rate": 7.372802593227656e-06, "loss": 1.256013035774231, "step": 2334 }, { "epoch": 1.2263655462184875, "grad_norm": 12.853310916075031, "learning_rate": 7.3701121679663305e-06, "loss": 1.4088852405548096, "step": 2335 }, { "epoch": 1.226890756302521, "grad_norm": 8.70224293067176, "learning_rate": 7.36742085729914e-06, "loss": 2.04591965675354, "step": 2336 }, { "epoch": 1.2274159663865547, "grad_norm": 13.45383616067398, "learning_rate": 7.364728662231484e-06, "loss": 1.4291229248046875, "step": 2337 }, { "epoch": 1.2279411764705883, "grad_norm": 18.310273006141156, "learning_rate": 7.362035583769087e-06, "loss": 1.1931278705596924, "step": 2338 }, { "epoch": 1.228466386554622, "grad_norm": 7.059913921580223, "learning_rate": 7.359341622918006e-06, "loss": 1.6950139999389648, "step": 2339 }, { "epoch": 1.2289915966386555, "grad_norm": 17.606553957763804, "learning_rate": 7.356646780684629e-06, "loss": 2.0042810440063477, "step": 2340 }, { "epoch": 1.2295168067226891, "grad_norm": 14.03085666214147, "learning_rate": 7.353951058075669e-06, "loss": 1.4454351663589478, "step": 2341 }, { "epoch": 1.2300420168067228, "grad_norm": 8.559323919901624, "learning_rate": 7.351254456098172e-06, "loss": 1.7152481079101562, "step": 2342 }, { "epoch": 1.2305672268907564, "grad_norm": 11.824404222001439, "learning_rate": 7.348556975759512e-06, "loss": 1.5654159784317017, "step": 2343 }, { "epoch": 1.23109243697479, "grad_norm": 10.644263196876427, "learning_rate": 7.34585861806739e-06, "loss": 1.801296591758728, "step": 2344 }, { "epoch": 1.2316176470588236, "grad_norm": 13.96733594110418, "learning_rate": 7.343159384029833e-06, "loss": 1.6108777523040771, "step": 2345 }, { "epoch": 1.2321428571428572, "grad_norm": 14.731768021182264, "learning_rate": 7.340459274655198e-06, "loss": 1.9901306629180908, "step": 2346 }, { "epoch": 1.2326680672268908, "grad_norm": 16.95908361334622, "learning_rate": 7.3377582909521705e-06, "loss": 1.535915493965149, "step": 2347 }, { "epoch": 1.2331932773109244, "grad_norm": 22.4332799229794, "learning_rate": 7.335056433929758e-06, "loss": 3.0728158950805664, "step": 2348 }, { "epoch": 1.233718487394958, "grad_norm": 14.20740872874293, "learning_rate": 7.332353704597299e-06, "loss": 1.109440565109253, "step": 2349 }, { "epoch": 1.2342436974789917, "grad_norm": 8.476647453494687, "learning_rate": 7.3296501039644515e-06, "loss": 1.7299296855926514, "step": 2350 }, { "epoch": 1.2347689075630253, "grad_norm": 11.261603136272209, "learning_rate": 7.326945633041209e-06, "loss": 1.9006025791168213, "step": 2351 }, { "epoch": 1.2352941176470589, "grad_norm": 11.608106477800181, "learning_rate": 7.32424029283788e-06, "loss": 1.1014738082885742, "step": 2352 }, { "epoch": 1.2358193277310925, "grad_norm": 7.4838832637857235, "learning_rate": 7.321534084365101e-06, "loss": 1.101230502128601, "step": 2353 }, { "epoch": 1.236344537815126, "grad_norm": 10.786551368424025, "learning_rate": 7.318827008633837e-06, "loss": 1.6727733612060547, "step": 2354 }, { "epoch": 1.2368697478991597, "grad_norm": 8.95980030612558, "learning_rate": 7.316119066655374e-06, "loss": 1.7786109447479248, "step": 2355 }, { "epoch": 1.2373949579831933, "grad_norm": 10.129048303462964, "learning_rate": 7.31341025944132e-06, "loss": 1.322527527809143, "step": 2356 }, { "epoch": 1.237920168067227, "grad_norm": 9.381053963879273, "learning_rate": 7.310700588003605e-06, "loss": 1.10860276222229, "step": 2357 }, { "epoch": 1.2384453781512605, "grad_norm": 9.653449934052412, "learning_rate": 7.307990053354489e-06, "loss": 1.2966623306274414, "step": 2358 }, { "epoch": 1.2389705882352942, "grad_norm": 14.132126149032192, "learning_rate": 7.305278656506547e-06, "loss": 1.5544002056121826, "step": 2359 }, { "epoch": 1.2394957983193278, "grad_norm": 22.343298440599415, "learning_rate": 7.3025663984726804e-06, "loss": 1.3582324981689453, "step": 2360 }, { "epoch": 1.2400210084033614, "grad_norm": 15.390441555017505, "learning_rate": 7.299853280266109e-06, "loss": 2.1058707237243652, "step": 2361 }, { "epoch": 1.240546218487395, "grad_norm": 15.624905181753281, "learning_rate": 7.29713930290038e-06, "loss": 1.9919989109039307, "step": 2362 }, { "epoch": 1.2410714285714286, "grad_norm": 8.29412970289628, "learning_rate": 7.294424467389354e-06, "loss": 1.880364179611206, "step": 2363 }, { "epoch": 1.2415966386554622, "grad_norm": 11.708658580648029, "learning_rate": 7.291708774747215e-06, "loss": 1.5461198091506958, "step": 2364 }, { "epoch": 1.2421218487394958, "grad_norm": 10.28464568270922, "learning_rate": 7.28899222598847e-06, "loss": 2.2227697372436523, "step": 2365 }, { "epoch": 1.2426470588235294, "grad_norm": 9.68029298731685, "learning_rate": 7.286274822127943e-06, "loss": 1.1307097673416138, "step": 2366 }, { "epoch": 1.243172268907563, "grad_norm": 12.22929327621289, "learning_rate": 7.2835565641807784e-06, "loss": 1.349056601524353, "step": 2367 }, { "epoch": 1.2436974789915967, "grad_norm": 8.766422223819754, "learning_rate": 7.280837453162437e-06, "loss": 2.0873913764953613, "step": 2368 }, { "epoch": 1.2442226890756303, "grad_norm": 7.639382297018337, "learning_rate": 7.278117490088703e-06, "loss": 1.324272871017456, "step": 2369 }, { "epoch": 1.2447478991596639, "grad_norm": 10.350289263510936, "learning_rate": 7.2753966759756775e-06, "loss": 1.8540616035461426, "step": 2370 }, { "epoch": 1.2452731092436975, "grad_norm": 9.468841605534482, "learning_rate": 7.272675011839776e-06, "loss": 2.102241039276123, "step": 2371 }, { "epoch": 1.245798319327731, "grad_norm": 9.032877181742217, "learning_rate": 7.269952498697734e-06, "loss": 1.2577991485595703, "step": 2372 }, { "epoch": 1.2463235294117647, "grad_norm": 12.83783323017104, "learning_rate": 7.267229137566607e-06, "loss": 1.348679780960083, "step": 2373 }, { "epoch": 1.2468487394957983, "grad_norm": 16.509721885073024, "learning_rate": 7.2645049294637625e-06, "loss": 1.8049670457839966, "step": 2374 }, { "epoch": 1.247373949579832, "grad_norm": 7.679108219991571, "learning_rate": 7.261779875406887e-06, "loss": 1.522689938545227, "step": 2375 }, { "epoch": 1.2478991596638656, "grad_norm": 7.993956714427982, "learning_rate": 7.259053976413981e-06, "loss": 1.6695988178253174, "step": 2376 }, { "epoch": 1.2484243697478992, "grad_norm": 11.598346916315906, "learning_rate": 7.256327233503365e-06, "loss": 1.1637938022613525, "step": 2377 }, { "epoch": 1.2489495798319328, "grad_norm": 19.863618347328224, "learning_rate": 7.2535996476936696e-06, "loss": 1.5508743524551392, "step": 2378 }, { "epoch": 1.2494747899159664, "grad_norm": 19.81622027458916, "learning_rate": 7.2508712200038435e-06, "loss": 1.48831307888031, "step": 2379 }, { "epoch": 1.25, "grad_norm": 11.056709659977496, "learning_rate": 7.248141951453148e-06, "loss": 1.6738970279693604, "step": 2380 }, { "epoch": 1.2505252100840336, "grad_norm": 7.266503881384918, "learning_rate": 7.24541184306116e-06, "loss": 1.417891025543213, "step": 2381 }, { "epoch": 1.2510504201680672, "grad_norm": 9.303065882150086, "learning_rate": 7.24268089584777e-06, "loss": 2.12142276763916, "step": 2382 }, { "epoch": 1.2515756302521008, "grad_norm": 6.535764656682369, "learning_rate": 7.239949110833182e-06, "loss": 1.5944756269454956, "step": 2383 }, { "epoch": 1.2521008403361344, "grad_norm": 22.395338083691897, "learning_rate": 7.2372164890379106e-06, "loss": 1.6734291315078735, "step": 2384 }, { "epoch": 1.252626050420168, "grad_norm": 10.720706252841383, "learning_rate": 7.234483031482787e-06, "loss": 1.893827199935913, "step": 2385 }, { "epoch": 1.2531512605042017, "grad_norm": 8.158464787827441, "learning_rate": 7.231748739188951e-06, "loss": 1.8776541948318481, "step": 2386 }, { "epoch": 1.2536764705882353, "grad_norm": 10.89127995232285, "learning_rate": 7.229013613177856e-06, "loss": 1.3742156028747559, "step": 2387 }, { "epoch": 1.254201680672269, "grad_norm": 8.674192807216064, "learning_rate": 7.2262776544712665e-06, "loss": 0.8402234315872192, "step": 2388 }, { "epoch": 1.2547268907563025, "grad_norm": 13.504420594820457, "learning_rate": 7.223540864091259e-06, "loss": 1.623948097229004, "step": 2389 }, { "epoch": 1.2552521008403361, "grad_norm": 10.586645126393956, "learning_rate": 7.2208032430602185e-06, "loss": 1.0190811157226562, "step": 2390 }, { "epoch": 1.2557773109243697, "grad_norm": 12.57602406447126, "learning_rate": 7.218064792400842e-06, "loss": 1.052843451499939, "step": 2391 }, { "epoch": 1.2563025210084033, "grad_norm": 6.133078075000218, "learning_rate": 7.215325513136137e-06, "loss": 1.6767473220825195, "step": 2392 }, { "epoch": 1.256827731092437, "grad_norm": 10.340515409303494, "learning_rate": 7.2125854062894184e-06, "loss": 1.3847999572753906, "step": 2393 }, { "epoch": 1.2573529411764706, "grad_norm": 11.643403558581834, "learning_rate": 7.209844472884313e-06, "loss": 2.0188345909118652, "step": 2394 }, { "epoch": 1.2578781512605042, "grad_norm": 10.25441324221315, "learning_rate": 7.207102713944752e-06, "loss": 1.5930418968200684, "step": 2395 }, { "epoch": 1.2584033613445378, "grad_norm": 14.36265003017186, "learning_rate": 7.204360130494981e-06, "loss": 1.1003354787826538, "step": 2396 }, { "epoch": 1.2589285714285714, "grad_norm": 9.903410270466072, "learning_rate": 7.201616723559548e-06, "loss": 1.4389723539352417, "step": 2397 }, { "epoch": 1.259453781512605, "grad_norm": 16.36601934860646, "learning_rate": 7.198872494163312e-06, "loss": 2.5507800579071045, "step": 2398 }, { "epoch": 1.2599789915966386, "grad_norm": 15.286978689183501, "learning_rate": 7.19612744333144e-06, "loss": 2.1392221450805664, "step": 2399 }, { "epoch": 1.2605042016806722, "grad_norm": 17.465508677471693, "learning_rate": 7.193381572089402e-06, "loss": 1.398755669593811, "step": 2400 }, { "epoch": 1.2610294117647058, "grad_norm": 11.700706016241789, "learning_rate": 7.190634881462976e-06, "loss": 1.5313366651535034, "step": 2401 }, { "epoch": 1.2615546218487395, "grad_norm": 11.195516448088934, "learning_rate": 7.18788737247825e-06, "loss": 1.6762734651565552, "step": 2402 }, { "epoch": 1.262079831932773, "grad_norm": 10.082021172255459, "learning_rate": 7.185139046161611e-06, "loss": 1.2127187252044678, "step": 2403 }, { "epoch": 1.2626050420168067, "grad_norm": 13.058806553080483, "learning_rate": 7.182389903539757e-06, "loss": 1.383756160736084, "step": 2404 }, { "epoch": 1.2631302521008403, "grad_norm": 22.872980267738292, "learning_rate": 7.179639945639688e-06, "loss": 1.0362639427185059, "step": 2405 }, { "epoch": 1.263655462184874, "grad_norm": 13.011234775175142, "learning_rate": 7.1768891734887095e-06, "loss": 2.003307342529297, "step": 2406 }, { "epoch": 1.2641806722689075, "grad_norm": 13.357718438880962, "learning_rate": 7.174137588114432e-06, "loss": 1.7289718389511108, "step": 2407 }, { "epoch": 1.2647058823529411, "grad_norm": 8.875762013574194, "learning_rate": 7.171385190544766e-06, "loss": 1.2428697347640991, "step": 2408 }, { "epoch": 1.2652310924369747, "grad_norm": 13.211830534036327, "learning_rate": 7.168631981807931e-06, "loss": 1.3453094959259033, "step": 2409 }, { "epoch": 1.2657563025210083, "grad_norm": 17.40159836218747, "learning_rate": 7.165877962932444e-06, "loss": 2.3050365447998047, "step": 2410 }, { "epoch": 1.266281512605042, "grad_norm": 9.799835084766892, "learning_rate": 7.1631231349471306e-06, "loss": 2.27128267288208, "step": 2411 }, { "epoch": 1.2668067226890756, "grad_norm": 16.14197538278971, "learning_rate": 7.160367498881113e-06, "loss": 1.8228071928024292, "step": 2412 }, { "epoch": 1.2673319327731092, "grad_norm": 15.260337123016297, "learning_rate": 7.15761105576382e-06, "loss": 1.5178961753845215, "step": 2413 }, { "epoch": 1.2678571428571428, "grad_norm": 8.091406049823233, "learning_rate": 7.1548538066249776e-06, "loss": 1.494361400604248, "step": 2414 }, { "epoch": 1.2683823529411764, "grad_norm": 12.0098589479205, "learning_rate": 7.152095752494616e-06, "loss": 1.3326354026794434, "step": 2415 }, { "epoch": 1.26890756302521, "grad_norm": 9.095525866005758, "learning_rate": 7.149336894403064e-06, "loss": 0.7168285250663757, "step": 2416 }, { "epoch": 1.2694327731092436, "grad_norm": 14.397071140794958, "learning_rate": 7.1465772333809524e-06, "loss": 1.0425605773925781, "step": 2417 }, { "epoch": 1.2699579831932772, "grad_norm": 9.501871797516532, "learning_rate": 7.143816770459211e-06, "loss": 1.8473377227783203, "step": 2418 }, { "epoch": 1.2704831932773109, "grad_norm": 11.960339149604845, "learning_rate": 7.141055506669072e-06, "loss": 1.3001151084899902, "step": 2419 }, { "epoch": 1.2710084033613445, "grad_norm": 12.42609961220722, "learning_rate": 7.13829344304206e-06, "loss": 1.3024431467056274, "step": 2420 }, { "epoch": 1.271533613445378, "grad_norm": 15.451523377574512, "learning_rate": 7.1355305806100036e-06, "loss": 2.631690263748169, "step": 2421 }, { "epoch": 1.2720588235294117, "grad_norm": 14.468136365218367, "learning_rate": 7.132766920405033e-06, "loss": 2.0492539405822754, "step": 2422 }, { "epoch": 1.2725840336134453, "grad_norm": 7.644834895154859, "learning_rate": 7.130002463459569e-06, "loss": 1.053494930267334, "step": 2423 }, { "epoch": 1.273109243697479, "grad_norm": 9.407528896608309, "learning_rate": 7.1272372108063315e-06, "loss": 1.9194227457046509, "step": 2424 }, { "epoch": 1.2736344537815127, "grad_norm": 15.39001144716507, "learning_rate": 7.124471163478344e-06, "loss": 1.0588860511779785, "step": 2425 }, { "epoch": 1.2741596638655461, "grad_norm": 14.90338302375827, "learning_rate": 7.1217043225089196e-06, "loss": 1.0698069334030151, "step": 2426 }, { "epoch": 1.27468487394958, "grad_norm": 19.89454204903591, "learning_rate": 7.118936688931672e-06, "loss": 1.8833686113357544, "step": 2427 }, { "epoch": 1.2752100840336134, "grad_norm": 13.361454987704288, "learning_rate": 7.1161682637805065e-06, "loss": 1.7349896430969238, "step": 2428 }, { "epoch": 1.2757352941176472, "grad_norm": 9.598161833500898, "learning_rate": 7.113399048089631e-06, "loss": 1.980614185333252, "step": 2429 }, { "epoch": 1.2762605042016806, "grad_norm": 15.600245357473879, "learning_rate": 7.110629042893543e-06, "loss": 1.5294065475463867, "step": 2430 }, { "epoch": 1.2767857142857144, "grad_norm": 18.83154035782719, "learning_rate": 7.1078582492270385e-06, "loss": 1.830521583557129, "step": 2431 }, { "epoch": 1.2773109243697478, "grad_norm": 9.997210701856849, "learning_rate": 7.105086668125205e-06, "loss": 2.4552969932556152, "step": 2432 }, { "epoch": 1.2778361344537816, "grad_norm": 10.710851096646278, "learning_rate": 7.102314300623425e-06, "loss": 1.468703031539917, "step": 2433 }, { "epoch": 1.278361344537815, "grad_norm": 18.498009820053685, "learning_rate": 7.0995411477573786e-06, "loss": 3.8373215198516846, "step": 2434 }, { "epoch": 1.2788865546218489, "grad_norm": 13.547066904733787, "learning_rate": 7.096767210563031e-06, "loss": 1.225649118423462, "step": 2435 }, { "epoch": 1.2794117647058822, "grad_norm": 15.803823739293396, "learning_rate": 7.093992490076652e-06, "loss": 1.571283221244812, "step": 2436 }, { "epoch": 1.279936974789916, "grad_norm": 12.594173306429065, "learning_rate": 7.091216987334792e-06, "loss": 1.723379135131836, "step": 2437 }, { "epoch": 1.2804621848739495, "grad_norm": 8.408963976459619, "learning_rate": 7.088440703374302e-06, "loss": 1.1697720289230347, "step": 2438 }, { "epoch": 1.2809873949579833, "grad_norm": 8.255317949054, "learning_rate": 7.0856636392323205e-06, "loss": 1.266052484512329, "step": 2439 }, { "epoch": 1.2815126050420167, "grad_norm": 27.24276770547867, "learning_rate": 7.08288579594628e-06, "loss": 1.7574093341827393, "step": 2440 }, { "epoch": 1.2820378151260505, "grad_norm": 15.15257145767664, "learning_rate": 7.080107174553903e-06, "loss": 2.3162498474121094, "step": 2441 }, { "epoch": 1.282563025210084, "grad_norm": 15.105360916116396, "learning_rate": 7.0773277760932015e-06, "loss": 1.3757708072662354, "step": 2442 }, { "epoch": 1.2830882352941178, "grad_norm": 12.230471977015398, "learning_rate": 7.074547601602479e-06, "loss": 1.2318499088287354, "step": 2443 }, { "epoch": 1.2836134453781511, "grad_norm": 10.457889294630673, "learning_rate": 7.071766652120331e-06, "loss": 2.0742783546447754, "step": 2444 }, { "epoch": 1.284138655462185, "grad_norm": 23.0353340887712, "learning_rate": 7.068984928685638e-06, "loss": 2.8480892181396484, "step": 2445 }, { "epoch": 1.2846638655462184, "grad_norm": 9.600151102945814, "learning_rate": 7.0662024323375745e-06, "loss": 1.3152753114700317, "step": 2446 }, { "epoch": 1.2851890756302522, "grad_norm": 17.008552694520134, "learning_rate": 7.063419164115598e-06, "loss": 1.9276971817016602, "step": 2447 }, { "epoch": 1.2857142857142856, "grad_norm": 11.052118241756485, "learning_rate": 7.060635125059461e-06, "loss": 1.2775708436965942, "step": 2448 }, { "epoch": 1.2862394957983194, "grad_norm": 10.236994214278042, "learning_rate": 7.057850316209198e-06, "loss": 1.361575722694397, "step": 2449 }, { "epoch": 1.2867647058823528, "grad_norm": 19.449345318770636, "learning_rate": 7.055064738605134e-06, "loss": 1.8090773820877075, "step": 2450 }, { "epoch": 1.2872899159663866, "grad_norm": 9.225946217388557, "learning_rate": 7.052278393287884e-06, "loss": 2.043266773223877, "step": 2451 }, { "epoch": 1.28781512605042, "grad_norm": 8.613599139821474, "learning_rate": 7.049491281298342e-06, "loss": 1.7184176445007324, "step": 2452 }, { "epoch": 1.2883403361344539, "grad_norm": 11.700284459321495, "learning_rate": 7.0467034036776945e-06, "loss": 1.412935495376587, "step": 2453 }, { "epoch": 1.2888655462184873, "grad_norm": 12.860868786785693, "learning_rate": 7.043914761467414e-06, "loss": 1.8810560703277588, "step": 2454 }, { "epoch": 1.289390756302521, "grad_norm": 8.95799380669124, "learning_rate": 7.041125355709256e-06, "loss": 1.2562406063079834, "step": 2455 }, { "epoch": 1.2899159663865547, "grad_norm": 10.112064208293942, "learning_rate": 7.038335187445263e-06, "loss": 1.3138580322265625, "step": 2456 }, { "epoch": 1.2904411764705883, "grad_norm": 13.636094187722334, "learning_rate": 7.035544257717761e-06, "loss": 1.4140331745147705, "step": 2457 }, { "epoch": 1.290966386554622, "grad_norm": 12.628416107837033, "learning_rate": 7.032752567569362e-06, "loss": 1.4917939901351929, "step": 2458 }, { "epoch": 1.2914915966386555, "grad_norm": 10.456060957087649, "learning_rate": 7.0299601180429615e-06, "loss": 2.041104793548584, "step": 2459 }, { "epoch": 1.2920168067226891, "grad_norm": 15.10892323677275, "learning_rate": 7.0271669101817375e-06, "loss": 1.2987377643585205, "step": 2460 }, { "epoch": 1.2925420168067228, "grad_norm": 12.641863560360875, "learning_rate": 7.024372945029152e-06, "loss": 2.1223607063293457, "step": 2461 }, { "epoch": 1.2930672268907564, "grad_norm": 9.589315059027086, "learning_rate": 7.02157822362895e-06, "loss": 1.4910497665405273, "step": 2462 }, { "epoch": 1.29359243697479, "grad_norm": 10.449212079431476, "learning_rate": 7.018782747025161e-06, "loss": 1.3137577772140503, "step": 2463 }, { "epoch": 1.2941176470588236, "grad_norm": 8.853385697952692, "learning_rate": 7.015986516262096e-06, "loss": 1.8313026428222656, "step": 2464 }, { "epoch": 1.2946428571428572, "grad_norm": 12.65867949209294, "learning_rate": 7.013189532384343e-06, "loss": 1.743154525756836, "step": 2465 }, { "epoch": 1.2951680672268908, "grad_norm": 14.294451207287464, "learning_rate": 7.010391796436775e-06, "loss": 1.4444868564605713, "step": 2466 }, { "epoch": 1.2956932773109244, "grad_norm": 11.366121595375953, "learning_rate": 7.007593309464549e-06, "loss": 2.383770227432251, "step": 2467 }, { "epoch": 1.296218487394958, "grad_norm": 7.761305998097778, "learning_rate": 7.004794072513096e-06, "loss": 1.5093591213226318, "step": 2468 }, { "epoch": 1.2967436974789917, "grad_norm": 10.511966100376124, "learning_rate": 7.001994086628133e-06, "loss": 2.276927947998047, "step": 2469 }, { "epoch": 1.2972689075630253, "grad_norm": 11.81309193143502, "learning_rate": 6.999193352855652e-06, "loss": 2.1074419021606445, "step": 2470 }, { "epoch": 1.2977941176470589, "grad_norm": 7.328549800873724, "learning_rate": 6.99639187224193e-06, "loss": 1.4727612733840942, "step": 2471 }, { "epoch": 1.2983193277310925, "grad_norm": 14.305735034760207, "learning_rate": 6.9935896458335176e-06, "loss": 1.7596924304962158, "step": 2472 }, { "epoch": 1.298844537815126, "grad_norm": 11.72411346833107, "learning_rate": 6.990786674677246e-06, "loss": 1.3528380393981934, "step": 2473 }, { "epoch": 1.2993697478991597, "grad_norm": 8.637583794808803, "learning_rate": 6.987982959820224e-06, "loss": 1.59431791305542, "step": 2474 }, { "epoch": 1.2998949579831933, "grad_norm": 10.306180855405167, "learning_rate": 6.985178502309842e-06, "loss": 1.5949974060058594, "step": 2475 }, { "epoch": 1.300420168067227, "grad_norm": 17.461503333618932, "learning_rate": 6.982373303193763e-06, "loss": 1.2602121829986572, "step": 2476 }, { "epoch": 1.3009453781512605, "grad_norm": 11.863168941574267, "learning_rate": 6.979567363519927e-06, "loss": 1.1584863662719727, "step": 2477 }, { "epoch": 1.3014705882352942, "grad_norm": 13.700741655029095, "learning_rate": 6.976760684336556e-06, "loss": 1.492077112197876, "step": 2478 }, { "epoch": 1.3019957983193278, "grad_norm": 10.163214071824699, "learning_rate": 6.973953266692143e-06, "loss": 1.0241563320159912, "step": 2479 }, { "epoch": 1.3025210084033614, "grad_norm": 10.034203329814925, "learning_rate": 6.9711451116354576e-06, "loss": 1.3957622051239014, "step": 2480 }, { "epoch": 1.303046218487395, "grad_norm": 9.863725755737763, "learning_rate": 6.9683362202155465e-06, "loss": 1.196930170059204, "step": 2481 }, { "epoch": 1.3035714285714286, "grad_norm": 31.49627663522701, "learning_rate": 6.965526593481734e-06, "loss": 1.653714656829834, "step": 2482 }, { "epoch": 1.3040966386554622, "grad_norm": 11.437529945321943, "learning_rate": 6.962716232483612e-06, "loss": 1.6412488222122192, "step": 2483 }, { "epoch": 1.3046218487394958, "grad_norm": 17.18904918559585, "learning_rate": 6.959905138271051e-06, "loss": 2.010774612426758, "step": 2484 }, { "epoch": 1.3051470588235294, "grad_norm": 11.86631216395381, "learning_rate": 6.957093311894199e-06, "loss": 1.3377530574798584, "step": 2485 }, { "epoch": 1.305672268907563, "grad_norm": 8.065782433379868, "learning_rate": 6.954280754403469e-06, "loss": 2.142580032348633, "step": 2486 }, { "epoch": 1.3061974789915967, "grad_norm": 10.93177158102414, "learning_rate": 6.951467466849553e-06, "loss": 1.1615842580795288, "step": 2487 }, { "epoch": 1.3067226890756303, "grad_norm": 8.762552231845937, "learning_rate": 6.948653450283416e-06, "loss": 2.062228202819824, "step": 2488 }, { "epoch": 1.3072478991596639, "grad_norm": 14.14240358317665, "learning_rate": 6.945838705756293e-06, "loss": 1.3985791206359863, "step": 2489 }, { "epoch": 1.3077731092436975, "grad_norm": 12.192426375474398, "learning_rate": 6.943023234319691e-06, "loss": 2.175405502319336, "step": 2490 }, { "epoch": 1.308298319327731, "grad_norm": 11.19077682008825, "learning_rate": 6.940207037025391e-06, "loss": 1.8613638877868652, "step": 2491 }, { "epoch": 1.3088235294117647, "grad_norm": 13.661050933922677, "learning_rate": 6.93739011492544e-06, "loss": 1.8705384731292725, "step": 2492 }, { "epoch": 1.3093487394957983, "grad_norm": 12.848329309525058, "learning_rate": 6.934572469072163e-06, "loss": 0.7445922493934631, "step": 2493 }, { "epoch": 1.309873949579832, "grad_norm": 9.521107438821621, "learning_rate": 6.931754100518151e-06, "loss": 1.5413544178009033, "step": 2494 }, { "epoch": 1.3103991596638656, "grad_norm": 7.490547274245034, "learning_rate": 6.928935010316262e-06, "loss": 1.845062494277954, "step": 2495 }, { "epoch": 1.3109243697478992, "grad_norm": 11.475722297659969, "learning_rate": 6.926115199519632e-06, "loss": 1.6455457210540771, "step": 2496 }, { "epoch": 1.3114495798319328, "grad_norm": 11.416749357700317, "learning_rate": 6.923294669181659e-06, "loss": 1.795639157295227, "step": 2497 }, { "epoch": 1.3119747899159664, "grad_norm": 14.580465924989591, "learning_rate": 6.920473420356013e-06, "loss": 2.1982929706573486, "step": 2498 }, { "epoch": 1.3125, "grad_norm": 10.754106629208545, "learning_rate": 6.91765145409663e-06, "loss": 1.6893014907836914, "step": 2499 }, { "epoch": 1.3130252100840336, "grad_norm": 12.308432564515396, "learning_rate": 6.914828771457718e-06, "loss": 1.7461330890655518, "step": 2500 }, { "epoch": 1.3135504201680672, "grad_norm": 7.99296736363269, "learning_rate": 6.912005373493747e-06, "loss": 1.3773590326309204, "step": 2501 }, { "epoch": 1.3140756302521008, "grad_norm": 9.43636596519202, "learning_rate": 6.909181261259461e-06, "loss": 1.7150299549102783, "step": 2502 }, { "epoch": 1.3146008403361344, "grad_norm": 13.615524480762446, "learning_rate": 6.9063564358098636e-06, "loss": 1.3732833862304688, "step": 2503 }, { "epoch": 1.315126050420168, "grad_norm": 8.099324301769936, "learning_rate": 6.903530898200231e-06, "loss": 1.6331502199172974, "step": 2504 }, { "epoch": 1.3156512605042017, "grad_norm": 12.83455375137105, "learning_rate": 6.900704649486103e-06, "loss": 1.3890109062194824, "step": 2505 }, { "epoch": 1.3161764705882353, "grad_norm": 10.330302981513746, "learning_rate": 6.897877690723285e-06, "loss": 1.0741055011749268, "step": 2506 }, { "epoch": 1.316701680672269, "grad_norm": 13.608230933694813, "learning_rate": 6.895050022967844e-06, "loss": 1.4386227130889893, "step": 2507 }, { "epoch": 1.3172268907563025, "grad_norm": 11.404381582808693, "learning_rate": 6.89222164727612e-06, "loss": 2.4623260498046875, "step": 2508 }, { "epoch": 1.3177521008403361, "grad_norm": 9.654563698345797, "learning_rate": 6.889392564704712e-06, "loss": 1.3226982355117798, "step": 2509 }, { "epoch": 1.3182773109243697, "grad_norm": 18.538528952492204, "learning_rate": 6.886562776310482e-06, "loss": 1.6413676738739014, "step": 2510 }, { "epoch": 1.3188025210084033, "grad_norm": 13.063432822429318, "learning_rate": 6.88373228315056e-06, "loss": 1.5557506084442139, "step": 2511 }, { "epoch": 1.319327731092437, "grad_norm": 11.261364293695788, "learning_rate": 6.880901086282337e-06, "loss": 2.2341644763946533, "step": 2512 }, { "epoch": 1.3198529411764706, "grad_norm": 11.076696009119743, "learning_rate": 6.878069186763466e-06, "loss": 1.740747332572937, "step": 2513 }, { "epoch": 1.3203781512605042, "grad_norm": 8.168371709889001, "learning_rate": 6.8752365856518595e-06, "loss": 1.3587056398391724, "step": 2514 }, { "epoch": 1.3209033613445378, "grad_norm": 10.693304800338483, "learning_rate": 6.872403284005703e-06, "loss": 1.4282145500183105, "step": 2515 }, { "epoch": 1.3214285714285714, "grad_norm": 9.20667491126241, "learning_rate": 6.869569282883434e-06, "loss": 1.3325786590576172, "step": 2516 }, { "epoch": 1.321953781512605, "grad_norm": 11.509424025330327, "learning_rate": 6.866734583343753e-06, "loss": 1.85734224319458, "step": 2517 }, { "epoch": 1.3224789915966386, "grad_norm": 12.590709244370263, "learning_rate": 6.8638991864456205e-06, "loss": 1.0745584964752197, "step": 2518 }, { "epoch": 1.3230042016806722, "grad_norm": 19.291653056658767, "learning_rate": 6.861063093248264e-06, "loss": 1.315691351890564, "step": 2519 }, { "epoch": 1.3235294117647058, "grad_norm": 11.880771493036994, "learning_rate": 6.858226304811163e-06, "loss": 1.4071595668792725, "step": 2520 }, { "epoch": 1.3240546218487395, "grad_norm": 9.494184232116408, "learning_rate": 6.855388822194061e-06, "loss": 1.9811328649520874, "step": 2521 }, { "epoch": 1.324579831932773, "grad_norm": 11.820403139060442, "learning_rate": 6.852550646456962e-06, "loss": 2.430528163909912, "step": 2522 }, { "epoch": 1.3251050420168067, "grad_norm": 8.929151314662786, "learning_rate": 6.849711778660124e-06, "loss": 1.6251394748687744, "step": 2523 }, { "epoch": 1.3256302521008403, "grad_norm": 19.98342407839324, "learning_rate": 6.846872219864068e-06, "loss": 1.5677638053894043, "step": 2524 }, { "epoch": 1.326155462184874, "grad_norm": 13.528596231737234, "learning_rate": 6.844031971129571e-06, "loss": 1.9841523170471191, "step": 2525 }, { "epoch": 1.3266806722689075, "grad_norm": 24.438549163870544, "learning_rate": 6.84119103351767e-06, "loss": 1.4829826354980469, "step": 2526 }, { "epoch": 1.3272058823529411, "grad_norm": 18.612654802549784, "learning_rate": 6.8383494080896575e-06, "loss": 1.2201976776123047, "step": 2527 }, { "epoch": 1.3277310924369747, "grad_norm": 11.683062185750027, "learning_rate": 6.835507095907082e-06, "loss": 1.965669870376587, "step": 2528 }, { "epoch": 1.3282563025210083, "grad_norm": 12.42980937815667, "learning_rate": 6.8326640980317475e-06, "loss": 1.4965461492538452, "step": 2529 }, { "epoch": 1.328781512605042, "grad_norm": 8.660389755626476, "learning_rate": 6.829820415525721e-06, "loss": 1.406752347946167, "step": 2530 }, { "epoch": 1.3293067226890756, "grad_norm": 12.00426067443169, "learning_rate": 6.8269760494513185e-06, "loss": 1.608267068862915, "step": 2531 }, { "epoch": 1.3298319327731092, "grad_norm": 11.123005964951068, "learning_rate": 6.824131000871113e-06, "loss": 1.803060531616211, "step": 2532 }, { "epoch": 1.3303571428571428, "grad_norm": 10.273374497276253, "learning_rate": 6.821285270847934e-06, "loss": 0.8821080923080444, "step": 2533 }, { "epoch": 1.3308823529411764, "grad_norm": 11.7024536004965, "learning_rate": 6.818438860444865e-06, "loss": 1.0945472717285156, "step": 2534 }, { "epoch": 1.33140756302521, "grad_norm": 8.949617329870396, "learning_rate": 6.815591770725241e-06, "loss": 1.6190129518508911, "step": 2535 }, { "epoch": 1.3319327731092436, "grad_norm": 10.707926587792425, "learning_rate": 6.812744002752653e-06, "loss": 1.4689741134643555, "step": 2536 }, { "epoch": 1.3324579831932772, "grad_norm": 12.086019162302449, "learning_rate": 6.80989555759095e-06, "loss": 1.2574225664138794, "step": 2537 }, { "epoch": 1.3329831932773109, "grad_norm": 9.606623699042247, "learning_rate": 6.807046436304224e-06, "loss": 1.4694947004318237, "step": 2538 }, { "epoch": 1.3335084033613445, "grad_norm": 13.763373638672054, "learning_rate": 6.804196639956828e-06, "loss": 1.2479515075683594, "step": 2539 }, { "epoch": 1.334033613445378, "grad_norm": 15.701453600680292, "learning_rate": 6.801346169613361e-06, "loss": 1.2505472898483276, "step": 2540 }, { "epoch": 1.3345588235294117, "grad_norm": 7.851971342183067, "learning_rate": 6.79849502633868e-06, "loss": 1.3575226068496704, "step": 2541 }, { "epoch": 1.3350840336134453, "grad_norm": 12.599257053527712, "learning_rate": 6.79564321119789e-06, "loss": 2.1606597900390625, "step": 2542 }, { "epoch": 1.335609243697479, "grad_norm": 12.879209720966696, "learning_rate": 6.792790725256347e-06, "loss": 1.8259855508804321, "step": 2543 }, { "epoch": 1.3361344537815127, "grad_norm": 16.980649451686435, "learning_rate": 6.7899375695796545e-06, "loss": 1.5110688209533691, "step": 2544 }, { "epoch": 1.3366596638655461, "grad_norm": 9.898623798622163, "learning_rate": 6.787083745233674e-06, "loss": 1.8591792583465576, "step": 2545 }, { "epoch": 1.33718487394958, "grad_norm": 11.167484827774937, "learning_rate": 6.784229253284511e-06, "loss": 2.2330236434936523, "step": 2546 }, { "epoch": 1.3377100840336134, "grad_norm": 13.873008847757054, "learning_rate": 6.781374094798522e-06, "loss": 2.464442491531372, "step": 2547 }, { "epoch": 1.3382352941176472, "grad_norm": 10.521202124826498, "learning_rate": 6.77851827084231e-06, "loss": 1.4322118759155273, "step": 2548 }, { "epoch": 1.3387605042016806, "grad_norm": 10.514189793460767, "learning_rate": 6.775661782482732e-06, "loss": 1.3177762031555176, "step": 2549 }, { "epoch": 1.3392857142857144, "grad_norm": 9.46733935917788, "learning_rate": 6.7728046307868875e-06, "loss": 1.9117001295089722, "step": 2550 }, { "epoch": 1.3398109243697478, "grad_norm": 11.983795556008838, "learning_rate": 6.769946816822128e-06, "loss": 1.4520769119262695, "step": 2551 }, { "epoch": 1.3403361344537816, "grad_norm": 11.959673616566686, "learning_rate": 6.767088341656051e-06, "loss": 1.7856323719024658, "step": 2552 }, { "epoch": 1.340861344537815, "grad_norm": 15.4263714940938, "learning_rate": 6.764229206356498e-06, "loss": 1.325421690940857, "step": 2553 }, { "epoch": 1.3413865546218489, "grad_norm": 14.404989650969412, "learning_rate": 6.761369411991564e-06, "loss": 1.808119535446167, "step": 2554 }, { "epoch": 1.3419117647058822, "grad_norm": 10.145814591441445, "learning_rate": 6.7585089596295815e-06, "loss": 1.6972968578338623, "step": 2555 }, { "epoch": 1.342436974789916, "grad_norm": 13.800616055779992, "learning_rate": 6.7556478503391375e-06, "loss": 1.389564037322998, "step": 2556 }, { "epoch": 1.3429621848739495, "grad_norm": 23.327558473709566, "learning_rate": 6.752786085189059e-06, "loss": 1.3491129875183105, "step": 2557 }, { "epoch": 1.3434873949579833, "grad_norm": 8.807767000944555, "learning_rate": 6.749923665248419e-06, "loss": 0.8782278895378113, "step": 2558 }, { "epoch": 1.3440126050420167, "grad_norm": 9.039579718890321, "learning_rate": 6.747060591586533e-06, "loss": 2.1642870903015137, "step": 2559 }, { "epoch": 1.3445378151260505, "grad_norm": 8.52660806934076, "learning_rate": 6.744196865272967e-06, "loss": 1.5673531293869019, "step": 2560 }, { "epoch": 1.345063025210084, "grad_norm": 11.06218019437839, "learning_rate": 6.741332487377525e-06, "loss": 1.4878621101379395, "step": 2561 }, { "epoch": 1.3455882352941178, "grad_norm": 12.440956245310376, "learning_rate": 6.738467458970257e-06, "loss": 1.1033461093902588, "step": 2562 }, { "epoch": 1.3461134453781511, "grad_norm": 15.710698731020198, "learning_rate": 6.735601781121454e-06, "loss": 1.962068796157837, "step": 2563 }, { "epoch": 1.346638655462185, "grad_norm": 9.55682423903587, "learning_rate": 6.732735454901655e-06, "loss": 1.9984880685806274, "step": 2564 }, { "epoch": 1.3471638655462184, "grad_norm": 8.30414066844809, "learning_rate": 6.729868481381632e-06, "loss": 1.3165334463119507, "step": 2565 }, { "epoch": 1.3476890756302522, "grad_norm": 14.592880402511963, "learning_rate": 6.727000861632406e-06, "loss": 1.8011142015457153, "step": 2566 }, { "epoch": 1.3482142857142856, "grad_norm": 12.499109422383643, "learning_rate": 6.724132596725237e-06, "loss": 2.018202304840088, "step": 2567 }, { "epoch": 1.3487394957983194, "grad_norm": 14.192070130331215, "learning_rate": 6.7212636877316285e-06, "loss": 1.5367764234542847, "step": 2568 }, { "epoch": 1.3492647058823528, "grad_norm": 15.339220046068164, "learning_rate": 6.718394135723321e-06, "loss": 1.013871192932129, "step": 2569 }, { "epoch": 1.3497899159663866, "grad_norm": 8.918308112649619, "learning_rate": 6.7155239417722965e-06, "loss": 1.7312051057815552, "step": 2570 }, { "epoch": 1.35031512605042, "grad_norm": 10.220664703279132, "learning_rate": 6.712653106950778e-06, "loss": 1.2396622896194458, "step": 2571 }, { "epoch": 1.3508403361344539, "grad_norm": 12.729365501764047, "learning_rate": 6.709781632331225e-06, "loss": 1.614361047744751, "step": 2572 }, { "epoch": 1.3513655462184873, "grad_norm": 8.795237921642682, "learning_rate": 6.706909518986341e-06, "loss": 1.6548465490341187, "step": 2573 }, { "epoch": 1.351890756302521, "grad_norm": 11.77075807534768, "learning_rate": 6.7040367679890615e-06, "loss": 1.3578767776489258, "step": 2574 }, { "epoch": 1.3524159663865547, "grad_norm": 8.822220085139815, "learning_rate": 6.701163380412568e-06, "loss": 1.4953851699829102, "step": 2575 }, { "epoch": 1.3529411764705883, "grad_norm": 7.840622770182397, "learning_rate": 6.698289357330272e-06, "loss": 1.3457810878753662, "step": 2576 }, { "epoch": 1.353466386554622, "grad_norm": 9.702436161519671, "learning_rate": 6.695414699815828e-06, "loss": 1.5230767726898193, "step": 2577 }, { "epoch": 1.3539915966386555, "grad_norm": 10.397147343129433, "learning_rate": 6.692539408943124e-06, "loss": 1.2239813804626465, "step": 2578 }, { "epoch": 1.3545168067226891, "grad_norm": 13.13970364203173, "learning_rate": 6.689663485786287e-06, "loss": 1.343689203262329, "step": 2579 }, { "epoch": 1.3550420168067228, "grad_norm": 18.493747579673524, "learning_rate": 6.686786931419681e-06, "loss": 1.5230176448822021, "step": 2580 }, { "epoch": 1.3555672268907564, "grad_norm": 10.334524274305997, "learning_rate": 6.6839097469179e-06, "loss": 1.6321138143539429, "step": 2581 }, { "epoch": 1.35609243697479, "grad_norm": 16.73477207146799, "learning_rate": 6.6810319333557815e-06, "loss": 0.9947364330291748, "step": 2582 }, { "epoch": 1.3566176470588236, "grad_norm": 11.768404186451034, "learning_rate": 6.678153491808394e-06, "loss": 1.2328753471374512, "step": 2583 }, { "epoch": 1.3571428571428572, "grad_norm": 18.240083270794724, "learning_rate": 6.675274423351037e-06, "loss": 1.6512658596038818, "step": 2584 }, { "epoch": 1.3576680672268908, "grad_norm": 10.917484463702253, "learning_rate": 6.6723947290592505e-06, "loss": 1.6520872116088867, "step": 2585 }, { "epoch": 1.3581932773109244, "grad_norm": 10.929865099541153, "learning_rate": 6.669514410008806e-06, "loss": 2.4195618629455566, "step": 2586 }, { "epoch": 1.358718487394958, "grad_norm": 9.344452389539134, "learning_rate": 6.666633467275706e-06, "loss": 1.4502514600753784, "step": 2587 }, { "epoch": 1.3592436974789917, "grad_norm": 12.74859647104274, "learning_rate": 6.6637519019361895e-06, "loss": 2.389765977859497, "step": 2588 }, { "epoch": 1.3597689075630253, "grad_norm": 13.963940493816377, "learning_rate": 6.660869715066725e-06, "loss": 1.0900962352752686, "step": 2589 }, { "epoch": 1.3602941176470589, "grad_norm": 11.492524116734717, "learning_rate": 6.657986907744018e-06, "loss": 2.2712888717651367, "step": 2590 }, { "epoch": 1.3608193277310925, "grad_norm": 8.71014440315091, "learning_rate": 6.655103481045e-06, "loss": 1.4768775701522827, "step": 2591 }, { "epoch": 1.361344537815126, "grad_norm": 12.506118340174757, "learning_rate": 6.652219436046836e-06, "loss": 1.9261109828948975, "step": 2592 }, { "epoch": 1.3618697478991597, "grad_norm": 13.911280033325387, "learning_rate": 6.649334773826924e-06, "loss": 2.072033405303955, "step": 2593 }, { "epoch": 1.3623949579831933, "grad_norm": 13.4623849741064, "learning_rate": 6.646449495462891e-06, "loss": 1.2910997867584229, "step": 2594 }, { "epoch": 1.362920168067227, "grad_norm": 13.046448031093849, "learning_rate": 6.643563602032593e-06, "loss": 1.1936817169189453, "step": 2595 }, { "epoch": 1.3634453781512605, "grad_norm": 8.811381722565676, "learning_rate": 6.640677094614117e-06, "loss": 2.49958872795105, "step": 2596 }, { "epoch": 1.3639705882352942, "grad_norm": 11.652098257259762, "learning_rate": 6.63778997428578e-06, "loss": 2.2749969959259033, "step": 2597 }, { "epoch": 1.3644957983193278, "grad_norm": 17.82890109769083, "learning_rate": 6.6349022421261275e-06, "loss": 1.549394965171814, "step": 2598 }, { "epoch": 1.3650210084033614, "grad_norm": 19.38828160727182, "learning_rate": 6.632013899213934e-06, "loss": 1.3625071048736572, "step": 2599 }, { "epoch": 1.365546218487395, "grad_norm": 9.68278057120787, "learning_rate": 6.629124946628198e-06, "loss": 1.88570237159729, "step": 2600 }, { "epoch": 1.3660714285714286, "grad_norm": 9.485250698838614, "learning_rate": 6.626235385448152e-06, "loss": 1.4110757112503052, "step": 2601 }, { "epoch": 1.3665966386554622, "grad_norm": 10.356454455125625, "learning_rate": 6.623345216753254e-06, "loss": 1.019836187362671, "step": 2602 }, { "epoch": 1.3671218487394958, "grad_norm": 9.839943031800415, "learning_rate": 6.6204544416231865e-06, "loss": 1.557316541671753, "step": 2603 }, { "epoch": 1.3676470588235294, "grad_norm": 9.516374842795816, "learning_rate": 6.617563061137859e-06, "loss": 1.1713138818740845, "step": 2604 }, { "epoch": 1.368172268907563, "grad_norm": 8.888276381665476, "learning_rate": 6.61467107637741e-06, "loss": 1.6227775812149048, "step": 2605 }, { "epoch": 1.3686974789915967, "grad_norm": 9.936072019817864, "learning_rate": 6.611778488422203e-06, "loss": 1.488483190536499, "step": 2606 }, { "epoch": 1.3692226890756303, "grad_norm": 14.394976903542132, "learning_rate": 6.608885298352823e-06, "loss": 1.5318236351013184, "step": 2607 }, { "epoch": 1.3697478991596639, "grad_norm": 7.2118829005676455, "learning_rate": 6.6059915072500845e-06, "loss": 1.4667561054229736, "step": 2608 }, { "epoch": 1.3702731092436975, "grad_norm": 19.7245976124542, "learning_rate": 6.603097116195026e-06, "loss": 1.724409580230713, "step": 2609 }, { "epoch": 1.370798319327731, "grad_norm": 16.784771707501868, "learning_rate": 6.600202126268905e-06, "loss": 2.025880813598633, "step": 2610 }, { "epoch": 1.3713235294117647, "grad_norm": 11.661960217787538, "learning_rate": 6.59730653855321e-06, "loss": 1.8906800746917725, "step": 2611 }, { "epoch": 1.3718487394957983, "grad_norm": 13.24328380174217, "learning_rate": 6.5944103541296486e-06, "loss": 1.6317015886306763, "step": 2612 }, { "epoch": 1.372373949579832, "grad_norm": 15.173657359422222, "learning_rate": 6.591513574080152e-06, "loss": 1.6432816982269287, "step": 2613 }, { "epoch": 1.3728991596638656, "grad_norm": 16.114639890649695, "learning_rate": 6.5886161994868744e-06, "loss": 1.4184069633483887, "step": 2614 }, { "epoch": 1.3734243697478992, "grad_norm": 16.52273933884079, "learning_rate": 6.58571823143219e-06, "loss": 1.7298986911773682, "step": 2615 }, { "epoch": 1.3739495798319328, "grad_norm": 11.782970640317508, "learning_rate": 6.582819670998699e-06, "loss": 1.436945915222168, "step": 2616 }, { "epoch": 1.3744747899159664, "grad_norm": 12.52398633251897, "learning_rate": 6.579920519269218e-06, "loss": 1.3968048095703125, "step": 2617 }, { "epoch": 1.375, "grad_norm": 10.295271294768517, "learning_rate": 6.577020777326789e-06, "loss": 1.237300992012024, "step": 2618 }, { "epoch": 1.3755252100840336, "grad_norm": 12.049889074833844, "learning_rate": 6.574120446254672e-06, "loss": 1.6429824829101562, "step": 2619 }, { "epoch": 1.3760504201680672, "grad_norm": 7.164282559855988, "learning_rate": 6.571219527136347e-06, "loss": 1.7228885889053345, "step": 2620 }, { "epoch": 1.3765756302521008, "grad_norm": 11.272822828941221, "learning_rate": 6.568318021055512e-06, "loss": 1.6949775218963623, "step": 2621 }, { "epoch": 1.3771008403361344, "grad_norm": 10.272152303244857, "learning_rate": 6.5654159290960895e-06, "loss": 1.272033929824829, "step": 2622 }, { "epoch": 1.377626050420168, "grad_norm": 12.10455057830705, "learning_rate": 6.562513252342216e-06, "loss": 1.2234035730361938, "step": 2623 }, { "epoch": 1.3781512605042017, "grad_norm": 9.343131832171544, "learning_rate": 6.55960999187825e-06, "loss": 1.1954748630523682, "step": 2624 }, { "epoch": 1.3786764705882353, "grad_norm": 8.224273647016544, "learning_rate": 6.556706148788765e-06, "loss": 1.7882004976272583, "step": 2625 }, { "epoch": 1.379201680672269, "grad_norm": 10.40029164081238, "learning_rate": 6.553801724158552e-06, "loss": 1.4875035285949707, "step": 2626 }, { "epoch": 1.3797268907563025, "grad_norm": 10.655318633990067, "learning_rate": 6.550896719072624e-06, "loss": 1.8149640560150146, "step": 2627 }, { "epoch": 1.3802521008403361, "grad_norm": 13.177774614503326, "learning_rate": 6.547991134616204e-06, "loss": 1.6386281251907349, "step": 2628 }, { "epoch": 1.3807773109243697, "grad_norm": 7.566280409389933, "learning_rate": 6.545084971874738e-06, "loss": 2.0819602012634277, "step": 2629 }, { "epoch": 1.3813025210084033, "grad_norm": 9.321219115057831, "learning_rate": 6.542178231933882e-06, "loss": 1.232421875, "step": 2630 }, { "epoch": 1.381827731092437, "grad_norm": 14.479870399116498, "learning_rate": 6.539270915879513e-06, "loss": 3.0914225578308105, "step": 2631 }, { "epoch": 1.3823529411764706, "grad_norm": 13.387208054414357, "learning_rate": 6.536363024797721e-06, "loss": 1.8260456323623657, "step": 2632 }, { "epoch": 1.3828781512605042, "grad_norm": 9.62056668563731, "learning_rate": 6.5334545597748075e-06, "loss": 1.3626213073730469, "step": 2633 }, { "epoch": 1.3834033613445378, "grad_norm": 12.191952993848863, "learning_rate": 6.530545521897293e-06, "loss": 1.6068592071533203, "step": 2634 }, { "epoch": 1.3839285714285714, "grad_norm": 10.70208601001819, "learning_rate": 6.527635912251914e-06, "loss": 1.9337867498397827, "step": 2635 }, { "epoch": 1.384453781512605, "grad_norm": 14.753455181001744, "learning_rate": 6.524725731925613e-06, "loss": 1.1043397188186646, "step": 2636 }, { "epoch": 1.3849789915966386, "grad_norm": 10.501587226416273, "learning_rate": 6.521814982005552e-06, "loss": 1.173478126525879, "step": 2637 }, { "epoch": 1.3855042016806722, "grad_norm": 7.756842605929269, "learning_rate": 6.5189036635791e-06, "loss": 0.8882420659065247, "step": 2638 }, { "epoch": 1.3860294117647058, "grad_norm": 10.626611100897398, "learning_rate": 6.5159917777338466e-06, "loss": 1.5061715841293335, "step": 2639 }, { "epoch": 1.3865546218487395, "grad_norm": 8.048004334552857, "learning_rate": 6.513079325557587e-06, "loss": 2.0927653312683105, "step": 2640 }, { "epoch": 1.387079831932773, "grad_norm": 11.249220854343857, "learning_rate": 6.510166308138328e-06, "loss": 0.9584404826164246, "step": 2641 }, { "epoch": 1.3876050420168067, "grad_norm": 11.998451384517095, "learning_rate": 6.507252726564293e-06, "loss": 1.6279972791671753, "step": 2642 }, { "epoch": 1.3881302521008403, "grad_norm": 18.75109577628921, "learning_rate": 6.5043385819239095e-06, "loss": 1.6063534021377563, "step": 2643 }, { "epoch": 1.388655462184874, "grad_norm": 11.257558081059175, "learning_rate": 6.501423875305819e-06, "loss": 1.3692386150360107, "step": 2644 }, { "epoch": 1.3891806722689075, "grad_norm": 12.677813614247667, "learning_rate": 6.498508607798872e-06, "loss": 1.5299866199493408, "step": 2645 }, { "epoch": 1.3897058823529411, "grad_norm": 8.320974312397558, "learning_rate": 6.4955927804921284e-06, "loss": 2.0422544479370117, "step": 2646 }, { "epoch": 1.3902310924369747, "grad_norm": 7.7916598720904755, "learning_rate": 6.49267639447486e-06, "loss": 1.7703297138214111, "step": 2647 }, { "epoch": 1.3907563025210083, "grad_norm": 14.479568830284776, "learning_rate": 6.489759450836541e-06, "loss": 2.3879706859588623, "step": 2648 }, { "epoch": 1.391281512605042, "grad_norm": 6.298967670520775, "learning_rate": 6.48684195066686e-06, "loss": 1.664591908454895, "step": 2649 }, { "epoch": 1.3918067226890756, "grad_norm": 19.64520198257997, "learning_rate": 6.483923895055713e-06, "loss": 1.2596275806427002, "step": 2650 }, { "epoch": 1.3923319327731092, "grad_norm": 10.252645986693791, "learning_rate": 6.481005285093199e-06, "loss": 1.020234227180481, "step": 2651 }, { "epoch": 1.3928571428571428, "grad_norm": 9.145891761994292, "learning_rate": 6.4780861218696265e-06, "loss": 2.633622407913208, "step": 2652 }, { "epoch": 1.3933823529411764, "grad_norm": 9.158351197360872, "learning_rate": 6.475166406475515e-06, "loss": 1.233659267425537, "step": 2653 }, { "epoch": 1.39390756302521, "grad_norm": 8.32455042133505, "learning_rate": 6.472246140001582e-06, "loss": 2.1047203540802, "step": 2654 }, { "epoch": 1.3944327731092436, "grad_norm": 12.136926307616424, "learning_rate": 6.4693253235387575e-06, "loss": 1.2161425352096558, "step": 2655 }, { "epoch": 1.3949579831932772, "grad_norm": 16.27085468344808, "learning_rate": 6.466403958178175e-06, "loss": 1.779348373413086, "step": 2656 }, { "epoch": 1.3954831932773109, "grad_norm": 11.158250460715964, "learning_rate": 6.4634820450111715e-06, "loss": 1.38726007938385, "step": 2657 }, { "epoch": 1.3960084033613445, "grad_norm": 11.611274022507967, "learning_rate": 6.460559585129289e-06, "loss": 1.1857019662857056, "step": 2658 }, { "epoch": 1.396533613445378, "grad_norm": 15.148118324524411, "learning_rate": 6.457636579624278e-06, "loss": 1.4580409526824951, "step": 2659 }, { "epoch": 1.3970588235294117, "grad_norm": 9.052838688025497, "learning_rate": 6.454713029588086e-06, "loss": 1.095137357711792, "step": 2660 }, { "epoch": 1.3975840336134453, "grad_norm": 10.358754953892628, "learning_rate": 6.451788936112868e-06, "loss": 1.8477425575256348, "step": 2661 }, { "epoch": 1.398109243697479, "grad_norm": 32.799345529210306, "learning_rate": 6.4488643002909845e-06, "loss": 2.163045883178711, "step": 2662 }, { "epoch": 1.3986344537815127, "grad_norm": 6.803439693193377, "learning_rate": 6.445939123214991e-06, "loss": 1.712918758392334, "step": 2663 }, { "epoch": 1.3991596638655461, "grad_norm": 15.808125169176924, "learning_rate": 6.443013405977652e-06, "loss": 1.7482061386108398, "step": 2664 }, { "epoch": 1.39968487394958, "grad_norm": 13.589295403290127, "learning_rate": 6.440087149671932e-06, "loss": 1.5376638174057007, "step": 2665 }, { "epoch": 1.4002100840336134, "grad_norm": 13.139163850847654, "learning_rate": 6.437160355390997e-06, "loss": 1.6581084728240967, "step": 2666 }, { "epoch": 1.4007352941176472, "grad_norm": 8.262781614669443, "learning_rate": 6.434233024228209e-06, "loss": 1.416346549987793, "step": 2667 }, { "epoch": 1.4012605042016806, "grad_norm": 14.855034754990058, "learning_rate": 6.431305157277139e-06, "loss": 1.7294692993164062, "step": 2668 }, { "epoch": 1.4017857142857144, "grad_norm": 7.980525829144439, "learning_rate": 6.428376755631553e-06, "loss": 1.3376318216323853, "step": 2669 }, { "epoch": 1.4023109243697478, "grad_norm": 9.111471869390265, "learning_rate": 6.4254478203854175e-06, "loss": 1.1048033237457275, "step": 2670 }, { "epoch": 1.4028361344537816, "grad_norm": 8.633811759504914, "learning_rate": 6.422518352632898e-06, "loss": 1.1107443571090698, "step": 2671 }, { "epoch": 1.403361344537815, "grad_norm": 9.614913628192502, "learning_rate": 6.419588353468361e-06, "loss": 1.5569854974746704, "step": 2672 }, { "epoch": 1.4038865546218489, "grad_norm": 14.525560523011887, "learning_rate": 6.41665782398637e-06, "loss": 1.8844988346099854, "step": 2673 }, { "epoch": 1.4044117647058822, "grad_norm": 13.854399534689552, "learning_rate": 6.413726765281685e-06, "loss": 1.430602788925171, "step": 2674 }, { "epoch": 1.404936974789916, "grad_norm": 10.373295258274258, "learning_rate": 6.410795178449266e-06, "loss": 0.9985050559043884, "step": 2675 }, { "epoch": 1.4054621848739495, "grad_norm": 10.760439519982976, "learning_rate": 6.407863064584271e-06, "loss": 1.928354024887085, "step": 2676 }, { "epoch": 1.4059873949579833, "grad_norm": 12.748172047930892, "learning_rate": 6.404930424782052e-06, "loss": 1.3580679893493652, "step": 2677 }, { "epoch": 1.4065126050420167, "grad_norm": 7.350157703996195, "learning_rate": 6.40199726013816e-06, "loss": 1.6785142421722412, "step": 2678 }, { "epoch": 1.4070378151260505, "grad_norm": 10.02657720960701, "learning_rate": 6.3990635717483404e-06, "loss": 1.535596489906311, "step": 2679 }, { "epoch": 1.407563025210084, "grad_norm": 9.162115309527774, "learning_rate": 6.396129360708537e-06, "loss": 1.3276948928833008, "step": 2680 }, { "epoch": 1.4080882352941178, "grad_norm": 8.401950811227174, "learning_rate": 6.393194628114885e-06, "loss": 1.2496113777160645, "step": 2681 }, { "epoch": 1.4086134453781511, "grad_norm": 18.966908845623266, "learning_rate": 6.390259375063714e-06, "loss": 1.3329956531524658, "step": 2682 }, { "epoch": 1.409138655462185, "grad_norm": 11.926368754198064, "learning_rate": 6.387323602651554e-06, "loss": 1.5731288194656372, "step": 2683 }, { "epoch": 1.4096638655462184, "grad_norm": 14.364387574817396, "learning_rate": 6.384387311975124e-06, "loss": 1.4846248626708984, "step": 2684 }, { "epoch": 1.4101890756302522, "grad_norm": 11.325711220917505, "learning_rate": 6.381450504131339e-06, "loss": 1.1658028364181519, "step": 2685 }, { "epoch": 1.4107142857142856, "grad_norm": 9.454291241719664, "learning_rate": 6.378513180217303e-06, "loss": 1.37001633644104, "step": 2686 }, { "epoch": 1.4112394957983194, "grad_norm": 11.729171590517273, "learning_rate": 6.37557534133032e-06, "loss": 1.3812564611434937, "step": 2687 }, { "epoch": 1.4117647058823528, "grad_norm": 9.110161139375982, "learning_rate": 6.3726369885678785e-06, "loss": 1.5691630840301514, "step": 2688 }, { "epoch": 1.4122899159663866, "grad_norm": 11.666376825432682, "learning_rate": 6.369698123027664e-06, "loss": 1.8340256214141846, "step": 2689 }, { "epoch": 1.41281512605042, "grad_norm": 13.329603843645343, "learning_rate": 6.366758745807554e-06, "loss": 1.6556529998779297, "step": 2690 }, { "epoch": 1.4133403361344539, "grad_norm": 10.920192684086777, "learning_rate": 6.363818858005614e-06, "loss": 1.2496334314346313, "step": 2691 }, { "epoch": 1.4138655462184873, "grad_norm": 21.030961907191475, "learning_rate": 6.360878460720101e-06, "loss": 2.0735185146331787, "step": 2692 }, { "epoch": 1.414390756302521, "grad_norm": 12.238898954810804, "learning_rate": 6.357937555049465e-06, "loss": 2.2130722999572754, "step": 2693 }, { "epoch": 1.4149159663865547, "grad_norm": 11.893673609094371, "learning_rate": 6.354996142092343e-06, "loss": 1.3949319124221802, "step": 2694 }, { "epoch": 1.4154411764705883, "grad_norm": 6.369629277415591, "learning_rate": 6.35205422294756e-06, "loss": 1.3924736976623535, "step": 2695 }, { "epoch": 1.415966386554622, "grad_norm": 9.636829463557582, "learning_rate": 6.349111798714136e-06, "loss": 1.8141247034072876, "step": 2696 }, { "epoch": 1.4164915966386555, "grad_norm": 15.188161640453659, "learning_rate": 6.3461688704912735e-06, "loss": 1.7034218311309814, "step": 2697 }, { "epoch": 1.4170168067226891, "grad_norm": 12.406060949782468, "learning_rate": 6.34322543937837e-06, "loss": 2.171898126602173, "step": 2698 }, { "epoch": 1.4175420168067228, "grad_norm": 16.879363153798028, "learning_rate": 6.340281506475003e-06, "loss": 1.3152217864990234, "step": 2699 }, { "epoch": 1.4180672268907564, "grad_norm": 11.897718407530666, "learning_rate": 6.337337072880942e-06, "loss": 2.0754623413085938, "step": 2700 }, { "epoch": 1.41859243697479, "grad_norm": 8.097156028830476, "learning_rate": 6.334392139696144e-06, "loss": 1.7747178077697754, "step": 2701 }, { "epoch": 1.4191176470588236, "grad_norm": 15.835349283719063, "learning_rate": 6.331446708020751e-06, "loss": 1.4925724267959595, "step": 2702 }, { "epoch": 1.4196428571428572, "grad_norm": 8.972494776001891, "learning_rate": 6.328500778955091e-06, "loss": 1.1682425737380981, "step": 2703 }, { "epoch": 1.4201680672268908, "grad_norm": 16.005993054048783, "learning_rate": 6.325554353599681e-06, "loss": 1.5616768598556519, "step": 2704 }, { "epoch": 1.4206932773109244, "grad_norm": 9.53703014155293, "learning_rate": 6.322607433055217e-06, "loss": 1.3121098279953003, "step": 2705 }, { "epoch": 1.421218487394958, "grad_norm": 9.866894377440097, "learning_rate": 6.3196600184225875e-06, "loss": 1.2491700649261475, "step": 2706 }, { "epoch": 1.4217436974789917, "grad_norm": 12.107020031566593, "learning_rate": 6.31671211080286e-06, "loss": 1.3733625411987305, "step": 2707 }, { "epoch": 1.4222689075630253, "grad_norm": 10.488442066499326, "learning_rate": 6.31376371129729e-06, "loss": 1.749148964881897, "step": 2708 }, { "epoch": 1.4227941176470589, "grad_norm": 16.226851146659296, "learning_rate": 6.310814821007312e-06, "loss": 1.3072221279144287, "step": 2709 }, { "epoch": 1.4233193277310925, "grad_norm": 8.781505569756783, "learning_rate": 6.3078654410345485e-06, "loss": 1.71349036693573, "step": 2710 }, { "epoch": 1.423844537815126, "grad_norm": 10.727948604474648, "learning_rate": 6.304915572480803e-06, "loss": 1.785348653793335, "step": 2711 }, { "epoch": 1.4243697478991597, "grad_norm": 8.522798288972508, "learning_rate": 6.301965216448062e-06, "loss": 0.9674264192581177, "step": 2712 }, { "epoch": 1.4248949579831933, "grad_norm": 17.51722415753573, "learning_rate": 6.299014374038493e-06, "loss": 1.4388782978057861, "step": 2713 }, { "epoch": 1.425420168067227, "grad_norm": 8.741920039837364, "learning_rate": 6.296063046354448e-06, "loss": 1.347830057144165, "step": 2714 }, { "epoch": 1.4259453781512605, "grad_norm": 9.675295109035934, "learning_rate": 6.293111234498456e-06, "loss": 1.2614622116088867, "step": 2715 }, { "epoch": 1.4264705882352942, "grad_norm": 12.468022455838982, "learning_rate": 6.29015893957323e-06, "loss": 1.663257360458374, "step": 2716 }, { "epoch": 1.4269957983193278, "grad_norm": 8.781387102591754, "learning_rate": 6.287206162681663e-06, "loss": 1.2915159463882446, "step": 2717 }, { "epoch": 1.4275210084033614, "grad_norm": 15.535814336336738, "learning_rate": 6.284252904926826e-06, "loss": 1.846402645111084, "step": 2718 }, { "epoch": 1.428046218487395, "grad_norm": 11.130476806401775, "learning_rate": 6.281299167411975e-06, "loss": 2.127185344696045, "step": 2719 }, { "epoch": 1.4285714285714286, "grad_norm": 21.313055669016336, "learning_rate": 6.278344951240537e-06, "loss": 1.433441162109375, "step": 2720 }, { "epoch": 1.4290966386554622, "grad_norm": 14.531003249615098, "learning_rate": 6.275390257516125e-06, "loss": 1.560739517211914, "step": 2721 }, { "epoch": 1.4296218487394958, "grad_norm": 12.849787430675377, "learning_rate": 6.2724350873425285e-06, "loss": 1.3328757286071777, "step": 2722 }, { "epoch": 1.4301470588235294, "grad_norm": 9.78905227006667, "learning_rate": 6.269479441823712e-06, "loss": 2.651552677154541, "step": 2723 }, { "epoch": 1.430672268907563, "grad_norm": 7.738392960724227, "learning_rate": 6.266523322063821e-06, "loss": 1.7567307949066162, "step": 2724 }, { "epoch": 1.4311974789915967, "grad_norm": 12.502016160279725, "learning_rate": 6.263566729167177e-06, "loss": 1.6257787942886353, "step": 2725 }, { "epoch": 1.4317226890756303, "grad_norm": 17.777974679193576, "learning_rate": 6.260609664238278e-06, "loss": 1.6813796758651733, "step": 2726 }, { "epoch": 1.4322478991596639, "grad_norm": 10.9861645693464, "learning_rate": 6.257652128381798e-06, "loss": 1.2643649578094482, "step": 2727 }, { "epoch": 1.4327731092436975, "grad_norm": 13.62052228931387, "learning_rate": 6.254694122702589e-06, "loss": 0.9569892883300781, "step": 2728 }, { "epoch": 1.433298319327731, "grad_norm": 11.133732791010567, "learning_rate": 6.251735648305676e-06, "loss": 1.6710110902786255, "step": 2729 }, { "epoch": 1.4338235294117647, "grad_norm": 12.914654977164414, "learning_rate": 6.24877670629626e-06, "loss": 1.3672080039978027, "step": 2730 }, { "epoch": 1.4343487394957983, "grad_norm": 12.281255047032186, "learning_rate": 6.245817297779716e-06, "loss": 2.125563621520996, "step": 2731 }, { "epoch": 1.434873949579832, "grad_norm": 9.098304820351398, "learning_rate": 6.242857423861597e-06, "loss": 1.7239699363708496, "step": 2732 }, { "epoch": 1.4353991596638656, "grad_norm": 6.934151955267918, "learning_rate": 6.239897085647624e-06, "loss": 1.8293339014053345, "step": 2733 }, { "epoch": 1.4359243697478992, "grad_norm": 21.139878296146573, "learning_rate": 6.236936284243695e-06, "loss": 1.8869755268096924, "step": 2734 }, { "epoch": 1.4364495798319328, "grad_norm": 8.508829231590704, "learning_rate": 6.23397502075588e-06, "loss": 1.1078177690505981, "step": 2735 }, { "epoch": 1.4369747899159664, "grad_norm": 8.729160781114265, "learning_rate": 6.231013296290425e-06, "loss": 1.2480807304382324, "step": 2736 }, { "epoch": 1.4375, "grad_norm": 13.610444769345298, "learning_rate": 6.228051111953742e-06, "loss": 2.1868491172790527, "step": 2737 }, { "epoch": 1.4380252100840336, "grad_norm": 8.428366000493645, "learning_rate": 6.225088468852418e-06, "loss": 1.2806732654571533, "step": 2738 }, { "epoch": 1.4385504201680672, "grad_norm": 10.604402798905818, "learning_rate": 6.222125368093213e-06, "loss": 1.1063880920410156, "step": 2739 }, { "epoch": 1.4390756302521008, "grad_norm": 31.431759017420447, "learning_rate": 6.219161810783057e-06, "loss": 2.608151912689209, "step": 2740 }, { "epoch": 1.4396008403361344, "grad_norm": 9.704102880620201, "learning_rate": 6.216197798029049e-06, "loss": 1.8975062370300293, "step": 2741 }, { "epoch": 1.440126050420168, "grad_norm": 8.786043840419737, "learning_rate": 6.213233330938456e-06, "loss": 1.8726248741149902, "step": 2742 }, { "epoch": 1.4406512605042017, "grad_norm": 15.659039793981638, "learning_rate": 6.210268410618723e-06, "loss": 1.9601554870605469, "step": 2743 }, { "epoch": 1.4411764705882353, "grad_norm": 9.081157829260926, "learning_rate": 6.207303038177457e-06, "loss": 2.228078842163086, "step": 2744 }, { "epoch": 1.441701680672269, "grad_norm": 9.77563223858514, "learning_rate": 6.204337214722435e-06, "loss": 1.5869779586791992, "step": 2745 }, { "epoch": 1.4422268907563025, "grad_norm": 8.54218598421352, "learning_rate": 6.201370941361603e-06, "loss": 2.2011094093322754, "step": 2746 }, { "epoch": 1.4427521008403361, "grad_norm": 12.422908355758468, "learning_rate": 6.198404219203078e-06, "loss": 1.746273398399353, "step": 2747 }, { "epoch": 1.4432773109243697, "grad_norm": 10.784065381502472, "learning_rate": 6.1954370493551415e-06, "loss": 1.5087051391601562, "step": 2748 }, { "epoch": 1.4438025210084033, "grad_norm": 7.654085791132907, "learning_rate": 6.192469432926241e-06, "loss": 1.4601963758468628, "step": 2749 }, { "epoch": 1.444327731092437, "grad_norm": 11.055293317789749, "learning_rate": 6.189501371024995e-06, "loss": 1.2775212526321411, "step": 2750 }, { "epoch": 1.4448529411764706, "grad_norm": 11.233046445606105, "learning_rate": 6.186532864760186e-06, "loss": 1.5675815343856812, "step": 2751 }, { "epoch": 1.4453781512605042, "grad_norm": 9.430242842745319, "learning_rate": 6.183563915240763e-06, "loss": 1.4484829902648926, "step": 2752 }, { "epoch": 1.4459033613445378, "grad_norm": 9.650257345529285, "learning_rate": 6.180594523575838e-06, "loss": 1.5815004110336304, "step": 2753 }, { "epoch": 1.4464285714285714, "grad_norm": 16.290533767329066, "learning_rate": 6.177624690874693e-06, "loss": 2.012706756591797, "step": 2754 }, { "epoch": 1.446953781512605, "grad_norm": 9.52004766857945, "learning_rate": 6.174654418246772e-06, "loss": 1.7970008850097656, "step": 2755 }, { "epoch": 1.4474789915966386, "grad_norm": 11.0420388819629, "learning_rate": 6.1716837068016825e-06, "loss": 0.7532917261123657, "step": 2756 }, { "epoch": 1.4480042016806722, "grad_norm": 9.032300626920227, "learning_rate": 6.1687125576491945e-06, "loss": 1.4576060771942139, "step": 2757 }, { "epoch": 1.4485294117647058, "grad_norm": 7.431941950250911, "learning_rate": 6.16574097189925e-06, "loss": 1.5218952894210815, "step": 2758 }, { "epoch": 1.4490546218487395, "grad_norm": 13.848389559198779, "learning_rate": 6.162768950661945e-06, "loss": 1.6575257778167725, "step": 2759 }, { "epoch": 1.449579831932773, "grad_norm": 10.205416632073112, "learning_rate": 6.15979649504754e-06, "loss": 2.0593268871307373, "step": 2760 }, { "epoch": 1.4501050420168067, "grad_norm": 14.671209130408167, "learning_rate": 6.156823606166461e-06, "loss": 1.703036904335022, "step": 2761 }, { "epoch": 1.4506302521008403, "grad_norm": 14.195892914638451, "learning_rate": 6.153850285129293e-06, "loss": 1.147667646408081, "step": 2762 }, { "epoch": 1.451155462184874, "grad_norm": 13.269428621486092, "learning_rate": 6.150876533046784e-06, "loss": 1.3205734491348267, "step": 2763 }, { "epoch": 1.4516806722689075, "grad_norm": 9.353869151990617, "learning_rate": 6.147902351029842e-06, "loss": 1.0645673274993896, "step": 2764 }, { "epoch": 1.4522058823529411, "grad_norm": 19.86336156613061, "learning_rate": 6.144927740189537e-06, "loss": 1.4619238376617432, "step": 2765 }, { "epoch": 1.4527310924369747, "grad_norm": 15.934684600251815, "learning_rate": 6.141952701637098e-06, "loss": 1.5207512378692627, "step": 2766 }, { "epoch": 1.4532563025210083, "grad_norm": 8.923483655391978, "learning_rate": 6.138977236483912e-06, "loss": 1.8451919555664062, "step": 2767 }, { "epoch": 1.453781512605042, "grad_norm": 15.248024961440924, "learning_rate": 6.1360013458415276e-06, "loss": 1.460693120956421, "step": 2768 }, { "epoch": 1.4543067226890756, "grad_norm": 8.404726421486403, "learning_rate": 6.133025030821656e-06, "loss": 2.239863157272339, "step": 2769 }, { "epoch": 1.4548319327731092, "grad_norm": 13.586024669612279, "learning_rate": 6.130048292536158e-06, "loss": 1.5917932987213135, "step": 2770 }, { "epoch": 1.4553571428571428, "grad_norm": 8.033148961170872, "learning_rate": 6.127071132097061e-06, "loss": 1.2371349334716797, "step": 2771 }, { "epoch": 1.4558823529411764, "grad_norm": 13.998857978072763, "learning_rate": 6.124093550616544e-06, "loss": 1.7588425874710083, "step": 2772 }, { "epoch": 1.45640756302521, "grad_norm": 17.617515653278907, "learning_rate": 6.12111554920695e-06, "loss": 1.9410431385040283, "step": 2773 }, { "epoch": 1.4569327731092436, "grad_norm": 9.161409370597044, "learning_rate": 6.118137128980771e-06, "loss": 0.9624344706535339, "step": 2774 }, { "epoch": 1.4574579831932772, "grad_norm": 9.52809010695272, "learning_rate": 6.11515829105066e-06, "loss": 1.4326848983764648, "step": 2775 }, { "epoch": 1.4579831932773109, "grad_norm": 10.023203255773875, "learning_rate": 6.112179036529426e-06, "loss": 1.4362763166427612, "step": 2776 }, { "epoch": 1.4585084033613445, "grad_norm": 12.801240692566088, "learning_rate": 6.1091993665300354e-06, "loss": 1.397044062614441, "step": 2777 }, { "epoch": 1.459033613445378, "grad_norm": 15.212209550852911, "learning_rate": 6.106219282165603e-06, "loss": 1.1173803806304932, "step": 2778 }, { "epoch": 1.4595588235294117, "grad_norm": 10.63706747298648, "learning_rate": 6.103238784549404e-06, "loss": 1.6797499656677246, "step": 2779 }, { "epoch": 1.4600840336134453, "grad_norm": 7.368362917754149, "learning_rate": 6.1002578747948686e-06, "loss": 1.9404041767120361, "step": 2780 }, { "epoch": 1.460609243697479, "grad_norm": 16.695431205021084, "learning_rate": 6.0972765540155764e-06, "loss": 1.6921062469482422, "step": 2781 }, { "epoch": 1.4611344537815127, "grad_norm": 18.507486596298616, "learning_rate": 6.0942948233252655e-06, "loss": 1.4276188611984253, "step": 2782 }, { "epoch": 1.4616596638655461, "grad_norm": 13.979346214698044, "learning_rate": 6.091312683837823e-06, "loss": 1.2384809255599976, "step": 2783 }, { "epoch": 1.46218487394958, "grad_norm": 11.236858096804077, "learning_rate": 6.088330136667294e-06, "loss": 1.4504996538162231, "step": 2784 }, { "epoch": 1.4627100840336134, "grad_norm": 9.754105170854729, "learning_rate": 6.08534718292787e-06, "loss": 2.2792468070983887, "step": 2785 }, { "epoch": 1.4632352941176472, "grad_norm": 10.261888445214334, "learning_rate": 6.082363823733897e-06, "loss": 1.542618751525879, "step": 2786 }, { "epoch": 1.4637605042016806, "grad_norm": 15.713836107112463, "learning_rate": 6.07938006019987e-06, "loss": 1.386710524559021, "step": 2787 }, { "epoch": 1.4642857142857144, "grad_norm": 10.27332010013314, "learning_rate": 6.076395893440442e-06, "loss": 1.9322963953018188, "step": 2788 }, { "epoch": 1.4648109243697478, "grad_norm": 12.404609035213884, "learning_rate": 6.07341132457041e-06, "loss": 1.8743345737457275, "step": 2789 }, { "epoch": 1.4653361344537816, "grad_norm": 9.417357232035894, "learning_rate": 6.070426354704723e-06, "loss": 1.754909634590149, "step": 2790 }, { "epoch": 1.465861344537815, "grad_norm": 9.593555949937741, "learning_rate": 6.067440984958479e-06, "loss": 1.671936273574829, "step": 2791 }, { "epoch": 1.4663865546218489, "grad_norm": 7.774702693146011, "learning_rate": 6.064455216446929e-06, "loss": 1.509257435798645, "step": 2792 }, { "epoch": 1.4669117647058822, "grad_norm": 17.78768586767669, "learning_rate": 6.061469050285469e-06, "loss": 1.269576072692871, "step": 2793 }, { "epoch": 1.467436974789916, "grad_norm": 13.45180608305342, "learning_rate": 6.058482487589644e-06, "loss": 1.6840946674346924, "step": 2794 }, { "epoch": 1.4679621848739495, "grad_norm": 7.7412920982889295, "learning_rate": 6.055495529475149e-06, "loss": 1.9168556928634644, "step": 2795 }, { "epoch": 1.4684873949579833, "grad_norm": 7.294054716643995, "learning_rate": 6.0525081770578265e-06, "loss": 1.2719926834106445, "step": 2796 }, { "epoch": 1.4690126050420167, "grad_norm": 11.068267519959369, "learning_rate": 6.049520431453666e-06, "loss": 1.2731499671936035, "step": 2797 }, { "epoch": 1.4695378151260505, "grad_norm": 9.807784474363542, "learning_rate": 6.0465322937788e-06, "loss": 1.6818287372589111, "step": 2798 }, { "epoch": 1.470063025210084, "grad_norm": 12.181562084315226, "learning_rate": 6.043543765149514e-06, "loss": 1.31424880027771, "step": 2799 }, { "epoch": 1.4705882352941178, "grad_norm": 9.106684508139276, "learning_rate": 6.040554846682237e-06, "loss": 2.372061252593994, "step": 2800 }, { "epoch": 1.4711134453781511, "grad_norm": 9.874967096732872, "learning_rate": 6.037565539493542e-06, "loss": 1.5737472772598267, "step": 2801 }, { "epoch": 1.471638655462185, "grad_norm": 8.045375010534565, "learning_rate": 6.034575844700148e-06, "loss": 1.7884564399719238, "step": 2802 }, { "epoch": 1.4721638655462184, "grad_norm": 11.35333671438149, "learning_rate": 6.031585763418919e-06, "loss": 1.6468238830566406, "step": 2803 }, { "epoch": 1.4726890756302522, "grad_norm": 8.28167539536036, "learning_rate": 6.028595296766865e-06, "loss": 1.7392634153366089, "step": 2804 }, { "epoch": 1.4732142857142856, "grad_norm": 8.540889665677351, "learning_rate": 6.025604445861137e-06, "loss": 1.6154978275299072, "step": 2805 }, { "epoch": 1.4737394957983194, "grad_norm": 15.244155133260243, "learning_rate": 6.02261321181903e-06, "loss": 1.1525774002075195, "step": 2806 }, { "epoch": 1.4742647058823528, "grad_norm": 15.04475936959345, "learning_rate": 6.019621595757987e-06, "loss": 2.7511398792266846, "step": 2807 }, { "epoch": 1.4747899159663866, "grad_norm": 8.409463833082263, "learning_rate": 6.016629598795587e-06, "loss": 2.1453425884246826, "step": 2808 }, { "epoch": 1.47531512605042, "grad_norm": 10.467846936437327, "learning_rate": 6.013637222049554e-06, "loss": 1.3417030572891235, "step": 2809 }, { "epoch": 1.4758403361344539, "grad_norm": 18.74141609993802, "learning_rate": 6.010644466637756e-06, "loss": 1.423673152923584, "step": 2810 }, { "epoch": 1.4763655462184873, "grad_norm": 11.343846267002052, "learning_rate": 6.007651333678199e-06, "loss": 1.3917582035064697, "step": 2811 }, { "epoch": 1.476890756302521, "grad_norm": 11.211814827240355, "learning_rate": 6.004657824289031e-06, "loss": 1.8203171491622925, "step": 2812 }, { "epoch": 1.4774159663865547, "grad_norm": 8.32431234549036, "learning_rate": 6.0016639395885424e-06, "loss": 1.677531123161316, "step": 2813 }, { "epoch": 1.4779411764705883, "grad_norm": 9.081860364888588, "learning_rate": 5.9986696806951625e-06, "loss": 1.5137662887573242, "step": 2814 }, { "epoch": 1.478466386554622, "grad_norm": 7.08997854765275, "learning_rate": 5.995675048727461e-06, "loss": 1.5967217683792114, "step": 2815 }, { "epoch": 1.4789915966386555, "grad_norm": 11.620168337811307, "learning_rate": 5.9926800448041446e-06, "loss": 2.290982723236084, "step": 2816 }, { "epoch": 1.4795168067226891, "grad_norm": 9.169178568275145, "learning_rate": 5.98968467004406e-06, "loss": 1.9395719766616821, "step": 2817 }, { "epoch": 1.4800420168067228, "grad_norm": 19.43973270209733, "learning_rate": 5.986688925566198e-06, "loss": 1.914243221282959, "step": 2818 }, { "epoch": 1.4805672268907564, "grad_norm": 14.79213281848686, "learning_rate": 5.983692812489679e-06, "loss": 1.8757904767990112, "step": 2819 }, { "epoch": 1.48109243697479, "grad_norm": 6.623780138555651, "learning_rate": 5.980696331933764e-06, "loss": 1.7306898832321167, "step": 2820 }, { "epoch": 1.4816176470588236, "grad_norm": 14.665166895474977, "learning_rate": 5.977699485017855e-06, "loss": 1.2540236711502075, "step": 2821 }, { "epoch": 1.4821428571428572, "grad_norm": 8.833773573076922, "learning_rate": 5.974702272861487e-06, "loss": 1.315044641494751, "step": 2822 }, { "epoch": 1.4826680672268908, "grad_norm": 10.48223864739591, "learning_rate": 5.971704696584332e-06, "loss": 1.7251434326171875, "step": 2823 }, { "epoch": 1.4831932773109244, "grad_norm": 11.048554063714633, "learning_rate": 5.9687067573061965e-06, "loss": 1.7319231033325195, "step": 2824 }, { "epoch": 1.483718487394958, "grad_norm": 8.38825084535047, "learning_rate": 5.965708456147028e-06, "loss": 1.8665804862976074, "step": 2825 }, { "epoch": 1.4842436974789917, "grad_norm": 7.369771367824362, "learning_rate": 5.962709794226905e-06, "loss": 0.691155731678009, "step": 2826 }, { "epoch": 1.4847689075630253, "grad_norm": 14.073045570916406, "learning_rate": 5.959710772666041e-06, "loss": 1.813007116317749, "step": 2827 }, { "epoch": 1.4852941176470589, "grad_norm": 9.572866240140566, "learning_rate": 5.956711392584782e-06, "loss": 2.897813558578491, "step": 2828 }, { "epoch": 1.4858193277310925, "grad_norm": 16.91968264850313, "learning_rate": 5.953711655103615e-06, "loss": 1.4889321327209473, "step": 2829 }, { "epoch": 1.486344537815126, "grad_norm": 9.994243480776813, "learning_rate": 5.950711561343152e-06, "loss": 1.1888728141784668, "step": 2830 }, { "epoch": 1.4868697478991597, "grad_norm": 18.090993697846038, "learning_rate": 5.947711112424142e-06, "loss": 2.311788558959961, "step": 2831 }, { "epoch": 1.4873949579831933, "grad_norm": 10.698621788325813, "learning_rate": 5.94471030946747e-06, "loss": 1.3138668537139893, "step": 2832 }, { "epoch": 1.487920168067227, "grad_norm": 15.754072248005881, "learning_rate": 5.941709153594146e-06, "loss": 1.4495904445648193, "step": 2833 }, { "epoch": 1.4884453781512605, "grad_norm": 15.435243495547523, "learning_rate": 5.9387076459253175e-06, "loss": 1.3696357011795044, "step": 2834 }, { "epoch": 1.4889705882352942, "grad_norm": 9.970533354540235, "learning_rate": 5.935705787582261e-06, "loss": 1.2953077554702759, "step": 2835 }, { "epoch": 1.4894957983193278, "grad_norm": 11.608649036564305, "learning_rate": 5.932703579686385e-06, "loss": 2.515267848968506, "step": 2836 }, { "epoch": 1.4900210084033614, "grad_norm": 11.861396337213627, "learning_rate": 5.92970102335923e-06, "loss": 1.2390263080596924, "step": 2837 }, { "epoch": 1.490546218487395, "grad_norm": 10.330756083096254, "learning_rate": 5.9266981197224615e-06, "loss": 1.96694016456604, "step": 2838 }, { "epoch": 1.4910714285714286, "grad_norm": 9.1598301030574, "learning_rate": 5.923694869897879e-06, "loss": 2.5066561698913574, "step": 2839 }, { "epoch": 1.4915966386554622, "grad_norm": 9.009991462237936, "learning_rate": 5.920691275007412e-06, "loss": 1.6626849174499512, "step": 2840 }, { "epoch": 1.4921218487394958, "grad_norm": 14.128976680056349, "learning_rate": 5.917687336173116e-06, "loss": 1.5072438716888428, "step": 2841 }, { "epoch": 1.4926470588235294, "grad_norm": 13.988741293808523, "learning_rate": 5.914683054517176e-06, "loss": 2.1789119243621826, "step": 2842 }, { "epoch": 1.493172268907563, "grad_norm": 10.173288182889692, "learning_rate": 5.911678431161907e-06, "loss": 1.802624225616455, "step": 2843 }, { "epoch": 1.4936974789915967, "grad_norm": 11.26877200199326, "learning_rate": 5.908673467229749e-06, "loss": 1.7642006874084473, "step": 2844 }, { "epoch": 1.4942226890756303, "grad_norm": 19.46610092594002, "learning_rate": 5.905668163843269e-06, "loss": 1.7786991596221924, "step": 2845 }, { "epoch": 1.4947478991596639, "grad_norm": 18.165207809056835, "learning_rate": 5.902662522125163e-06, "loss": 1.3276612758636475, "step": 2846 }, { "epoch": 1.4952731092436975, "grad_norm": 12.780426008235525, "learning_rate": 5.899656543198254e-06, "loss": 1.3048717975616455, "step": 2847 }, { "epoch": 1.495798319327731, "grad_norm": 12.545158971853072, "learning_rate": 5.8966502281854885e-06, "loss": 2.6292881965637207, "step": 2848 }, { "epoch": 1.4963235294117647, "grad_norm": 14.0427189863656, "learning_rate": 5.893643578209939e-06, "loss": 1.301329493522644, "step": 2849 }, { "epoch": 1.4968487394957983, "grad_norm": 14.90071697810995, "learning_rate": 5.890636594394803e-06, "loss": 1.9319939613342285, "step": 2850 }, { "epoch": 1.497373949579832, "grad_norm": 10.235982684128693, "learning_rate": 5.887629277863405e-06, "loss": 1.2810033559799194, "step": 2851 }, { "epoch": 1.4978991596638656, "grad_norm": 11.051027681332855, "learning_rate": 5.884621629739191e-06, "loss": 1.858025074005127, "step": 2852 }, { "epoch": 1.4984243697478992, "grad_norm": 11.768480745372527, "learning_rate": 5.881613651145732e-06, "loss": 1.4819607734680176, "step": 2853 }, { "epoch": 1.4989495798319328, "grad_norm": 10.47462659426203, "learning_rate": 5.878605343206722e-06, "loss": 1.3072776794433594, "step": 2854 }, { "epoch": 1.4994747899159664, "grad_norm": 9.485016559187, "learning_rate": 5.875596707045982e-06, "loss": 1.3459935188293457, "step": 2855 }, { "epoch": 1.5, "grad_norm": 10.238123655792794, "learning_rate": 5.872587743787447e-06, "loss": 1.3451051712036133, "step": 2856 }, { "epoch": 1.5005252100840336, "grad_norm": 17.508158441331556, "learning_rate": 5.8695784545551815e-06, "loss": 1.6816987991333008, "step": 2857 }, { "epoch": 1.5010504201680672, "grad_norm": 13.919394577120519, "learning_rate": 5.86656884047337e-06, "loss": 2.0135819911956787, "step": 2858 }, { "epoch": 1.5015756302521008, "grad_norm": 9.85490795789548, "learning_rate": 5.863558902666318e-06, "loss": 1.4393882751464844, "step": 2859 }, { "epoch": 1.5021008403361344, "grad_norm": 12.027119139841197, "learning_rate": 5.860548642258451e-06, "loss": 1.8411206007003784, "step": 2860 }, { "epoch": 1.502626050420168, "grad_norm": 11.578607467451782, "learning_rate": 5.8575380603743155e-06, "loss": 1.3521913290023804, "step": 2861 }, { "epoch": 1.5031512605042017, "grad_norm": 15.786064218086343, "learning_rate": 5.85452715813858e-06, "loss": 1.388970971107483, "step": 2862 }, { "epoch": 1.5036764705882353, "grad_norm": 20.27257774565252, "learning_rate": 5.851515936676031e-06, "loss": 1.084679126739502, "step": 2863 }, { "epoch": 1.504201680672269, "grad_norm": 14.026872328523538, "learning_rate": 5.848504397111573e-06, "loss": 1.4439821243286133, "step": 2864 }, { "epoch": 1.5047268907563025, "grad_norm": 38.96072770612892, "learning_rate": 5.8454925405702326e-06, "loss": 1.7968759536743164, "step": 2865 }, { "epoch": 1.5052521008403361, "grad_norm": 23.567587821248285, "learning_rate": 5.8424803681771505e-06, "loss": 1.5943264961242676, "step": 2866 }, { "epoch": 1.5057773109243697, "grad_norm": 19.27584586072049, "learning_rate": 5.83946788105759e-06, "loss": 1.861040472984314, "step": 2867 }, { "epoch": 1.5063025210084033, "grad_norm": 11.820467374303973, "learning_rate": 5.836455080336929e-06, "loss": 1.2037864923477173, "step": 2868 }, { "epoch": 1.506827731092437, "grad_norm": 15.137479556828536, "learning_rate": 5.833441967140662e-06, "loss": 1.1817970275878906, "step": 2869 }, { "epoch": 1.5073529411764706, "grad_norm": 8.516086510212022, "learning_rate": 5.830428542594404e-06, "loss": 1.6967062950134277, "step": 2870 }, { "epoch": 1.5078781512605042, "grad_norm": 10.560017119280444, "learning_rate": 5.827414807823884e-06, "loss": 1.0605194568634033, "step": 2871 }, { "epoch": 1.5084033613445378, "grad_norm": 10.834636238912983, "learning_rate": 5.824400763954944e-06, "loss": 2.2708017826080322, "step": 2872 }, { "epoch": 1.5089285714285714, "grad_norm": 11.979837739536155, "learning_rate": 5.821386412113546e-06, "loss": 2.6514854431152344, "step": 2873 }, { "epoch": 1.509453781512605, "grad_norm": 19.12069884389941, "learning_rate": 5.818371753425764e-06, "loss": 1.6612210273742676, "step": 2874 }, { "epoch": 1.5099789915966386, "grad_norm": 9.907309346645576, "learning_rate": 5.815356789017791e-06, "loss": 1.5609991550445557, "step": 2875 }, { "epoch": 1.5105042016806722, "grad_norm": 14.457357561212142, "learning_rate": 5.812341520015929e-06, "loss": 2.3147692680358887, "step": 2876 }, { "epoch": 1.5110294117647058, "grad_norm": 9.089480918573374, "learning_rate": 5.809325947546596e-06, "loss": 1.7276939153671265, "step": 2877 }, { "epoch": 1.5115546218487395, "grad_norm": 8.081566611738515, "learning_rate": 5.806310072736323e-06, "loss": 1.9004578590393066, "step": 2878 }, { "epoch": 1.512079831932773, "grad_norm": 13.897502533404047, "learning_rate": 5.803293896711756e-06, "loss": 1.6075999736785889, "step": 2879 }, { "epoch": 1.5126050420168067, "grad_norm": 10.021929429682709, "learning_rate": 5.800277420599649e-06, "loss": 2.075315475463867, "step": 2880 }, { "epoch": 1.5131302521008403, "grad_norm": 9.306843698596571, "learning_rate": 5.797260645526873e-06, "loss": 1.6688923835754395, "step": 2881 }, { "epoch": 1.513655462184874, "grad_norm": 10.036367052729391, "learning_rate": 5.794243572620408e-06, "loss": 1.193693995475769, "step": 2882 }, { "epoch": 1.5141806722689075, "grad_norm": 10.740232814889865, "learning_rate": 5.791226203007346e-06, "loss": 2.004913091659546, "step": 2883 }, { "epoch": 1.5147058823529411, "grad_norm": 11.999598724419366, "learning_rate": 5.788208537814889e-06, "loss": 1.4250155687332153, "step": 2884 }, { "epoch": 1.5152310924369747, "grad_norm": 12.223837924953441, "learning_rate": 5.785190578170351e-06, "loss": 1.5953729152679443, "step": 2885 }, { "epoch": 1.5157563025210083, "grad_norm": 24.216139786646707, "learning_rate": 5.782172325201155e-06, "loss": 1.6154977083206177, "step": 2886 }, { "epoch": 1.5162815126050422, "grad_norm": 7.471918874765334, "learning_rate": 5.779153780034833e-06, "loss": 1.424997329711914, "step": 2887 }, { "epoch": 1.5168067226890756, "grad_norm": 13.808427928663413, "learning_rate": 5.7761349437990255e-06, "loss": 1.3849037885665894, "step": 2888 }, { "epoch": 1.5173319327731094, "grad_norm": 9.123275417455284, "learning_rate": 5.773115817621487e-06, "loss": 1.92661714553833, "step": 2889 }, { "epoch": 1.5178571428571428, "grad_norm": 13.72838263775455, "learning_rate": 5.770096402630073e-06, "loss": 1.0330466032028198, "step": 2890 }, { "epoch": 1.5183823529411766, "grad_norm": 7.655300808201822, "learning_rate": 5.767076699952751e-06, "loss": 1.8486248254776, "step": 2891 }, { "epoch": 1.51890756302521, "grad_norm": 7.8014609929651435, "learning_rate": 5.764056710717596e-06, "loss": 1.4996156692504883, "step": 2892 }, { "epoch": 1.5194327731092439, "grad_norm": 12.039568608523037, "learning_rate": 5.761036436052788e-06, "loss": 1.8443527221679688, "step": 2893 }, { "epoch": 1.5199579831932772, "grad_norm": 8.659391542247635, "learning_rate": 5.758015877086616e-06, "loss": 1.3554350137710571, "step": 2894 }, { "epoch": 1.520483193277311, "grad_norm": 12.95903705623489, "learning_rate": 5.754995034947474e-06, "loss": 1.8053975105285645, "step": 2895 }, { "epoch": 1.5210084033613445, "grad_norm": 9.327048639656558, "learning_rate": 5.751973910763862e-06, "loss": 1.6848835945129395, "step": 2896 }, { "epoch": 1.5215336134453783, "grad_norm": 19.302375729414894, "learning_rate": 5.748952505664385e-06, "loss": 3.980196952819824, "step": 2897 }, { "epoch": 1.5220588235294117, "grad_norm": 18.66316165606761, "learning_rate": 5.745930820777753e-06, "loss": 1.6223571300506592, "step": 2898 }, { "epoch": 1.5225840336134455, "grad_norm": 12.734923210269024, "learning_rate": 5.74290885723278e-06, "loss": 1.6548211574554443, "step": 2899 }, { "epoch": 1.523109243697479, "grad_norm": 7.1223652189520665, "learning_rate": 5.739886616158386e-06, "loss": 0.8713272213935852, "step": 2900 }, { "epoch": 1.5236344537815127, "grad_norm": 9.470825827621523, "learning_rate": 5.736864098683595e-06, "loss": 2.23360538482666, "step": 2901 }, { "epoch": 1.5241596638655461, "grad_norm": 8.751133349958469, "learning_rate": 5.7338413059375285e-06, "loss": 1.426957130432129, "step": 2902 }, { "epoch": 1.52468487394958, "grad_norm": 13.733697650918987, "learning_rate": 5.7308182390494185e-06, "loss": 1.4930875301361084, "step": 2903 }, { "epoch": 1.5252100840336134, "grad_norm": 12.982363979908152, "learning_rate": 5.727794899148596e-06, "loss": 1.4669435024261475, "step": 2904 }, { "epoch": 1.5257352941176472, "grad_norm": 15.848490300949237, "learning_rate": 5.724771287364492e-06, "loss": 1.7891262769699097, "step": 2905 }, { "epoch": 1.5262605042016806, "grad_norm": 12.59163818717848, "learning_rate": 5.721747404826641e-06, "loss": 1.8997935056686401, "step": 2906 }, { "epoch": 1.5267857142857144, "grad_norm": 15.280794260801414, "learning_rate": 5.718723252664682e-06, "loss": 1.4946014881134033, "step": 2907 }, { "epoch": 1.5273109243697478, "grad_norm": 9.493566868179185, "learning_rate": 5.7156988320083485e-06, "loss": 1.504456877708435, "step": 2908 }, { "epoch": 1.5278361344537816, "grad_norm": 11.039979166730305, "learning_rate": 5.712674143987478e-06, "loss": 1.36366868019104, "step": 2909 }, { "epoch": 1.528361344537815, "grad_norm": 9.266488878179008, "learning_rate": 5.709649189732006e-06, "loss": 1.622373104095459, "step": 2910 }, { "epoch": 1.5288865546218489, "grad_norm": 13.513185773637307, "learning_rate": 5.706623970371972e-06, "loss": 1.2717863321304321, "step": 2911 }, { "epoch": 1.5294117647058822, "grad_norm": 20.433933596775127, "learning_rate": 5.7035984870375075e-06, "loss": 1.8660664558410645, "step": 2912 }, { "epoch": 1.529936974789916, "grad_norm": 18.23207759654784, "learning_rate": 5.700572740858847e-06, "loss": 1.1541085243225098, "step": 2913 }, { "epoch": 1.5304621848739495, "grad_norm": 13.14098786673324, "learning_rate": 5.697546732966323e-06, "loss": 0.860084056854248, "step": 2914 }, { "epoch": 1.5309873949579833, "grad_norm": 16.3322604680947, "learning_rate": 5.694520464490365e-06, "loss": 1.4657001495361328, "step": 2915 }, { "epoch": 1.5315126050420167, "grad_norm": 17.006787249171897, "learning_rate": 5.6914939365615e-06, "loss": 1.237950325012207, "step": 2916 }, { "epoch": 1.5320378151260505, "grad_norm": 13.491224137589745, "learning_rate": 5.688467150310353e-06, "loss": 1.5527464151382446, "step": 2917 }, { "epoch": 1.532563025210084, "grad_norm": 10.91222177976933, "learning_rate": 5.685440106867642e-06, "loss": 1.6397098302841187, "step": 2918 }, { "epoch": 1.5330882352941178, "grad_norm": 10.607586571968769, "learning_rate": 5.682412807364187e-06, "loss": 2.186699390411377, "step": 2919 }, { "epoch": 1.5336134453781511, "grad_norm": 7.1677369047294235, "learning_rate": 5.6793852529308965e-06, "loss": 1.648890733718872, "step": 2920 }, { "epoch": 1.534138655462185, "grad_norm": 12.859974108533141, "learning_rate": 5.67635744469878e-06, "loss": 1.0208791494369507, "step": 2921 }, { "epoch": 1.5346638655462184, "grad_norm": 9.512064502130299, "learning_rate": 5.67332938379894e-06, "loss": 1.5499924421310425, "step": 2922 }, { "epoch": 1.5351890756302522, "grad_norm": 13.143085652501446, "learning_rate": 5.6703010713625715e-06, "loss": 1.8655292987823486, "step": 2923 }, { "epoch": 1.5357142857142856, "grad_norm": 9.426891918390485, "learning_rate": 5.667272508520968e-06, "loss": 1.5644574165344238, "step": 2924 }, { "epoch": 1.5362394957983194, "grad_norm": 17.52336736081476, "learning_rate": 5.664243696405509e-06, "loss": 2.126572608947754, "step": 2925 }, { "epoch": 1.5367647058823528, "grad_norm": 12.395544361341729, "learning_rate": 5.661214636147676e-06, "loss": 1.1755940914154053, "step": 2926 }, { "epoch": 1.5372899159663866, "grad_norm": 9.61854386385847, "learning_rate": 5.658185328879037e-06, "loss": 1.557518482208252, "step": 2927 }, { "epoch": 1.53781512605042, "grad_norm": 9.378644950617781, "learning_rate": 5.6551557757312536e-06, "loss": 1.422531008720398, "step": 2928 }, { "epoch": 1.5383403361344539, "grad_norm": 11.002887083092874, "learning_rate": 5.652125977836083e-06, "loss": 1.052585244178772, "step": 2929 }, { "epoch": 1.5388655462184873, "grad_norm": 8.084211781317082, "learning_rate": 5.649095936325367e-06, "loss": 1.8470664024353027, "step": 2930 }, { "epoch": 1.539390756302521, "grad_norm": 9.200810971099617, "learning_rate": 5.646065652331045e-06, "loss": 2.062453269958496, "step": 2931 }, { "epoch": 1.5399159663865545, "grad_norm": 17.16929349192554, "learning_rate": 5.643035126985141e-06, "loss": 1.3507643938064575, "step": 2932 }, { "epoch": 1.5404411764705883, "grad_norm": 9.172600802244732, "learning_rate": 5.640004361419776e-06, "loss": 1.5793935060501099, "step": 2933 }, { "epoch": 1.5409663865546217, "grad_norm": 12.898758593657767, "learning_rate": 5.636973356767155e-06, "loss": 1.768698811531067, "step": 2934 }, { "epoch": 1.5414915966386555, "grad_norm": 7.5890227638448575, "learning_rate": 5.633942114159574e-06, "loss": 1.1846299171447754, "step": 2935 }, { "epoch": 1.542016806722689, "grad_norm": 10.259227153472677, "learning_rate": 5.630910634729418e-06, "loss": 0.9566595554351807, "step": 2936 }, { "epoch": 1.5425420168067228, "grad_norm": 13.925142766796322, "learning_rate": 5.627878919609162e-06, "loss": 0.9908967018127441, "step": 2937 }, { "epoch": 1.5430672268907561, "grad_norm": 13.667068248895111, "learning_rate": 5.6248469699313664e-06, "loss": 1.5293811559677124, "step": 2938 }, { "epoch": 1.54359243697479, "grad_norm": 11.894972075141927, "learning_rate": 5.621814786828683e-06, "loss": 1.386354684829712, "step": 2939 }, { "epoch": 1.5441176470588234, "grad_norm": 7.098368154191767, "learning_rate": 5.618782371433844e-06, "loss": 1.5454857349395752, "step": 2940 }, { "epoch": 1.5446428571428572, "grad_norm": 13.395432727163477, "learning_rate": 5.615749724879677e-06, "loss": 1.8663475513458252, "step": 2941 }, { "epoch": 1.5451680672268906, "grad_norm": 8.472009180366035, "learning_rate": 5.6127168482990905e-06, "loss": 1.6517784595489502, "step": 2942 }, { "epoch": 1.5456932773109244, "grad_norm": 13.967163618889897, "learning_rate": 5.609683742825078e-06, "loss": 1.6994513273239136, "step": 2943 }, { "epoch": 1.5462184873949578, "grad_norm": 9.92545576421003, "learning_rate": 5.6066504095907225e-06, "loss": 1.5950546264648438, "step": 2944 }, { "epoch": 1.5467436974789917, "grad_norm": 12.846927435632132, "learning_rate": 5.603616849729191e-06, "loss": 1.1596665382385254, "step": 2945 }, { "epoch": 1.5472689075630253, "grad_norm": 14.860186674245476, "learning_rate": 5.600583064373733e-06, "loss": 1.8125252723693848, "step": 2946 }, { "epoch": 1.5477941176470589, "grad_norm": 17.166450175533903, "learning_rate": 5.5975490546576834e-06, "loss": 1.009368658065796, "step": 2947 }, { "epoch": 1.5483193277310925, "grad_norm": 13.64043369742361, "learning_rate": 5.594514821714462e-06, "loss": 1.7966917753219604, "step": 2948 }, { "epoch": 1.548844537815126, "grad_norm": 10.492051004075568, "learning_rate": 5.591480366677571e-06, "loss": 1.428999423980713, "step": 2949 }, { "epoch": 1.5493697478991597, "grad_norm": 13.030384864945013, "learning_rate": 5.588445690680596e-06, "loss": 1.6084507703781128, "step": 2950 }, { "epoch": 1.5498949579831933, "grad_norm": 10.52587210445605, "learning_rate": 5.585410794857203e-06, "loss": 1.3045061826705933, "step": 2951 }, { "epoch": 1.550420168067227, "grad_norm": 13.819258830455226, "learning_rate": 5.582375680341144e-06, "loss": 1.658186435699463, "step": 2952 }, { "epoch": 1.5509453781512605, "grad_norm": 14.27016281304172, "learning_rate": 5.579340348266251e-06, "loss": 1.7536265850067139, "step": 2953 }, { "epoch": 1.5514705882352942, "grad_norm": 15.953136280416974, "learning_rate": 5.576304799766436e-06, "loss": 2.103753089904785, "step": 2954 }, { "epoch": 1.5519957983193278, "grad_norm": 11.989180422717773, "learning_rate": 5.5732690359756906e-06, "loss": 1.8845781087875366, "step": 2955 }, { "epoch": 1.5525210084033614, "grad_norm": 10.435327143905482, "learning_rate": 5.570233058028092e-06, "loss": 2.2095136642456055, "step": 2956 }, { "epoch": 1.553046218487395, "grad_norm": 18.792925731063573, "learning_rate": 5.5671968670577935e-06, "loss": 1.4067503213882446, "step": 2957 }, { "epoch": 1.5535714285714286, "grad_norm": 14.986631022612844, "learning_rate": 5.564160464199029e-06, "loss": 1.8566360473632812, "step": 2958 }, { "epoch": 1.5540966386554622, "grad_norm": 10.1765285542977, "learning_rate": 5.5611238505861094e-06, "loss": 1.1972577571868896, "step": 2959 }, { "epoch": 1.5546218487394958, "grad_norm": 9.902480520159905, "learning_rate": 5.55808702735343e-06, "loss": 1.118326187133789, "step": 2960 }, { "epoch": 1.5551470588235294, "grad_norm": 9.29939364842904, "learning_rate": 5.55504999563546e-06, "loss": 1.4431086778640747, "step": 2961 }, { "epoch": 1.555672268907563, "grad_norm": 13.345996711773882, "learning_rate": 5.552012756566745e-06, "loss": 0.9997822642326355, "step": 2962 }, { "epoch": 1.5561974789915967, "grad_norm": 9.198928087670938, "learning_rate": 5.548975311281911e-06, "loss": 1.9770004749298096, "step": 2963 }, { "epoch": 1.5567226890756303, "grad_norm": 7.7037085052309, "learning_rate": 5.5459376609156625e-06, "loss": 1.7865469455718994, "step": 2964 }, { "epoch": 1.5572478991596639, "grad_norm": 16.600075656757618, "learning_rate": 5.542899806602776e-06, "loss": 1.614283800125122, "step": 2965 }, { "epoch": 1.5577731092436975, "grad_norm": 13.040205367211637, "learning_rate": 5.539861749478107e-06, "loss": 1.5482186079025269, "step": 2966 }, { "epoch": 1.558298319327731, "grad_norm": 11.213426120437903, "learning_rate": 5.5368234906765874e-06, "loss": 1.320069432258606, "step": 2967 }, { "epoch": 1.5588235294117647, "grad_norm": 7.343748682897271, "learning_rate": 5.533785031333224e-06, "loss": 1.4869799613952637, "step": 2968 }, { "epoch": 1.5593487394957983, "grad_norm": 8.219843277676649, "learning_rate": 5.530746372583097e-06, "loss": 1.561574935913086, "step": 2969 }, { "epoch": 1.559873949579832, "grad_norm": 10.486421831260104, "learning_rate": 5.52770751556136e-06, "loss": 1.6030337810516357, "step": 2970 }, { "epoch": 1.5603991596638656, "grad_norm": 7.2914544240777905, "learning_rate": 5.524668461403247e-06, "loss": 1.5702407360076904, "step": 2971 }, { "epoch": 1.5609243697478992, "grad_norm": 9.967087925098134, "learning_rate": 5.521629211244058e-06, "loss": 1.2014076709747314, "step": 2972 }, { "epoch": 1.5614495798319328, "grad_norm": 58.633708588583836, "learning_rate": 5.518589766219173e-06, "loss": 4.672181606292725, "step": 2973 }, { "epoch": 1.5619747899159664, "grad_norm": 12.196714902900535, "learning_rate": 5.515550127464035e-06, "loss": 1.6894347667694092, "step": 2974 }, { "epoch": 1.5625, "grad_norm": 8.684063387615494, "learning_rate": 5.512510296114174e-06, "loss": 1.867067575454712, "step": 2975 }, { "epoch": 1.5630252100840336, "grad_norm": 10.141827754464805, "learning_rate": 5.509470273305179e-06, "loss": 2.030630111694336, "step": 2976 }, { "epoch": 1.5635504201680672, "grad_norm": 20.41224438593559, "learning_rate": 5.506430060172714e-06, "loss": 1.8465218544006348, "step": 2977 }, { "epoch": 1.5640756302521008, "grad_norm": 9.860469747935166, "learning_rate": 5.503389657852519e-06, "loss": 2.35500431060791, "step": 2978 }, { "epoch": 1.5646008403361344, "grad_norm": 9.985572325872047, "learning_rate": 5.5003490674804e-06, "loss": 1.1961085796356201, "step": 2979 }, { "epoch": 1.565126050420168, "grad_norm": 16.36883349113698, "learning_rate": 5.4973082901922325e-06, "loss": 1.2019355297088623, "step": 2980 }, { "epoch": 1.5656512605042017, "grad_norm": 8.020900276544133, "learning_rate": 5.494267327123965e-06, "loss": 1.6488561630249023, "step": 2981 }, { "epoch": 1.5661764705882353, "grad_norm": 9.849217556069373, "learning_rate": 5.491226179411614e-06, "loss": 1.1081463098526, "step": 2982 }, { "epoch": 1.566701680672269, "grad_norm": 8.847790649420688, "learning_rate": 5.488184848191265e-06, "loss": 1.8693208694458008, "step": 2983 }, { "epoch": 1.5672268907563025, "grad_norm": 11.0020320932377, "learning_rate": 5.485143334599071e-06, "loss": 1.8414572477340698, "step": 2984 }, { "epoch": 1.5677521008403361, "grad_norm": 8.230947119571999, "learning_rate": 5.482101639771255e-06, "loss": 1.2785500288009644, "step": 2985 }, { "epoch": 1.5682773109243697, "grad_norm": 10.201231107069894, "learning_rate": 5.479059764844107e-06, "loss": 1.2749035358428955, "step": 2986 }, { "epoch": 1.5688025210084033, "grad_norm": 13.983397280782574, "learning_rate": 5.476017710953983e-06, "loss": 1.3269745111465454, "step": 2987 }, { "epoch": 1.569327731092437, "grad_norm": 13.733439163295257, "learning_rate": 5.4729754792373094e-06, "loss": 1.130143642425537, "step": 2988 }, { "epoch": 1.5698529411764706, "grad_norm": 11.45075614439256, "learning_rate": 5.469933070830574e-06, "loss": 1.440808653831482, "step": 2989 }, { "epoch": 1.5703781512605042, "grad_norm": 9.24461823293225, "learning_rate": 5.466890486870335e-06, "loss": 1.6493146419525146, "step": 2990 }, { "epoch": 1.5709033613445378, "grad_norm": 9.325880222470737, "learning_rate": 5.463847728493214e-06, "loss": 1.273420810699463, "step": 2991 }, { "epoch": 1.5714285714285714, "grad_norm": 10.582311245428702, "learning_rate": 5.4608047968358965e-06, "loss": 1.924250841140747, "step": 2992 }, { "epoch": 1.571953781512605, "grad_norm": 11.29854702128637, "learning_rate": 5.457761693035139e-06, "loss": 1.616943597793579, "step": 2993 }, { "epoch": 1.5724789915966386, "grad_norm": 13.24705526533712, "learning_rate": 5.454718418227752e-06, "loss": 1.4549560546875, "step": 2994 }, { "epoch": 1.5730042016806722, "grad_norm": 16.67667661305697, "learning_rate": 5.451674973550619e-06, "loss": 1.9000680446624756, "step": 2995 }, { "epoch": 1.5735294117647058, "grad_norm": 14.461185745097746, "learning_rate": 5.448631360140683e-06, "loss": 1.6825497150421143, "step": 2996 }, { "epoch": 1.5740546218487395, "grad_norm": 9.206924388508579, "learning_rate": 5.44558757913495e-06, "loss": 1.6047449111938477, "step": 2997 }, { "epoch": 1.574579831932773, "grad_norm": 15.300112344155286, "learning_rate": 5.4425436316704905e-06, "loss": 1.962047815322876, "step": 2998 }, { "epoch": 1.5751050420168067, "grad_norm": 10.787994607732422, "learning_rate": 5.439499518884433e-06, "loss": 0.8550975322723389, "step": 2999 }, { "epoch": 1.5756302521008403, "grad_norm": 12.515381428962108, "learning_rate": 5.436455241913974e-06, "loss": 2.231415271759033, "step": 3000 }, { "epoch": 1.576155462184874, "grad_norm": 8.683070628759427, "learning_rate": 5.433410801896366e-06, "loss": 1.4393153190612793, "step": 3001 }, { "epoch": 1.5766806722689075, "grad_norm": 22.534135031948075, "learning_rate": 5.4303661999689265e-06, "loss": 1.3972318172454834, "step": 3002 }, { "epoch": 1.5772058823529411, "grad_norm": 13.98836802533251, "learning_rate": 5.427321437269027e-06, "loss": 1.539790391921997, "step": 3003 }, { "epoch": 1.5777310924369747, "grad_norm": 12.131343502431259, "learning_rate": 5.424276514934109e-06, "loss": 1.2395825386047363, "step": 3004 }, { "epoch": 1.5782563025210083, "grad_norm": 13.41694701583357, "learning_rate": 5.4212314341016645e-06, "loss": 1.933267593383789, "step": 3005 }, { "epoch": 1.5787815126050422, "grad_norm": 12.739135132339845, "learning_rate": 5.418186195909249e-06, "loss": 2.522068738937378, "step": 3006 }, { "epoch": 1.5793067226890756, "grad_norm": 12.596879198874607, "learning_rate": 5.415140801494475e-06, "loss": 0.8189308643341064, "step": 3007 }, { "epoch": 1.5798319327731094, "grad_norm": 34.20284374633111, "learning_rate": 5.412095251995017e-06, "loss": 1.449036955833435, "step": 3008 }, { "epoch": 1.5803571428571428, "grad_norm": 11.13227098032211, "learning_rate": 5.409049548548604e-06, "loss": 1.3301892280578613, "step": 3009 }, { "epoch": 1.5808823529411766, "grad_norm": 51.155319989963644, "learning_rate": 5.406003692293022e-06, "loss": 1.5270702838897705, "step": 3010 }, { "epoch": 1.58140756302521, "grad_norm": 10.342910703611519, "learning_rate": 5.402957684366116e-06, "loss": 1.5434386730194092, "step": 3011 }, { "epoch": 1.5819327731092439, "grad_norm": 10.563548868112887, "learning_rate": 5.399911525905787e-06, "loss": 1.764906644821167, "step": 3012 }, { "epoch": 1.5824579831932772, "grad_norm": 8.647277699602647, "learning_rate": 5.396865218049995e-06, "loss": 1.9067412614822388, "step": 3013 }, { "epoch": 1.582983193277311, "grad_norm": 10.777840029312719, "learning_rate": 5.393818761936749e-06, "loss": 1.3426694869995117, "step": 3014 }, { "epoch": 1.5835084033613445, "grad_norm": 15.586663362020502, "learning_rate": 5.390772158704119e-06, "loss": 1.3299593925476074, "step": 3015 }, { "epoch": 1.5840336134453783, "grad_norm": 20.257025956666645, "learning_rate": 5.387725409490231e-06, "loss": 1.310839056968689, "step": 3016 }, { "epoch": 1.5845588235294117, "grad_norm": 15.582753618919837, "learning_rate": 5.38467851543326e-06, "loss": 1.757559061050415, "step": 3017 }, { "epoch": 1.5850840336134455, "grad_norm": 23.639658603172965, "learning_rate": 5.381631477671439e-06, "loss": 1.3903396129608154, "step": 3018 }, { "epoch": 1.585609243697479, "grad_norm": 9.434937981744953, "learning_rate": 5.378584297343053e-06, "loss": 1.4597529172897339, "step": 3019 }, { "epoch": 1.5861344537815127, "grad_norm": 17.80952656483005, "learning_rate": 5.375536975586444e-06, "loss": 2.0107779502868652, "step": 3020 }, { "epoch": 1.5866596638655461, "grad_norm": 9.516485496132065, "learning_rate": 5.3724895135400015e-06, "loss": 1.5562787055969238, "step": 3021 }, { "epoch": 1.58718487394958, "grad_norm": 9.009365409721646, "learning_rate": 5.369441912342169e-06, "loss": 1.2896162271499634, "step": 3022 }, { "epoch": 1.5877100840336134, "grad_norm": 8.809121455676818, "learning_rate": 5.366394173131445e-06, "loss": 1.3252286911010742, "step": 3023 }, { "epoch": 1.5882352941176472, "grad_norm": 15.278197336749606, "learning_rate": 5.363346297046376e-06, "loss": 0.9057502150535583, "step": 3024 }, { "epoch": 1.5887605042016806, "grad_norm": 10.265475431681976, "learning_rate": 5.360298285225564e-06, "loss": 1.4344761371612549, "step": 3025 }, { "epoch": 1.5892857142857144, "grad_norm": 8.980900818541313, "learning_rate": 5.357250138807652e-06, "loss": 1.5517914295196533, "step": 3026 }, { "epoch": 1.5898109243697478, "grad_norm": 9.865844630288834, "learning_rate": 5.354201858931348e-06, "loss": 1.2274580001831055, "step": 3027 }, { "epoch": 1.5903361344537816, "grad_norm": 17.581322571225986, "learning_rate": 5.351153446735398e-06, "loss": 2.2450833320617676, "step": 3028 }, { "epoch": 1.590861344537815, "grad_norm": 10.983447659569535, "learning_rate": 5.3481049033586e-06, "loss": 1.5925889015197754, "step": 3029 }, { "epoch": 1.5913865546218489, "grad_norm": 11.374823923313846, "learning_rate": 5.345056229939802e-06, "loss": 2.034247636795044, "step": 3030 }, { "epoch": 1.5919117647058822, "grad_norm": 9.395067091317914, "learning_rate": 5.342007427617906e-06, "loss": 1.5270767211914062, "step": 3031 }, { "epoch": 1.592436974789916, "grad_norm": 10.107363521608857, "learning_rate": 5.338958497531852e-06, "loss": 1.012595772743225, "step": 3032 }, { "epoch": 1.5929621848739495, "grad_norm": 10.518752553859517, "learning_rate": 5.335909440820635e-06, "loss": 1.5243604183197021, "step": 3033 }, { "epoch": 1.5934873949579833, "grad_norm": 9.787692364746867, "learning_rate": 5.332860258623292e-06, "loss": 0.798157811164856, "step": 3034 }, { "epoch": 1.5940126050420167, "grad_norm": 7.431465451336366, "learning_rate": 5.329810952078914e-06, "loss": 1.5136232376098633, "step": 3035 }, { "epoch": 1.5945378151260505, "grad_norm": 11.535268048562655, "learning_rate": 5.326761522326633e-06, "loss": 0.9737481474876404, "step": 3036 }, { "epoch": 1.595063025210084, "grad_norm": 12.974696635069863, "learning_rate": 5.323711970505627e-06, "loss": 1.685730218887329, "step": 3037 }, { "epoch": 1.5955882352941178, "grad_norm": 14.400191708556283, "learning_rate": 5.320662297755123e-06, "loss": 1.4592320919036865, "step": 3038 }, { "epoch": 1.5961134453781511, "grad_norm": 10.848519871961543, "learning_rate": 5.3176125052143905e-06, "loss": 1.5373382568359375, "step": 3039 }, { "epoch": 1.596638655462185, "grad_norm": 12.23179320445541, "learning_rate": 5.314562594022744e-06, "loss": 2.0527639389038086, "step": 3040 }, { "epoch": 1.5971638655462184, "grad_norm": 11.62823977967023, "learning_rate": 5.311512565319542e-06, "loss": 1.5563435554504395, "step": 3041 }, { "epoch": 1.5976890756302522, "grad_norm": 6.9020257481060625, "learning_rate": 5.308462420244189e-06, "loss": 1.3120524883270264, "step": 3042 }, { "epoch": 1.5982142857142856, "grad_norm": 11.1950524441662, "learning_rate": 5.305412159936133e-06, "loss": 1.188920259475708, "step": 3043 }, { "epoch": 1.5987394957983194, "grad_norm": 13.860457797728966, "learning_rate": 5.302361785534861e-06, "loss": 1.412747859954834, "step": 3044 }, { "epoch": 1.5992647058823528, "grad_norm": 8.955326254548957, "learning_rate": 5.299311298179904e-06, "loss": 1.6942559480667114, "step": 3045 }, { "epoch": 1.5997899159663866, "grad_norm": 15.0471789601037, "learning_rate": 5.2962606990108415e-06, "loss": 1.9774577617645264, "step": 3046 }, { "epoch": 1.60031512605042, "grad_norm": 18.8562441769276, "learning_rate": 5.293209989167286e-06, "loss": 1.4774854183197021, "step": 3047 }, { "epoch": 1.6008403361344539, "grad_norm": 7.800871805592767, "learning_rate": 5.290159169788895e-06, "loss": 1.2230358123779297, "step": 3048 }, { "epoch": 1.6013655462184873, "grad_norm": 10.408727190375572, "learning_rate": 5.287108242015371e-06, "loss": 1.5123305320739746, "step": 3049 }, { "epoch": 1.601890756302521, "grad_norm": 10.773882300648125, "learning_rate": 5.284057206986449e-06, "loss": 1.538888692855835, "step": 3050 }, { "epoch": 1.6024159663865545, "grad_norm": 12.521891812571724, "learning_rate": 5.2810060658419095e-06, "loss": 1.007758378982544, "step": 3051 }, { "epoch": 1.6029411764705883, "grad_norm": 12.84736092836692, "learning_rate": 5.277954819721569e-06, "loss": 1.2584798336029053, "step": 3052 }, { "epoch": 1.6034663865546217, "grad_norm": 20.570205363136893, "learning_rate": 5.27490346976529e-06, "loss": 1.558570146560669, "step": 3053 }, { "epoch": 1.6039915966386555, "grad_norm": 8.336119242439366, "learning_rate": 5.2718520171129664e-06, "loss": 1.7295467853546143, "step": 3054 }, { "epoch": 1.604516806722689, "grad_norm": 20.642779786925278, "learning_rate": 5.268800462904533e-06, "loss": 1.83451247215271, "step": 3055 }, { "epoch": 1.6050420168067228, "grad_norm": 12.38385381998978, "learning_rate": 5.265748808279963e-06, "loss": 2.1253252029418945, "step": 3056 }, { "epoch": 1.6055672268907561, "grad_norm": 18.712846899400745, "learning_rate": 5.2626970543792685e-06, "loss": 1.9685165882110596, "step": 3057 }, { "epoch": 1.60609243697479, "grad_norm": 11.370481775078325, "learning_rate": 5.259645202342496e-06, "loss": 1.7771251201629639, "step": 3058 }, { "epoch": 1.6066176470588234, "grad_norm": 19.03876477951974, "learning_rate": 5.256593253309728e-06, "loss": 1.5930149555206299, "step": 3059 }, { "epoch": 1.6071428571428572, "grad_norm": 10.254101681931608, "learning_rate": 5.25354120842109e-06, "loss": 1.1894960403442383, "step": 3060 }, { "epoch": 1.6076680672268906, "grad_norm": 10.653231763764932, "learning_rate": 5.250489068816734e-06, "loss": 1.458434820175171, "step": 3061 }, { "epoch": 1.6081932773109244, "grad_norm": 10.829776918269657, "learning_rate": 5.247436835636853e-06, "loss": 0.9005193710327148, "step": 3062 }, { "epoch": 1.6087184873949578, "grad_norm": 10.326349390118326, "learning_rate": 5.244384510021673e-06, "loss": 1.6701995134353638, "step": 3063 }, { "epoch": 1.6092436974789917, "grad_norm": 13.18199371129073, "learning_rate": 5.241332093111457e-06, "loss": 1.5033565759658813, "step": 3064 }, { "epoch": 1.6097689075630253, "grad_norm": 9.466469794566347, "learning_rate": 5.238279586046499e-06, "loss": 1.618055820465088, "step": 3065 }, { "epoch": 1.6102941176470589, "grad_norm": 7.960384961155846, "learning_rate": 5.235226989967129e-06, "loss": 1.7577903270721436, "step": 3066 }, { "epoch": 1.6108193277310925, "grad_norm": 8.303735290895537, "learning_rate": 5.232174306013706e-06, "loss": 1.4577488899230957, "step": 3067 }, { "epoch": 1.611344537815126, "grad_norm": 9.683235980279202, "learning_rate": 5.2291215353266315e-06, "loss": 1.25355863571167, "step": 3068 }, { "epoch": 1.6118697478991597, "grad_norm": 10.863844807133503, "learning_rate": 5.226068679046327e-06, "loss": 1.4512593746185303, "step": 3069 }, { "epoch": 1.6123949579831933, "grad_norm": 15.636049396995174, "learning_rate": 5.223015738313254e-06, "loss": 1.479020595550537, "step": 3070 }, { "epoch": 1.612920168067227, "grad_norm": 12.599607237865373, "learning_rate": 5.219962714267903e-06, "loss": 1.4065523147583008, "step": 3071 }, { "epoch": 1.6134453781512605, "grad_norm": 12.051796673506267, "learning_rate": 5.2169096080507975e-06, "loss": 1.7732646465301514, "step": 3072 }, { "epoch": 1.6139705882352942, "grad_norm": 11.207618487420092, "learning_rate": 5.21385642080249e-06, "loss": 1.3979735374450684, "step": 3073 }, { "epoch": 1.6144957983193278, "grad_norm": 7.1466018349447795, "learning_rate": 5.2108031536635614e-06, "loss": 1.436557412147522, "step": 3074 }, { "epoch": 1.6150210084033614, "grad_norm": 11.01119895282242, "learning_rate": 5.2077498077746295e-06, "loss": 1.8917148113250732, "step": 3075 }, { "epoch": 1.615546218487395, "grad_norm": 6.396958668584958, "learning_rate": 5.204696384276332e-06, "loss": 1.7135422229766846, "step": 3076 }, { "epoch": 1.6160714285714286, "grad_norm": 11.435546743896237, "learning_rate": 5.201642884309341e-06, "loss": 2.1062474250793457, "step": 3077 }, { "epoch": 1.6165966386554622, "grad_norm": 8.398390970244984, "learning_rate": 5.198589309014358e-06, "loss": 1.3851127624511719, "step": 3078 }, { "epoch": 1.6171218487394958, "grad_norm": 10.678438945745349, "learning_rate": 5.195535659532111e-06, "loss": 2.4275312423706055, "step": 3079 }, { "epoch": 1.6176470588235294, "grad_norm": 11.411212604789487, "learning_rate": 5.192481937003354e-06, "loss": 1.3018128871917725, "step": 3080 }, { "epoch": 1.618172268907563, "grad_norm": 13.5116399134382, "learning_rate": 5.189428142568872e-06, "loss": 1.7556755542755127, "step": 3081 }, { "epoch": 1.6186974789915967, "grad_norm": 17.603190116269715, "learning_rate": 5.186374277369474e-06, "loss": 1.702657699584961, "step": 3082 }, { "epoch": 1.6192226890756303, "grad_norm": 10.3907493586627, "learning_rate": 5.183320342545995e-06, "loss": 1.9639397859573364, "step": 3083 }, { "epoch": 1.6197478991596639, "grad_norm": 13.03413836769717, "learning_rate": 5.180266339239301e-06, "loss": 1.04927396774292, "step": 3084 }, { "epoch": 1.6202731092436975, "grad_norm": 8.706697489285661, "learning_rate": 5.177212268590277e-06, "loss": 2.341620445251465, "step": 3085 }, { "epoch": 1.620798319327731, "grad_norm": 10.577233595092286, "learning_rate": 5.174158131739837e-06, "loss": 1.6827950477600098, "step": 3086 }, { "epoch": 1.6213235294117647, "grad_norm": 8.615706829388369, "learning_rate": 5.171103929828919e-06, "loss": 1.5805842876434326, "step": 3087 }, { "epoch": 1.6218487394957983, "grad_norm": 7.718213532602144, "learning_rate": 5.168049663998485e-06, "loss": 2.093625068664551, "step": 3088 }, { "epoch": 1.622373949579832, "grad_norm": 10.10945225111664, "learning_rate": 5.16499533538952e-06, "loss": 1.1579737663269043, "step": 3089 }, { "epoch": 1.6228991596638656, "grad_norm": 8.908457376782547, "learning_rate": 5.161940945143036e-06, "loss": 1.6166542768478394, "step": 3090 }, { "epoch": 1.6234243697478992, "grad_norm": 13.615864412530772, "learning_rate": 5.158886494400062e-06, "loss": 1.6508545875549316, "step": 3091 }, { "epoch": 1.6239495798319328, "grad_norm": 24.439336687364076, "learning_rate": 5.155831984301657e-06, "loss": 1.3991830348968506, "step": 3092 }, { "epoch": 1.6244747899159664, "grad_norm": 8.583808073820869, "learning_rate": 5.152777415988894e-06, "loss": 1.5706121921539307, "step": 3093 }, { "epoch": 1.625, "grad_norm": 15.653129337077532, "learning_rate": 5.1497227906028764e-06, "loss": 1.4092233180999756, "step": 3094 }, { "epoch": 1.6255252100840336, "grad_norm": 10.34787712355263, "learning_rate": 5.146668109284723e-06, "loss": 1.3636889457702637, "step": 3095 }, { "epoch": 1.6260504201680672, "grad_norm": 10.599082953760776, "learning_rate": 5.143613373175573e-06, "loss": 2.0063517093658447, "step": 3096 }, { "epoch": 1.6265756302521008, "grad_norm": 12.200199833393542, "learning_rate": 5.140558583416591e-06, "loss": 1.997770071029663, "step": 3097 }, { "epoch": 1.6271008403361344, "grad_norm": 15.849921841994798, "learning_rate": 5.137503741148957e-06, "loss": 1.7975128889083862, "step": 3098 }, { "epoch": 1.627626050420168, "grad_norm": 9.632688041383286, "learning_rate": 5.134448847513873e-06, "loss": 1.5118117332458496, "step": 3099 }, { "epoch": 1.6281512605042017, "grad_norm": 9.128526242946805, "learning_rate": 5.1313939036525585e-06, "loss": 0.9926509857177734, "step": 3100 }, { "epoch": 1.6286764705882353, "grad_norm": 12.285035917057023, "learning_rate": 5.128338910706254e-06, "loss": 1.096555233001709, "step": 3101 }, { "epoch": 1.629201680672269, "grad_norm": 10.10042325655656, "learning_rate": 5.125283869816218e-06, "loss": 1.4132643938064575, "step": 3102 }, { "epoch": 1.6297268907563025, "grad_norm": 24.57875669580783, "learning_rate": 5.122228782123723e-06, "loss": 1.9915111064910889, "step": 3103 }, { "epoch": 1.6302521008403361, "grad_norm": 17.48422070876946, "learning_rate": 5.119173648770065e-06, "loss": 1.8281275033950806, "step": 3104 }, { "epoch": 1.6307773109243697, "grad_norm": 14.307260743404967, "learning_rate": 5.1161184708965525e-06, "loss": 1.4649648666381836, "step": 3105 }, { "epoch": 1.6313025210084033, "grad_norm": 15.466741659182713, "learning_rate": 5.113063249644514e-06, "loss": 1.4001120328903198, "step": 3106 }, { "epoch": 1.631827731092437, "grad_norm": 9.286672187247927, "learning_rate": 5.110007986155291e-06, "loss": 1.1792571544647217, "step": 3107 }, { "epoch": 1.6323529411764706, "grad_norm": 7.957869759941225, "learning_rate": 5.106952681570242e-06, "loss": 1.5156265497207642, "step": 3108 }, { "epoch": 1.6328781512605042, "grad_norm": 14.367417229332117, "learning_rate": 5.103897337030742e-06, "loss": 1.281886339187622, "step": 3109 }, { "epoch": 1.6334033613445378, "grad_norm": 11.117524175921625, "learning_rate": 5.10084195367818e-06, "loss": 1.5550462007522583, "step": 3110 }, { "epoch": 1.6339285714285714, "grad_norm": 6.486761326314236, "learning_rate": 5.097786532653959e-06, "loss": 1.6664419174194336, "step": 3111 }, { "epoch": 1.634453781512605, "grad_norm": 9.371319968744523, "learning_rate": 5.094731075099496e-06, "loss": 1.7581782341003418, "step": 3112 }, { "epoch": 1.6349789915966386, "grad_norm": 9.595668596262676, "learning_rate": 5.091675582156224e-06, "loss": 0.9803217649459839, "step": 3113 }, { "epoch": 1.6355042016806722, "grad_norm": 12.477620939681469, "learning_rate": 5.088620054965585e-06, "loss": 0.9094586372375488, "step": 3114 }, { "epoch": 1.6360294117647058, "grad_norm": 8.432020340556203, "learning_rate": 5.0855644946690385e-06, "loss": 2.6701955795288086, "step": 3115 }, { "epoch": 1.6365546218487395, "grad_norm": 12.544280458189542, "learning_rate": 5.082508902408053e-06, "loss": 3.882866144180298, "step": 3116 }, { "epoch": 1.637079831932773, "grad_norm": 12.564515372602196, "learning_rate": 5.07945327932411e-06, "loss": 1.5386974811553955, "step": 3117 }, { "epoch": 1.6376050420168067, "grad_norm": 12.879838017395324, "learning_rate": 5.076397626558704e-06, "loss": 2.0772528648376465, "step": 3118 }, { "epoch": 1.6381302521008403, "grad_norm": 34.9462007007443, "learning_rate": 5.073341945253336e-06, "loss": 1.662086844444275, "step": 3119 }, { "epoch": 1.638655462184874, "grad_norm": 12.641927607679888, "learning_rate": 5.0702862365495245e-06, "loss": 1.4871139526367188, "step": 3120 }, { "epoch": 1.6391806722689075, "grad_norm": 18.879200948971203, "learning_rate": 5.067230501588792e-06, "loss": 1.8591716289520264, "step": 3121 }, { "epoch": 1.6397058823529411, "grad_norm": 17.96823659271845, "learning_rate": 5.0641747415126755e-06, "loss": 1.3843923807144165, "step": 3122 }, { "epoch": 1.6402310924369747, "grad_norm": 14.323540358602784, "learning_rate": 5.061118957462716e-06, "loss": 1.3466473817825317, "step": 3123 }, { "epoch": 1.6407563025210083, "grad_norm": 18.644907363918527, "learning_rate": 5.05806315058047e-06, "loss": 1.204024314880371, "step": 3124 }, { "epoch": 1.6412815126050422, "grad_norm": 8.132668098754044, "learning_rate": 5.055007322007497e-06, "loss": 1.843780755996704, "step": 3125 }, { "epoch": 1.6418067226890756, "grad_norm": 13.460077018059597, "learning_rate": 5.051951472885368e-06, "loss": 1.2028506994247437, "step": 3126 }, { "epoch": 1.6423319327731094, "grad_norm": 11.506403572417291, "learning_rate": 5.0488956043556604e-06, "loss": 1.1519665718078613, "step": 3127 }, { "epoch": 1.6428571428571428, "grad_norm": 14.578043514916429, "learning_rate": 5.045839717559958e-06, "loss": 1.559743046760559, "step": 3128 }, { "epoch": 1.6433823529411766, "grad_norm": 15.560723367508388, "learning_rate": 5.0427838136398545e-06, "loss": 1.5789740085601807, "step": 3129 }, { "epoch": 1.64390756302521, "grad_norm": 11.434104869431177, "learning_rate": 5.039727893736945e-06, "loss": 1.1219725608825684, "step": 3130 }, { "epoch": 1.6444327731092439, "grad_norm": 11.436070994270105, "learning_rate": 5.036671958992836e-06, "loss": 1.9839292764663696, "step": 3131 }, { "epoch": 1.6449579831932772, "grad_norm": 11.96687655010819, "learning_rate": 5.033616010549135e-06, "loss": 1.3363629579544067, "step": 3132 }, { "epoch": 1.645483193277311, "grad_norm": 12.623420160559558, "learning_rate": 5.0305600495474586e-06, "loss": 1.426746129989624, "step": 3133 }, { "epoch": 1.6460084033613445, "grad_norm": 12.38490349651787, "learning_rate": 5.027504077129424e-06, "loss": 1.629065990447998, "step": 3134 }, { "epoch": 1.6465336134453783, "grad_norm": 18.971260479020643, "learning_rate": 5.0244480944366555e-06, "loss": 1.889040470123291, "step": 3135 }, { "epoch": 1.6470588235294117, "grad_norm": 8.304161013849223, "learning_rate": 5.021392102610782e-06, "loss": 1.562378168106079, "step": 3136 }, { "epoch": 1.6475840336134455, "grad_norm": 9.3929417613621, "learning_rate": 5.018336102793433e-06, "loss": 1.1140542030334473, "step": 3137 }, { "epoch": 1.648109243697479, "grad_norm": 8.893889734172177, "learning_rate": 5.015280096126242e-06, "loss": 1.2760379314422607, "step": 3138 }, { "epoch": 1.6486344537815127, "grad_norm": 8.2080442981099, "learning_rate": 5.012224083750845e-06, "loss": 0.7374924421310425, "step": 3139 }, { "epoch": 1.6491596638655461, "grad_norm": 10.475997502549836, "learning_rate": 5.009168066808883e-06, "loss": 1.6853827238082886, "step": 3140 }, { "epoch": 1.64968487394958, "grad_norm": 10.022161747941537, "learning_rate": 5.006112046441993e-06, "loss": 1.3828448057174683, "step": 3141 }, { "epoch": 1.6502100840336134, "grad_norm": 13.963782723901204, "learning_rate": 5.003056023791818e-06, "loss": 1.758962631225586, "step": 3142 }, { "epoch": 1.6507352941176472, "grad_norm": 13.49899842100994, "learning_rate": 5e-06, "loss": 1.9127230644226074, "step": 3143 }, { "epoch": 1.6512605042016806, "grad_norm": 10.142974274623832, "learning_rate": 4.996943976208184e-06, "loss": 1.7255250215530396, "step": 3144 }, { "epoch": 1.6517857142857144, "grad_norm": 12.893091416940615, "learning_rate": 4.993887953558008e-06, "loss": 1.6206003427505493, "step": 3145 }, { "epoch": 1.6523109243697478, "grad_norm": 8.755376382428233, "learning_rate": 4.990831933191119e-06, "loss": 1.5673696994781494, "step": 3146 }, { "epoch": 1.6528361344537816, "grad_norm": 12.819958677420079, "learning_rate": 4.987775916249157e-06, "loss": 1.5775996446609497, "step": 3147 }, { "epoch": 1.653361344537815, "grad_norm": 9.980497121979969, "learning_rate": 4.98471990387376e-06, "loss": 1.0711749792099, "step": 3148 }, { "epoch": 1.6538865546218489, "grad_norm": 16.850694249864684, "learning_rate": 4.981663897206568e-06, "loss": 1.4993646144866943, "step": 3149 }, { "epoch": 1.6544117647058822, "grad_norm": 16.7741948805016, "learning_rate": 4.9786078973892195e-06, "loss": 1.149838924407959, "step": 3150 }, { "epoch": 1.654936974789916, "grad_norm": 12.130252797235269, "learning_rate": 4.975551905563345e-06, "loss": 1.384533166885376, "step": 3151 }, { "epoch": 1.6554621848739495, "grad_norm": 25.324741934323857, "learning_rate": 4.9724959228705776e-06, "loss": 1.4310171604156494, "step": 3152 }, { "epoch": 1.6559873949579833, "grad_norm": 10.197357937884343, "learning_rate": 4.969439950452543e-06, "loss": 1.5560858249664307, "step": 3153 }, { "epoch": 1.6565126050420167, "grad_norm": 8.991127525865842, "learning_rate": 4.966383989450866e-06, "loss": 1.1132553815841675, "step": 3154 }, { "epoch": 1.6570378151260505, "grad_norm": 12.927546827610996, "learning_rate": 4.963328041007166e-06, "loss": 1.6278947591781616, "step": 3155 }, { "epoch": 1.657563025210084, "grad_norm": 17.68273433728424, "learning_rate": 4.960272106263056e-06, "loss": 1.769431471824646, "step": 3156 }, { "epoch": 1.6580882352941178, "grad_norm": 9.760403079756896, "learning_rate": 4.957216186360147e-06, "loss": 1.55194091796875, "step": 3157 }, { "epoch": 1.6586134453781511, "grad_norm": 16.400495262986592, "learning_rate": 4.954160282440043e-06, "loss": 2.3243887424468994, "step": 3158 }, { "epoch": 1.659138655462185, "grad_norm": 12.452626182995566, "learning_rate": 4.951104395644342e-06, "loss": 1.3925807476043701, "step": 3159 }, { "epoch": 1.6596638655462184, "grad_norm": 9.727453645049202, "learning_rate": 4.948048527114633e-06, "loss": 1.1422160863876343, "step": 3160 }, { "epoch": 1.6601890756302522, "grad_norm": 8.757952499913177, "learning_rate": 4.944992677992505e-06, "loss": 1.4326893091201782, "step": 3161 }, { "epoch": 1.6607142857142856, "grad_norm": 11.48282641238962, "learning_rate": 4.941936849419532e-06, "loss": 1.1868253946304321, "step": 3162 }, { "epoch": 1.6612394957983194, "grad_norm": 17.28460228596912, "learning_rate": 4.938881042537286e-06, "loss": 1.201799750328064, "step": 3163 }, { "epoch": 1.6617647058823528, "grad_norm": 14.028568607468157, "learning_rate": 4.935825258487326e-06, "loss": 1.7789372205734253, "step": 3164 }, { "epoch": 1.6622899159663866, "grad_norm": 11.097269911557811, "learning_rate": 4.932769498411209e-06, "loss": 0.9179167151451111, "step": 3165 }, { "epoch": 1.66281512605042, "grad_norm": 11.915851912764433, "learning_rate": 4.929713763450477e-06, "loss": 1.5717015266418457, "step": 3166 }, { "epoch": 1.6633403361344539, "grad_norm": 10.38024475807239, "learning_rate": 4.926658054746665e-06, "loss": 1.34568190574646, "step": 3167 }, { "epoch": 1.6638655462184873, "grad_norm": 9.239344551818904, "learning_rate": 4.923602373441297e-06, "loss": 2.074063539505005, "step": 3168 }, { "epoch": 1.664390756302521, "grad_norm": 13.857889716143573, "learning_rate": 4.9205467206758914e-06, "loss": 1.703376054763794, "step": 3169 }, { "epoch": 1.6649159663865545, "grad_norm": 10.877505036281866, "learning_rate": 4.917491097591949e-06, "loss": 1.6291817426681519, "step": 3170 }, { "epoch": 1.6654411764705883, "grad_norm": 8.301892458791308, "learning_rate": 4.914435505330962e-06, "loss": 1.6869958639144897, "step": 3171 }, { "epoch": 1.6659663865546217, "grad_norm": 7.734194438514324, "learning_rate": 4.911379945034416e-06, "loss": 1.8701092004776, "step": 3172 }, { "epoch": 1.6664915966386555, "grad_norm": 14.316183760785892, "learning_rate": 4.908324417843779e-06, "loss": 1.4611387252807617, "step": 3173 }, { "epoch": 1.667016806722689, "grad_norm": 9.62109786158016, "learning_rate": 4.905268924900506e-06, "loss": 2.080125331878662, "step": 3174 }, { "epoch": 1.6675420168067228, "grad_norm": 14.680946727705855, "learning_rate": 4.902213467346043e-06, "loss": 1.9285434484481812, "step": 3175 }, { "epoch": 1.6680672268907561, "grad_norm": 11.629182883064736, "learning_rate": 4.899158046321821e-06, "loss": 1.3539671897888184, "step": 3176 }, { "epoch": 1.66859243697479, "grad_norm": 13.737142860943834, "learning_rate": 4.896102662969259e-06, "loss": 1.6036796569824219, "step": 3177 }, { "epoch": 1.6691176470588234, "grad_norm": 10.12493423922255, "learning_rate": 4.89304731842976e-06, "loss": 2.090482711791992, "step": 3178 }, { "epoch": 1.6696428571428572, "grad_norm": 7.235051189669831, "learning_rate": 4.889992013844711e-06, "loss": 1.5542412996292114, "step": 3179 }, { "epoch": 1.6701680672268906, "grad_norm": 9.834356759414641, "learning_rate": 4.886936750355487e-06, "loss": 1.2818894386291504, "step": 3180 }, { "epoch": 1.6706932773109244, "grad_norm": 13.540783886719886, "learning_rate": 4.883881529103448e-06, "loss": 1.5695828199386597, "step": 3181 }, { "epoch": 1.6712184873949578, "grad_norm": 17.0558721728669, "learning_rate": 4.880826351229937e-06, "loss": 2.3164563179016113, "step": 3182 }, { "epoch": 1.6717436974789917, "grad_norm": 9.226318265933307, "learning_rate": 4.877771217876279e-06, "loss": 2.172067403793335, "step": 3183 }, { "epoch": 1.6722689075630253, "grad_norm": 6.517646082610535, "learning_rate": 4.874716130183785e-06, "loss": 0.8407649993896484, "step": 3184 }, { "epoch": 1.6727941176470589, "grad_norm": 10.95684363009136, "learning_rate": 4.8716610892937486e-06, "loss": 1.159510612487793, "step": 3185 }, { "epoch": 1.6733193277310925, "grad_norm": 15.441378641715309, "learning_rate": 4.868606096347443e-06, "loss": 1.2223591804504395, "step": 3186 }, { "epoch": 1.673844537815126, "grad_norm": 10.039105001076978, "learning_rate": 4.8655511524861295e-06, "loss": 1.8431533575057983, "step": 3187 }, { "epoch": 1.6743697478991597, "grad_norm": 14.004494334212994, "learning_rate": 4.8624962588510456e-06, "loss": 1.2883567810058594, "step": 3188 }, { "epoch": 1.6748949579831933, "grad_norm": 9.295372095190801, "learning_rate": 4.859441416583412e-06, "loss": 1.9788686037063599, "step": 3189 }, { "epoch": 1.675420168067227, "grad_norm": 12.096116013390292, "learning_rate": 4.856386626824428e-06, "loss": 1.2554341554641724, "step": 3190 }, { "epoch": 1.6759453781512605, "grad_norm": 21.76745049762305, "learning_rate": 4.8533318907152795e-06, "loss": 1.0712530612945557, "step": 3191 }, { "epoch": 1.6764705882352942, "grad_norm": 12.938479251535309, "learning_rate": 4.850277209397125e-06, "loss": 1.199103593826294, "step": 3192 }, { "epoch": 1.6769957983193278, "grad_norm": 9.238235049662206, "learning_rate": 4.847222584011107e-06, "loss": 1.598036289215088, "step": 3193 }, { "epoch": 1.6775210084033614, "grad_norm": 12.131559366166508, "learning_rate": 4.8441680156983455e-06, "loss": 1.5239869356155396, "step": 3194 }, { "epoch": 1.678046218487395, "grad_norm": 8.232943246354864, "learning_rate": 4.841113505599939e-06, "loss": 1.785506010055542, "step": 3195 }, { "epoch": 1.6785714285714286, "grad_norm": 11.917059082826425, "learning_rate": 4.838059054856967e-06, "loss": 1.4921265840530396, "step": 3196 }, { "epoch": 1.6790966386554622, "grad_norm": 8.43131298640434, "learning_rate": 4.8350046646104815e-06, "loss": 2.3236143589019775, "step": 3197 }, { "epoch": 1.6796218487394958, "grad_norm": 20.492115302876115, "learning_rate": 4.831950336001518e-06, "loss": 1.3714215755462646, "step": 3198 }, { "epoch": 1.6801470588235294, "grad_norm": 9.50181582139722, "learning_rate": 4.828896070171084e-06, "loss": 0.9516867399215698, "step": 3199 }, { "epoch": 1.680672268907563, "grad_norm": 10.560738331135111, "learning_rate": 4.825841868260166e-06, "loss": 1.1114282608032227, "step": 3200 }, { "epoch": 1.6811974789915967, "grad_norm": 10.11621562454737, "learning_rate": 4.8227877314097245e-06, "loss": 2.444409132003784, "step": 3201 }, { "epoch": 1.6817226890756303, "grad_norm": 12.090676048057713, "learning_rate": 4.819733660760701e-06, "loss": 1.4144461154937744, "step": 3202 }, { "epoch": 1.6822478991596639, "grad_norm": 9.303163790082237, "learning_rate": 4.8166796574540065e-06, "loss": 1.714726209640503, "step": 3203 }, { "epoch": 1.6827731092436975, "grad_norm": 9.100656720600508, "learning_rate": 4.8136257226305295e-06, "loss": 1.570493459701538, "step": 3204 }, { "epoch": 1.683298319327731, "grad_norm": 10.776076519499956, "learning_rate": 4.81057185743113e-06, "loss": 2.1080472469329834, "step": 3205 }, { "epoch": 1.6838235294117647, "grad_norm": 7.0919272657071994, "learning_rate": 4.807518062996648e-06, "loss": 2.0740365982055664, "step": 3206 }, { "epoch": 1.6843487394957983, "grad_norm": 6.9403304143763185, "learning_rate": 4.804464340467892e-06, "loss": 1.694742202758789, "step": 3207 }, { "epoch": 1.684873949579832, "grad_norm": 9.7811299170044, "learning_rate": 4.801410690985643e-06, "loss": 1.5359952449798584, "step": 3208 }, { "epoch": 1.6853991596638656, "grad_norm": 7.2935403500847515, "learning_rate": 4.798357115690661e-06, "loss": 1.348632574081421, "step": 3209 }, { "epoch": 1.6859243697478992, "grad_norm": 10.50041348163982, "learning_rate": 4.795303615723671e-06, "loss": 1.2157429456710815, "step": 3210 }, { "epoch": 1.6864495798319328, "grad_norm": 11.848709652594424, "learning_rate": 4.792250192225374e-06, "loss": 0.9487016201019287, "step": 3211 }, { "epoch": 1.6869747899159664, "grad_norm": 12.14284727204427, "learning_rate": 4.789196846336439e-06, "loss": 1.5461490154266357, "step": 3212 }, { "epoch": 1.6875, "grad_norm": 8.496025704487556, "learning_rate": 4.7861435791975124e-06, "loss": 1.9235889911651611, "step": 3213 }, { "epoch": 1.6880252100840336, "grad_norm": 14.438632669416723, "learning_rate": 4.783090391949204e-06, "loss": 1.9737489223480225, "step": 3214 }, { "epoch": 1.6885504201680672, "grad_norm": 15.957219220291341, "learning_rate": 4.7800372857320995e-06, "loss": 1.7104718685150146, "step": 3215 }, { "epoch": 1.6890756302521008, "grad_norm": 7.0164333325553585, "learning_rate": 4.776984261686749e-06, "loss": 1.6765732765197754, "step": 3216 }, { "epoch": 1.6896008403361344, "grad_norm": 9.860218309864889, "learning_rate": 4.7739313209536755e-06, "loss": 1.4308388233184814, "step": 3217 }, { "epoch": 1.690126050420168, "grad_norm": 7.395648801906977, "learning_rate": 4.770878464673372e-06, "loss": 1.5400488376617432, "step": 3218 }, { "epoch": 1.6906512605042017, "grad_norm": 14.302049953275954, "learning_rate": 4.767825693986295e-06, "loss": 0.966601550579071, "step": 3219 }, { "epoch": 1.6911764705882353, "grad_norm": 10.78606883215266, "learning_rate": 4.764773010032874e-06, "loss": 1.2765212059020996, "step": 3220 }, { "epoch": 1.691701680672269, "grad_norm": 9.91042531527672, "learning_rate": 4.761720413953503e-06, "loss": 1.379080057144165, "step": 3221 }, { "epoch": 1.6922268907563025, "grad_norm": 9.998140278256704, "learning_rate": 4.758667906888545e-06, "loss": 1.800308346748352, "step": 3222 }, { "epoch": 1.6927521008403361, "grad_norm": 15.001657765860703, "learning_rate": 4.755615489978328e-06, "loss": 1.1739035844802856, "step": 3223 }, { "epoch": 1.6932773109243697, "grad_norm": 11.251658744838176, "learning_rate": 4.752563164363148e-06, "loss": 1.0766520500183105, "step": 3224 }, { "epoch": 1.6938025210084033, "grad_norm": 10.488158413312608, "learning_rate": 4.7495109311832665e-06, "loss": 1.6786444187164307, "step": 3225 }, { "epoch": 1.694327731092437, "grad_norm": 11.222410267635443, "learning_rate": 4.746458791578911e-06, "loss": 1.9987932443618774, "step": 3226 }, { "epoch": 1.6948529411764706, "grad_norm": 10.654387624082723, "learning_rate": 4.743406746690272e-06, "loss": 1.249092698097229, "step": 3227 }, { "epoch": 1.6953781512605042, "grad_norm": 9.329416887100699, "learning_rate": 4.740354797657504e-06, "loss": 1.3510708808898926, "step": 3228 }, { "epoch": 1.6959033613445378, "grad_norm": 8.588233129565467, "learning_rate": 4.737302945620732e-06, "loss": 1.4824929237365723, "step": 3229 }, { "epoch": 1.6964285714285714, "grad_norm": 7.59823268809456, "learning_rate": 4.7342511917200375e-06, "loss": 1.5797340869903564, "step": 3230 }, { "epoch": 1.696953781512605, "grad_norm": 11.89070608878343, "learning_rate": 4.731199537095468e-06, "loss": 1.2261934280395508, "step": 3231 }, { "epoch": 1.6974789915966386, "grad_norm": 21.459642852780544, "learning_rate": 4.728147982887034e-06, "loss": 1.3860855102539062, "step": 3232 }, { "epoch": 1.6980042016806722, "grad_norm": 7.461367326422217, "learning_rate": 4.72509653023471e-06, "loss": 1.7877720594406128, "step": 3233 }, { "epoch": 1.6985294117647058, "grad_norm": 13.236189493835539, "learning_rate": 4.722045180278431e-06, "loss": 2.2877883911132812, "step": 3234 }, { "epoch": 1.6990546218487395, "grad_norm": 8.133082883032813, "learning_rate": 4.718993934158092e-06, "loss": 1.3185124397277832, "step": 3235 }, { "epoch": 1.699579831932773, "grad_norm": 9.230899339281175, "learning_rate": 4.715942793013551e-06, "loss": 1.5218816995620728, "step": 3236 }, { "epoch": 1.7001050420168067, "grad_norm": 11.986139333558564, "learning_rate": 4.712891757984629e-06, "loss": 1.2758816480636597, "step": 3237 }, { "epoch": 1.7006302521008403, "grad_norm": 8.2272559909767, "learning_rate": 4.709840830211105e-06, "loss": 1.7246310710906982, "step": 3238 }, { "epoch": 1.701155462184874, "grad_norm": 8.356415866718445, "learning_rate": 4.706790010832714e-06, "loss": 1.6995426416397095, "step": 3239 }, { "epoch": 1.7016806722689075, "grad_norm": 12.47543946791477, "learning_rate": 4.703739300989159e-06, "loss": 1.4347538948059082, "step": 3240 }, { "epoch": 1.7022058823529411, "grad_norm": 13.775898806027204, "learning_rate": 4.700688701820096e-06, "loss": 2.091193914413452, "step": 3241 }, { "epoch": 1.7027310924369747, "grad_norm": 11.814585500912411, "learning_rate": 4.697638214465141e-06, "loss": 0.9931437969207764, "step": 3242 }, { "epoch": 1.7032563025210083, "grad_norm": 12.695009444158382, "learning_rate": 4.694587840063868e-06, "loss": 1.3028037548065186, "step": 3243 }, { "epoch": 1.7037815126050422, "grad_norm": 8.928646860224024, "learning_rate": 4.691537579755811e-06, "loss": 1.7238823175430298, "step": 3244 }, { "epoch": 1.7043067226890756, "grad_norm": 12.040154975154097, "learning_rate": 4.688487434680459e-06, "loss": 1.7754493951797485, "step": 3245 }, { "epoch": 1.7048319327731094, "grad_norm": 7.535248898985819, "learning_rate": 4.685437405977258e-06, "loss": 1.4713134765625, "step": 3246 }, { "epoch": 1.7053571428571428, "grad_norm": 12.984821814540634, "learning_rate": 4.68238749478561e-06, "loss": 1.2907443046569824, "step": 3247 }, { "epoch": 1.7058823529411766, "grad_norm": 9.124845111639345, "learning_rate": 4.679337702244877e-06, "loss": 1.702890157699585, "step": 3248 }, { "epoch": 1.70640756302521, "grad_norm": 9.278216348282228, "learning_rate": 4.6762880294943734e-06, "loss": 0.8058304786682129, "step": 3249 }, { "epoch": 1.7069327731092439, "grad_norm": 12.30031432785795, "learning_rate": 4.673238477673367e-06, "loss": 1.395681619644165, "step": 3250 }, { "epoch": 1.7074579831932772, "grad_norm": 12.83788411809448, "learning_rate": 4.670189047921086e-06, "loss": 1.439389705657959, "step": 3251 }, { "epoch": 1.707983193277311, "grad_norm": 12.65720366210558, "learning_rate": 4.667139741376708e-06, "loss": 1.636742353439331, "step": 3252 }, { "epoch": 1.7085084033613445, "grad_norm": 13.730427366000661, "learning_rate": 4.664090559179367e-06, "loss": 1.541890025138855, "step": 3253 }, { "epoch": 1.7090336134453783, "grad_norm": 17.642703265607015, "learning_rate": 4.661041502468149e-06, "loss": 1.2382596731185913, "step": 3254 }, { "epoch": 1.7095588235294117, "grad_norm": 12.39135845211606, "learning_rate": 4.657992572382095e-06, "loss": 1.3252674341201782, "step": 3255 }, { "epoch": 1.7100840336134455, "grad_norm": 8.631156447445784, "learning_rate": 4.654943770060197e-06, "loss": 2.0390126705169678, "step": 3256 }, { "epoch": 1.710609243697479, "grad_norm": 7.443829683098495, "learning_rate": 4.651895096641402e-06, "loss": 1.3086891174316406, "step": 3257 }, { "epoch": 1.7111344537815127, "grad_norm": 14.10406049817156, "learning_rate": 4.648846553264603e-06, "loss": 1.1268144845962524, "step": 3258 }, { "epoch": 1.7116596638655461, "grad_norm": 19.653444071824573, "learning_rate": 4.6457981410686524e-06, "loss": 1.4325177669525146, "step": 3259 }, { "epoch": 1.71218487394958, "grad_norm": 13.769415760561396, "learning_rate": 4.642749861192347e-06, "loss": 1.728335976600647, "step": 3260 }, { "epoch": 1.7127100840336134, "grad_norm": 9.118957495131754, "learning_rate": 4.639701714774439e-06, "loss": 2.0761523246765137, "step": 3261 }, { "epoch": 1.7132352941176472, "grad_norm": 13.743227667640902, "learning_rate": 4.6366537029536236e-06, "loss": 1.4593192338943481, "step": 3262 }, { "epoch": 1.7137605042016806, "grad_norm": 9.878282202025337, "learning_rate": 4.633605826868556e-06, "loss": 1.1992371082305908, "step": 3263 }, { "epoch": 1.7142857142857144, "grad_norm": 21.10567191645549, "learning_rate": 4.630558087657832e-06, "loss": 2.5870370864868164, "step": 3264 }, { "epoch": 1.7148109243697478, "grad_norm": 8.03974236204865, "learning_rate": 4.627510486459999e-06, "loss": 1.3676621913909912, "step": 3265 }, { "epoch": 1.7153361344537816, "grad_norm": 14.39685510115169, "learning_rate": 4.624463024413557e-06, "loss": 1.3622328042984009, "step": 3266 }, { "epoch": 1.715861344537815, "grad_norm": 10.89889824608526, "learning_rate": 4.621415702656948e-06, "loss": 1.913252592086792, "step": 3267 }, { "epoch": 1.7163865546218489, "grad_norm": 10.04186725134007, "learning_rate": 4.618368522328563e-06, "loss": 1.382278561592102, "step": 3268 }, { "epoch": 1.7169117647058822, "grad_norm": 10.013418711296895, "learning_rate": 4.615321484566741e-06, "loss": 1.0073938369750977, "step": 3269 }, { "epoch": 1.717436974789916, "grad_norm": 7.891439724675239, "learning_rate": 4.61227459050977e-06, "loss": 1.5652869939804077, "step": 3270 }, { "epoch": 1.7179621848739495, "grad_norm": 13.824348774478754, "learning_rate": 4.609227841295882e-06, "loss": 1.2578716278076172, "step": 3271 }, { "epoch": 1.7184873949579833, "grad_norm": 13.651501915607122, "learning_rate": 4.606181238063253e-06, "loss": 1.4699373245239258, "step": 3272 }, { "epoch": 1.7190126050420167, "grad_norm": 8.972590551870093, "learning_rate": 4.603134781950007e-06, "loss": 1.0772695541381836, "step": 3273 }, { "epoch": 1.7195378151260505, "grad_norm": 17.37886197109619, "learning_rate": 4.6000884740942135e-06, "loss": 1.1347074508666992, "step": 3274 }, { "epoch": 1.720063025210084, "grad_norm": 9.280507396035626, "learning_rate": 4.597042315633885e-06, "loss": 2.2019081115722656, "step": 3275 }, { "epoch": 1.7205882352941178, "grad_norm": 10.394213281708955, "learning_rate": 4.593996307706979e-06, "loss": 1.4583905935287476, "step": 3276 }, { "epoch": 1.7211134453781511, "grad_norm": 12.516320878145171, "learning_rate": 4.590950451451397e-06, "loss": 1.6682995557785034, "step": 3277 }, { "epoch": 1.721638655462185, "grad_norm": 11.057605401044427, "learning_rate": 4.587904748004984e-06, "loss": 1.3575992584228516, "step": 3278 }, { "epoch": 1.7221638655462184, "grad_norm": 19.151720887311562, "learning_rate": 4.584859198505526e-06, "loss": 1.5850321054458618, "step": 3279 }, { "epoch": 1.7226890756302522, "grad_norm": 7.253040274635175, "learning_rate": 4.581813804090752e-06, "loss": 1.7625868320465088, "step": 3280 }, { "epoch": 1.7232142857142856, "grad_norm": 8.94724572141308, "learning_rate": 4.578768565898337e-06, "loss": 1.3413604497909546, "step": 3281 }, { "epoch": 1.7237394957983194, "grad_norm": 12.060253151336724, "learning_rate": 4.575723485065893e-06, "loss": 1.479799747467041, "step": 3282 }, { "epoch": 1.7242647058823528, "grad_norm": 8.933308583101603, "learning_rate": 4.5726785627309736e-06, "loss": 1.6844385862350464, "step": 3283 }, { "epoch": 1.7247899159663866, "grad_norm": 9.109789906182867, "learning_rate": 4.569633800031075e-06, "loss": 1.1544994115829468, "step": 3284 }, { "epoch": 1.72531512605042, "grad_norm": 10.890598985413234, "learning_rate": 4.566589198103635e-06, "loss": 1.9263209104537964, "step": 3285 }, { "epoch": 1.7258403361344539, "grad_norm": 10.304930311250292, "learning_rate": 4.563544758086028e-06, "loss": 1.5095046758651733, "step": 3286 }, { "epoch": 1.7263655462184873, "grad_norm": 11.191141727298945, "learning_rate": 4.560500481115568e-06, "loss": 1.7304835319519043, "step": 3287 }, { "epoch": 1.726890756302521, "grad_norm": 9.159675759410801, "learning_rate": 4.557456368329511e-06, "loss": 1.6753727197647095, "step": 3288 }, { "epoch": 1.7274159663865545, "grad_norm": 9.301470579587201, "learning_rate": 4.554412420865052e-06, "loss": 1.813326358795166, "step": 3289 }, { "epoch": 1.7279411764705883, "grad_norm": 9.427261962366169, "learning_rate": 4.551368639859319e-06, "loss": 2.2732186317443848, "step": 3290 }, { "epoch": 1.7284663865546217, "grad_norm": 15.513657851664618, "learning_rate": 4.5483250264493816e-06, "loss": 1.6619755029678345, "step": 3291 }, { "epoch": 1.7289915966386555, "grad_norm": 8.855328874944085, "learning_rate": 4.545281581772249e-06, "loss": 2.7943356037139893, "step": 3292 }, { "epoch": 1.729516806722689, "grad_norm": 24.748749458259724, "learning_rate": 4.542238306964863e-06, "loss": 2.413684844970703, "step": 3293 }, { "epoch": 1.7300420168067228, "grad_norm": 12.258830497307358, "learning_rate": 4.539195203164104e-06, "loss": 1.4870705604553223, "step": 3294 }, { "epoch": 1.7305672268907561, "grad_norm": 13.412270345655795, "learning_rate": 4.536152271506787e-06, "loss": 2.348081111907959, "step": 3295 }, { "epoch": 1.73109243697479, "grad_norm": 11.602057489602387, "learning_rate": 4.533109513129666e-06, "loss": 1.6497387886047363, "step": 3296 }, { "epoch": 1.7316176470588234, "grad_norm": 8.0057633669754, "learning_rate": 4.530066929169427e-06, "loss": 2.1390504837036133, "step": 3297 }, { "epoch": 1.7321428571428572, "grad_norm": 10.114554074478095, "learning_rate": 4.527024520762693e-06, "loss": 1.4101120233535767, "step": 3298 }, { "epoch": 1.7326680672268906, "grad_norm": 9.537079628839239, "learning_rate": 4.5239822890460174e-06, "loss": 2.055835247039795, "step": 3299 }, { "epoch": 1.7331932773109244, "grad_norm": 10.16957557101485, "learning_rate": 4.520940235155895e-06, "loss": 1.576056957244873, "step": 3300 }, { "epoch": 1.7337184873949578, "grad_norm": 12.650944492447211, "learning_rate": 4.5178983602287476e-06, "loss": 0.8783017992973328, "step": 3301 }, { "epoch": 1.7342436974789917, "grad_norm": 13.313690645956767, "learning_rate": 4.514856665400931e-06, "loss": 1.7578872442245483, "step": 3302 }, { "epoch": 1.7347689075630253, "grad_norm": 7.932697646965227, "learning_rate": 4.511815151808737e-06, "loss": 1.4982397556304932, "step": 3303 }, { "epoch": 1.7352941176470589, "grad_norm": 9.13772049030606, "learning_rate": 4.508773820588388e-06, "loss": 0.7393516898155212, "step": 3304 }, { "epoch": 1.7358193277310925, "grad_norm": 13.73448788701065, "learning_rate": 4.505732672876037e-06, "loss": 1.6219474077224731, "step": 3305 }, { "epoch": 1.736344537815126, "grad_norm": 10.12350304306231, "learning_rate": 4.502691709807769e-06, "loss": 0.7352396249771118, "step": 3306 }, { "epoch": 1.7368697478991597, "grad_norm": 12.420606931770829, "learning_rate": 4.499650932519602e-06, "loss": 0.8984926342964172, "step": 3307 }, { "epoch": 1.7373949579831933, "grad_norm": 8.47908913163684, "learning_rate": 4.496610342147482e-06, "loss": 1.7162363529205322, "step": 3308 }, { "epoch": 1.737920168067227, "grad_norm": 7.619976734371435, "learning_rate": 4.493569939827288e-06, "loss": 1.6412806510925293, "step": 3309 }, { "epoch": 1.7384453781512605, "grad_norm": 12.06422786644797, "learning_rate": 4.490529726694823e-06, "loss": 2.057121515274048, "step": 3310 }, { "epoch": 1.7389705882352942, "grad_norm": 6.4841010505199215, "learning_rate": 4.487489703885828e-06, "loss": 1.6091282367706299, "step": 3311 }, { "epoch": 1.7394957983193278, "grad_norm": 10.9727966659055, "learning_rate": 4.4844498725359655e-06, "loss": 1.5243709087371826, "step": 3312 }, { "epoch": 1.7400210084033614, "grad_norm": 14.925627239097777, "learning_rate": 4.48141023378083e-06, "loss": 1.7952152490615845, "step": 3313 }, { "epoch": 1.740546218487395, "grad_norm": 9.18924821922222, "learning_rate": 4.478370788755943e-06, "loss": 1.1616581678390503, "step": 3314 }, { "epoch": 1.7410714285714286, "grad_norm": 27.642984586784024, "learning_rate": 4.475331538596755e-06, "loss": 1.4816443920135498, "step": 3315 }, { "epoch": 1.7415966386554622, "grad_norm": 21.937547201530496, "learning_rate": 4.472292484438642e-06, "loss": 1.9132540225982666, "step": 3316 }, { "epoch": 1.7421218487394958, "grad_norm": 7.625323026752423, "learning_rate": 4.4692536274169055e-06, "loss": 1.10856032371521, "step": 3317 }, { "epoch": 1.7426470588235294, "grad_norm": 10.424587074678794, "learning_rate": 4.466214968666777e-06, "loss": 1.2741047143936157, "step": 3318 }, { "epoch": 1.743172268907563, "grad_norm": 7.191319094455383, "learning_rate": 4.463176509323413e-06, "loss": 1.3879424333572388, "step": 3319 }, { "epoch": 1.7436974789915967, "grad_norm": 9.07896005083348, "learning_rate": 4.460138250521895e-06, "loss": 1.0268973112106323, "step": 3320 }, { "epoch": 1.7442226890756303, "grad_norm": 9.850944379791787, "learning_rate": 4.457100193397226e-06, "loss": 1.3899263143539429, "step": 3321 }, { "epoch": 1.7447478991596639, "grad_norm": 13.18865769279598, "learning_rate": 4.454062339084339e-06, "loss": 1.6372075080871582, "step": 3322 }, { "epoch": 1.7452731092436975, "grad_norm": 8.569394293651449, "learning_rate": 4.45102468871809e-06, "loss": 1.781775712966919, "step": 3323 }, { "epoch": 1.745798319327731, "grad_norm": 11.961694616258718, "learning_rate": 4.447987243433256e-06, "loss": 1.275670051574707, "step": 3324 }, { "epoch": 1.7463235294117647, "grad_norm": 8.835867447955053, "learning_rate": 4.444950004364542e-06, "loss": 1.282447338104248, "step": 3325 }, { "epoch": 1.7468487394957983, "grad_norm": 9.805430443800143, "learning_rate": 4.4419129726465706e-06, "loss": 1.631540298461914, "step": 3326 }, { "epoch": 1.747373949579832, "grad_norm": 18.016814087823914, "learning_rate": 4.438876149413891e-06, "loss": 1.465498685836792, "step": 3327 }, { "epoch": 1.7478991596638656, "grad_norm": 7.270863933286584, "learning_rate": 4.435839535800973e-06, "loss": 2.469688892364502, "step": 3328 }, { "epoch": 1.7484243697478992, "grad_norm": 13.441130430294614, "learning_rate": 4.432803132942208e-06, "loss": 1.0278105735778809, "step": 3329 }, { "epoch": 1.7489495798319328, "grad_norm": 12.152997262367775, "learning_rate": 4.42976694197191e-06, "loss": 2.481796979904175, "step": 3330 }, { "epoch": 1.7494747899159664, "grad_norm": 11.785155259849487, "learning_rate": 4.426730964024312e-06, "loss": 1.6311910152435303, "step": 3331 }, { "epoch": 1.75, "grad_norm": 12.053458159721158, "learning_rate": 4.423695200233567e-06, "loss": 1.555237889289856, "step": 3332 }, { "epoch": 1.7505252100840336, "grad_norm": 15.267338325021921, "learning_rate": 4.420659651733751e-06, "loss": 1.6818935871124268, "step": 3333 }, { "epoch": 1.7510504201680672, "grad_norm": 12.925611257584087, "learning_rate": 4.417624319658857e-06, "loss": 1.6823792457580566, "step": 3334 }, { "epoch": 1.7515756302521008, "grad_norm": 10.39583822503065, "learning_rate": 4.4145892051427985e-06, "loss": 1.2439255714416504, "step": 3335 }, { "epoch": 1.7521008403361344, "grad_norm": 12.40423078330543, "learning_rate": 4.4115543093194055e-06, "loss": 1.1736741065979004, "step": 3336 }, { "epoch": 1.752626050420168, "grad_norm": 10.300241726801236, "learning_rate": 4.40851963332243e-06, "loss": 1.4935336112976074, "step": 3337 }, { "epoch": 1.7531512605042017, "grad_norm": 8.653039954025626, "learning_rate": 4.40548517828554e-06, "loss": 1.4860565662384033, "step": 3338 }, { "epoch": 1.7536764705882353, "grad_norm": 12.089012376892505, "learning_rate": 4.402450945342317e-06, "loss": 1.3899967670440674, "step": 3339 }, { "epoch": 1.754201680672269, "grad_norm": 15.542259417644336, "learning_rate": 4.399416935626268e-06, "loss": 1.4277682304382324, "step": 3340 }, { "epoch": 1.7547268907563025, "grad_norm": 11.847544756393665, "learning_rate": 4.396383150270811e-06, "loss": 1.3811930418014526, "step": 3341 }, { "epoch": 1.7552521008403361, "grad_norm": 12.396390017275069, "learning_rate": 4.393349590409279e-06, "loss": 1.3842015266418457, "step": 3342 }, { "epoch": 1.7557773109243697, "grad_norm": 9.907882075907828, "learning_rate": 4.3903162571749234e-06, "loss": 1.997004747390747, "step": 3343 }, { "epoch": 1.7563025210084033, "grad_norm": 10.20639038058395, "learning_rate": 4.387283151700912e-06, "loss": 1.267422080039978, "step": 3344 }, { "epoch": 1.756827731092437, "grad_norm": 21.384778136434107, "learning_rate": 4.384250275120325e-06, "loss": 1.5731201171875, "step": 3345 }, { "epoch": 1.7573529411764706, "grad_norm": 8.928437297808067, "learning_rate": 4.381217628566158e-06, "loss": 1.5649099349975586, "step": 3346 }, { "epoch": 1.7578781512605042, "grad_norm": 14.0399760249689, "learning_rate": 4.3781852131713195e-06, "loss": 1.6794086694717407, "step": 3347 }, { "epoch": 1.7584033613445378, "grad_norm": 9.829103731915518, "learning_rate": 4.375153030068635e-06, "loss": 1.2046095132827759, "step": 3348 }, { "epoch": 1.7589285714285714, "grad_norm": 7.8112019758600395, "learning_rate": 4.372121080390841e-06, "loss": 1.5325660705566406, "step": 3349 }, { "epoch": 1.759453781512605, "grad_norm": 7.860698784140351, "learning_rate": 4.3690893652705835e-06, "loss": 1.4487473964691162, "step": 3350 }, { "epoch": 1.7599789915966386, "grad_norm": 12.49874827300103, "learning_rate": 4.366057885840429e-06, "loss": 1.4863296747207642, "step": 3351 }, { "epoch": 1.7605042016806722, "grad_norm": 10.411555532184561, "learning_rate": 4.363026643232847e-06, "loss": 1.3299884796142578, "step": 3352 }, { "epoch": 1.7610294117647058, "grad_norm": 9.623678550194937, "learning_rate": 4.359995638580226e-06, "loss": 1.5486626625061035, "step": 3353 }, { "epoch": 1.7615546218487395, "grad_norm": 11.083364968840693, "learning_rate": 4.356964873014859e-06, "loss": 1.5557365417480469, "step": 3354 }, { "epoch": 1.762079831932773, "grad_norm": 15.604329450730432, "learning_rate": 4.353934347668957e-06, "loss": 1.2399992942810059, "step": 3355 }, { "epoch": 1.7626050420168067, "grad_norm": 10.485855074872733, "learning_rate": 4.350904063674635e-06, "loss": 1.7098774909973145, "step": 3356 }, { "epoch": 1.7631302521008403, "grad_norm": 9.49596531837048, "learning_rate": 4.34787402216392e-06, "loss": 1.345895767211914, "step": 3357 }, { "epoch": 1.763655462184874, "grad_norm": 10.495052412531466, "learning_rate": 4.344844224268747e-06, "loss": 1.2275123596191406, "step": 3358 }, { "epoch": 1.7641806722689075, "grad_norm": 12.124976384551145, "learning_rate": 4.341814671120965e-06, "loss": 2.058727264404297, "step": 3359 }, { "epoch": 1.7647058823529411, "grad_norm": 11.515539949504657, "learning_rate": 4.338785363852327e-06, "loss": 1.6749471426010132, "step": 3360 }, { "epoch": 1.7652310924369747, "grad_norm": 13.502176996716772, "learning_rate": 4.335756303594493e-06, "loss": 1.5013811588287354, "step": 3361 }, { "epoch": 1.7657563025210083, "grad_norm": 20.891526609455187, "learning_rate": 4.332727491479035e-06, "loss": 1.570244312286377, "step": 3362 }, { "epoch": 1.7662815126050422, "grad_norm": 17.691419361444233, "learning_rate": 4.32969892863743e-06, "loss": 1.6747288703918457, "step": 3363 }, { "epoch": 1.7668067226890756, "grad_norm": 7.550067948267033, "learning_rate": 4.326670616201063e-06, "loss": 1.5018736124038696, "step": 3364 }, { "epoch": 1.7673319327731094, "grad_norm": 22.559536920091926, "learning_rate": 4.323642555301222e-06, "loss": 2.438303232192993, "step": 3365 }, { "epoch": 1.7678571428571428, "grad_norm": 9.962722549165758, "learning_rate": 4.320614747069106e-06, "loss": 1.6288079023361206, "step": 3366 }, { "epoch": 1.7683823529411766, "grad_norm": 27.48397340970555, "learning_rate": 4.317587192635816e-06, "loss": 1.795086145401001, "step": 3367 }, { "epoch": 1.76890756302521, "grad_norm": 7.209285926501159, "learning_rate": 4.31455989313236e-06, "loss": 1.5091423988342285, "step": 3368 }, { "epoch": 1.7694327731092439, "grad_norm": 13.77553263087726, "learning_rate": 4.311532849689649e-06, "loss": 1.4646027088165283, "step": 3369 }, { "epoch": 1.7699579831932772, "grad_norm": 17.485984667911005, "learning_rate": 4.308506063438502e-06, "loss": 1.3757256269454956, "step": 3370 }, { "epoch": 1.770483193277311, "grad_norm": 10.05897873277297, "learning_rate": 4.305479535509637e-06, "loss": 1.4939477443695068, "step": 3371 }, { "epoch": 1.7710084033613445, "grad_norm": 11.529853793582946, "learning_rate": 4.30245326703368e-06, "loss": 1.780709147453308, "step": 3372 }, { "epoch": 1.7715336134453783, "grad_norm": 9.210731002187607, "learning_rate": 4.299427259141155e-06, "loss": 1.1474990844726562, "step": 3373 }, { "epoch": 1.7720588235294117, "grad_norm": 14.251205872521062, "learning_rate": 4.296401512962496e-06, "loss": 1.3692691326141357, "step": 3374 }, { "epoch": 1.7725840336134455, "grad_norm": 19.659885966250275, "learning_rate": 4.293376029628031e-06, "loss": 1.4936809539794922, "step": 3375 }, { "epoch": 1.773109243697479, "grad_norm": 13.936876011199358, "learning_rate": 4.290350810267995e-06, "loss": 1.599841594696045, "step": 3376 }, { "epoch": 1.7736344537815127, "grad_norm": 18.38570066850407, "learning_rate": 4.2873258560125244e-06, "loss": 1.963813066482544, "step": 3377 }, { "epoch": 1.7741596638655461, "grad_norm": 12.675988020173412, "learning_rate": 4.284301167991654e-06, "loss": 1.684263825416565, "step": 3378 }, { "epoch": 1.77468487394958, "grad_norm": 10.321127312765812, "learning_rate": 4.2812767473353205e-06, "loss": 1.0244954824447632, "step": 3379 }, { "epoch": 1.7752100840336134, "grad_norm": 13.745296316011132, "learning_rate": 4.27825259517336e-06, "loss": 1.9200084209442139, "step": 3380 }, { "epoch": 1.7757352941176472, "grad_norm": 9.358755329470656, "learning_rate": 4.275228712635511e-06, "loss": 1.404308795928955, "step": 3381 }, { "epoch": 1.7762605042016806, "grad_norm": 9.831000105275193, "learning_rate": 4.272205100851407e-06, "loss": 1.6431167125701904, "step": 3382 }, { "epoch": 1.7767857142857144, "grad_norm": 11.556416613911555, "learning_rate": 4.269181760950584e-06, "loss": 1.0737049579620361, "step": 3383 }, { "epoch": 1.7773109243697478, "grad_norm": 10.021238929483955, "learning_rate": 4.266158694062472e-06, "loss": 1.6588964462280273, "step": 3384 }, { "epoch": 1.7778361344537816, "grad_norm": 15.66006574277899, "learning_rate": 4.263135901316406e-06, "loss": 1.2914001941680908, "step": 3385 }, { "epoch": 1.778361344537815, "grad_norm": 11.129240868288132, "learning_rate": 4.2601133838416145e-06, "loss": 1.1047592163085938, "step": 3386 }, { "epoch": 1.7788865546218489, "grad_norm": 11.265281757073822, "learning_rate": 4.257091142767221e-06, "loss": 1.5732263326644897, "step": 3387 }, { "epoch": 1.7794117647058822, "grad_norm": 7.82767456156718, "learning_rate": 4.2540691792222485e-06, "loss": 1.2557446956634521, "step": 3388 }, { "epoch": 1.779936974789916, "grad_norm": 9.590464284366783, "learning_rate": 4.251047494335616e-06, "loss": 1.7582063674926758, "step": 3389 }, { "epoch": 1.7804621848739495, "grad_norm": 14.349341692367407, "learning_rate": 4.248026089236138e-06, "loss": 1.4493112564086914, "step": 3390 }, { "epoch": 1.7809873949579833, "grad_norm": 9.463167679822382, "learning_rate": 4.245004965052526e-06, "loss": 1.999086618423462, "step": 3391 }, { "epoch": 1.7815126050420167, "grad_norm": 9.883404140634354, "learning_rate": 4.2419841229133844e-06, "loss": 1.8912391662597656, "step": 3392 }, { "epoch": 1.7820378151260505, "grad_norm": 10.167465917169888, "learning_rate": 4.238963563947212e-06, "loss": 1.529426097869873, "step": 3393 }, { "epoch": 1.782563025210084, "grad_norm": 10.573807092259026, "learning_rate": 4.235943289282405e-06, "loss": 3.1425058841705322, "step": 3394 }, { "epoch": 1.7830882352941178, "grad_norm": 7.899886108099249, "learning_rate": 4.23292330004725e-06, "loss": 1.1963062286376953, "step": 3395 }, { "epoch": 1.7836134453781511, "grad_norm": 9.0409811724384, "learning_rate": 4.229903597369927e-06, "loss": 0.9920728206634521, "step": 3396 }, { "epoch": 1.784138655462185, "grad_norm": 18.720442044695666, "learning_rate": 4.226884182378513e-06, "loss": 1.8170883655548096, "step": 3397 }, { "epoch": 1.7846638655462184, "grad_norm": 14.796618315149875, "learning_rate": 4.2238650562009744e-06, "loss": 1.424818515777588, "step": 3398 }, { "epoch": 1.7851890756302522, "grad_norm": 8.456353077155875, "learning_rate": 4.220846219965168e-06, "loss": 1.2337695360183716, "step": 3399 }, { "epoch": 1.7857142857142856, "grad_norm": 22.928900834329983, "learning_rate": 4.217827674798845e-06, "loss": 1.0628197193145752, "step": 3400 }, { "epoch": 1.7862394957983194, "grad_norm": 13.308907523581551, "learning_rate": 4.2148094218296485e-06, "loss": 1.5001177787780762, "step": 3401 }, { "epoch": 1.7867647058823528, "grad_norm": 12.124316450927475, "learning_rate": 4.211791462185111e-06, "loss": 1.252639889717102, "step": 3402 }, { "epoch": 1.7872899159663866, "grad_norm": 8.049429858413667, "learning_rate": 4.2087737969926545e-06, "loss": 1.6508002281188965, "step": 3403 }, { "epoch": 1.78781512605042, "grad_norm": 13.534543634030266, "learning_rate": 4.205756427379592e-06, "loss": 1.526294231414795, "step": 3404 }, { "epoch": 1.7883403361344539, "grad_norm": 7.514592695953646, "learning_rate": 4.202739354473127e-06, "loss": 1.4986779689788818, "step": 3405 }, { "epoch": 1.7888655462184873, "grad_norm": 22.581629755018152, "learning_rate": 4.1997225794003515e-06, "loss": 1.3045344352722168, "step": 3406 }, { "epoch": 1.789390756302521, "grad_norm": 16.174143043144188, "learning_rate": 4.196706103288244e-06, "loss": 1.1019316911697388, "step": 3407 }, { "epoch": 1.7899159663865545, "grad_norm": 10.848517249633964, "learning_rate": 4.193689927263677e-06, "loss": 1.3397021293640137, "step": 3408 }, { "epoch": 1.7904411764705883, "grad_norm": 9.829259824797061, "learning_rate": 4.190674052453405e-06, "loss": 1.1827586889266968, "step": 3409 }, { "epoch": 1.7909663865546217, "grad_norm": 11.140713687457362, "learning_rate": 4.187658479984072e-06, "loss": 1.4190447330474854, "step": 3410 }, { "epoch": 1.7914915966386555, "grad_norm": 17.73096309388992, "learning_rate": 4.184643210982209e-06, "loss": 2.6395411491394043, "step": 3411 }, { "epoch": 1.792016806722689, "grad_norm": 17.588221957204123, "learning_rate": 4.181628246574236e-06, "loss": 1.595414638519287, "step": 3412 }, { "epoch": 1.7925420168067228, "grad_norm": 16.78050267554127, "learning_rate": 4.178613587886455e-06, "loss": 1.1024020910263062, "step": 3413 }, { "epoch": 1.7930672268907561, "grad_norm": 13.800256123851637, "learning_rate": 4.175599236045058e-06, "loss": 1.1093014478683472, "step": 3414 }, { "epoch": 1.79359243697479, "grad_norm": 16.134478004079746, "learning_rate": 4.172585192176117e-06, "loss": 2.031782865524292, "step": 3415 }, { "epoch": 1.7941176470588234, "grad_norm": 12.224926683605693, "learning_rate": 4.169571457405597e-06, "loss": 1.4426114559173584, "step": 3416 }, { "epoch": 1.7946428571428572, "grad_norm": 10.468125001528573, "learning_rate": 4.166558032859339e-06, "loss": 1.1929881572723389, "step": 3417 }, { "epoch": 1.7951680672268906, "grad_norm": 13.003193560226709, "learning_rate": 4.163544919663073e-06, "loss": 1.6875252723693848, "step": 3418 }, { "epoch": 1.7956932773109244, "grad_norm": 8.945094165404068, "learning_rate": 4.160532118942411e-06, "loss": 1.7581286430358887, "step": 3419 }, { "epoch": 1.7962184873949578, "grad_norm": 9.69869538124073, "learning_rate": 4.15751963182285e-06, "loss": 1.492494821548462, "step": 3420 }, { "epoch": 1.7967436974789917, "grad_norm": 12.917286761789113, "learning_rate": 4.154507459429769e-06, "loss": 1.3444740772247314, "step": 3421 }, { "epoch": 1.7972689075630253, "grad_norm": 10.12759682759791, "learning_rate": 4.1514956028884265e-06, "loss": 2.462442636489868, "step": 3422 }, { "epoch": 1.7977941176470589, "grad_norm": 9.11132367715039, "learning_rate": 4.148484063323969e-06, "loss": 1.1968019008636475, "step": 3423 }, { "epoch": 1.7983193277310925, "grad_norm": 9.751469363461084, "learning_rate": 4.14547284186142e-06, "loss": 1.1196238994598389, "step": 3424 }, { "epoch": 1.798844537815126, "grad_norm": 14.281769005761861, "learning_rate": 4.142461939625685e-06, "loss": 1.6557984352111816, "step": 3425 }, { "epoch": 1.7993697478991597, "grad_norm": 10.895223535255173, "learning_rate": 4.13945135774155e-06, "loss": 1.4526193141937256, "step": 3426 }, { "epoch": 1.7998949579831933, "grad_norm": 15.280765866171025, "learning_rate": 4.136441097333683e-06, "loss": 2.054003953933716, "step": 3427 }, { "epoch": 1.800420168067227, "grad_norm": 9.246251364954835, "learning_rate": 4.133431159526631e-06, "loss": 2.2219438552856445, "step": 3428 }, { "epoch": 1.8009453781512605, "grad_norm": 13.129971947418955, "learning_rate": 4.13042154544482e-06, "loss": 1.625846266746521, "step": 3429 }, { "epoch": 1.8014705882352942, "grad_norm": 12.978736800632362, "learning_rate": 4.127412256212554e-06, "loss": 2.6511335372924805, "step": 3430 }, { "epoch": 1.8019957983193278, "grad_norm": 9.79788739331427, "learning_rate": 4.12440329295402e-06, "loss": 1.8296453952789307, "step": 3431 }, { "epoch": 1.8025210084033614, "grad_norm": 9.167691812725113, "learning_rate": 4.121394656793279e-06, "loss": 1.5473147630691528, "step": 3432 }, { "epoch": 1.803046218487395, "grad_norm": 16.886183234527234, "learning_rate": 4.1183863488542686e-06, "loss": 1.3791701793670654, "step": 3433 }, { "epoch": 1.8035714285714286, "grad_norm": 11.204075303756833, "learning_rate": 4.1153783702608105e-06, "loss": 2.1408419609069824, "step": 3434 }, { "epoch": 1.8040966386554622, "grad_norm": 10.731028395399425, "learning_rate": 4.112370722136597e-06, "loss": 1.5430549383163452, "step": 3435 }, { "epoch": 1.8046218487394958, "grad_norm": 9.869315215658188, "learning_rate": 4.109363405605198e-06, "loss": 1.401909351348877, "step": 3436 }, { "epoch": 1.8051470588235294, "grad_norm": 9.031705262630664, "learning_rate": 4.106356421790062e-06, "loss": 1.6186398267745972, "step": 3437 }, { "epoch": 1.805672268907563, "grad_norm": 11.210346683322042, "learning_rate": 4.103349771814512e-06, "loss": 0.7343453168869019, "step": 3438 }, { "epoch": 1.8061974789915967, "grad_norm": 9.795269402981832, "learning_rate": 4.100343456801747e-06, "loss": 1.707342505455017, "step": 3439 }, { "epoch": 1.8067226890756303, "grad_norm": 9.607912720799668, "learning_rate": 4.0973374778748385e-06, "loss": 2.2637925148010254, "step": 3440 }, { "epoch": 1.8072478991596639, "grad_norm": 9.30696000541257, "learning_rate": 4.094331836156732e-06, "loss": 1.817786455154419, "step": 3441 }, { "epoch": 1.8077731092436975, "grad_norm": 9.120924809969319, "learning_rate": 4.091326532770253e-06, "loss": 1.3939745426177979, "step": 3442 }, { "epoch": 1.808298319327731, "grad_norm": 12.791618153069194, "learning_rate": 4.088321568838095e-06, "loss": 1.0895910263061523, "step": 3443 }, { "epoch": 1.8088235294117647, "grad_norm": 15.805091519866417, "learning_rate": 4.0853169454828245e-06, "loss": 1.1233513355255127, "step": 3444 }, { "epoch": 1.8093487394957983, "grad_norm": 9.67114073383788, "learning_rate": 4.082312663826886e-06, "loss": 1.3852238655090332, "step": 3445 }, { "epoch": 1.809873949579832, "grad_norm": 13.11357620584965, "learning_rate": 4.07930872499259e-06, "loss": 1.895456075668335, "step": 3446 }, { "epoch": 1.8103991596638656, "grad_norm": 16.472186007534404, "learning_rate": 4.0763051301021225e-06, "loss": 1.219724416732788, "step": 3447 }, { "epoch": 1.8109243697478992, "grad_norm": 13.682498737920504, "learning_rate": 4.07330188027754e-06, "loss": 1.4474141597747803, "step": 3448 }, { "epoch": 1.8114495798319328, "grad_norm": 16.945705699886382, "learning_rate": 4.070298976640772e-06, "loss": 1.7105793952941895, "step": 3449 }, { "epoch": 1.8119747899159664, "grad_norm": 13.491181501760696, "learning_rate": 4.067296420313616e-06, "loss": 1.7510032653808594, "step": 3450 }, { "epoch": 1.8125, "grad_norm": 9.557034896649517, "learning_rate": 4.0642942124177405e-06, "loss": 1.811830997467041, "step": 3451 }, { "epoch": 1.8130252100840336, "grad_norm": 6.855976914768561, "learning_rate": 4.061292354074683e-06, "loss": 1.4671962261199951, "step": 3452 }, { "epoch": 1.8135504201680672, "grad_norm": 10.792420388671536, "learning_rate": 4.058290846405856e-06, "loss": 1.3182471990585327, "step": 3453 }, { "epoch": 1.8140756302521008, "grad_norm": 23.892205682541743, "learning_rate": 4.055289690532533e-06, "loss": 1.6939082145690918, "step": 3454 }, { "epoch": 1.8146008403361344, "grad_norm": 7.0967763367433685, "learning_rate": 4.052288887575859e-06, "loss": 1.5236637592315674, "step": 3455 }, { "epoch": 1.815126050420168, "grad_norm": 10.597848400874861, "learning_rate": 4.04928843865685e-06, "loss": 1.867397427558899, "step": 3456 }, { "epoch": 1.8156512605042017, "grad_norm": 11.652814350156293, "learning_rate": 4.046288344896388e-06, "loss": 1.8498456478118896, "step": 3457 }, { "epoch": 1.8161764705882353, "grad_norm": 13.034029284683328, "learning_rate": 4.043288607415219e-06, "loss": 1.4994248151779175, "step": 3458 }, { "epoch": 1.816701680672269, "grad_norm": 12.089453086650884, "learning_rate": 4.040289227333961e-06, "loss": 1.8718979358673096, "step": 3459 }, { "epoch": 1.8172268907563025, "grad_norm": 14.745646890122712, "learning_rate": 4.0372902057730965e-06, "loss": 1.2499297857284546, "step": 3460 }, { "epoch": 1.8177521008403361, "grad_norm": 8.929403476902454, "learning_rate": 4.034291543852973e-06, "loss": 1.5101208686828613, "step": 3461 }, { "epoch": 1.8182773109243697, "grad_norm": 12.343841661330574, "learning_rate": 4.031293242693804e-06, "loss": 2.445464611053467, "step": 3462 }, { "epoch": 1.8188025210084033, "grad_norm": 12.469468961118855, "learning_rate": 4.02829530341567e-06, "loss": 1.645592451095581, "step": 3463 }, { "epoch": 1.819327731092437, "grad_norm": 9.840940280615914, "learning_rate": 4.025297727138515e-06, "loss": 0.6531387567520142, "step": 3464 }, { "epoch": 1.8198529411764706, "grad_norm": 11.69707365477793, "learning_rate": 4.022300514982146e-06, "loss": 1.9656018018722534, "step": 3465 }, { "epoch": 1.8203781512605042, "grad_norm": 14.598663611267417, "learning_rate": 4.019303668066238e-06, "loss": 1.7297413349151611, "step": 3466 }, { "epoch": 1.8209033613445378, "grad_norm": 8.425037055214741, "learning_rate": 4.016307187510323e-06, "loss": 1.152799367904663, "step": 3467 }, { "epoch": 1.8214285714285714, "grad_norm": 10.563128855927673, "learning_rate": 4.013311074433804e-06, "loss": 1.5894992351531982, "step": 3468 }, { "epoch": 1.821953781512605, "grad_norm": 15.959628200021928, "learning_rate": 4.010315329955941e-06, "loss": 3.25376558303833, "step": 3469 }, { "epoch": 1.8224789915966386, "grad_norm": 7.335622259363143, "learning_rate": 4.007319955195857e-06, "loss": 1.4387871026992798, "step": 3470 }, { "epoch": 1.8230042016806722, "grad_norm": 17.588397483196, "learning_rate": 4.004324951272542e-06, "loss": 2.3955492973327637, "step": 3471 }, { "epoch": 1.8235294117647058, "grad_norm": 12.966950041118393, "learning_rate": 4.001330319304839e-06, "loss": 1.4235239028930664, "step": 3472 }, { "epoch": 1.8240546218487395, "grad_norm": 9.704952732966582, "learning_rate": 3.998336060411459e-06, "loss": 1.40226411819458, "step": 3473 }, { "epoch": 1.824579831932773, "grad_norm": 10.06606318007555, "learning_rate": 3.99534217571097e-06, "loss": 1.7859091758728027, "step": 3474 }, { "epoch": 1.8251050420168067, "grad_norm": 10.228912141924729, "learning_rate": 3.992348666321803e-06, "loss": 1.8350114822387695, "step": 3475 }, { "epoch": 1.8256302521008403, "grad_norm": 11.99436743543487, "learning_rate": 3.989355533362246e-06, "loss": 1.480570912361145, "step": 3476 }, { "epoch": 1.826155462184874, "grad_norm": 10.298924229721873, "learning_rate": 3.986362777950448e-06, "loss": 1.5259010791778564, "step": 3477 }, { "epoch": 1.8266806722689075, "grad_norm": 12.035066712497297, "learning_rate": 3.983370401204415e-06, "loss": 1.3747330904006958, "step": 3478 }, { "epoch": 1.8272058823529411, "grad_norm": 8.072435006498589, "learning_rate": 3.980378404242014e-06, "loss": 1.5108224153518677, "step": 3479 }, { "epoch": 1.8277310924369747, "grad_norm": 20.28431216097508, "learning_rate": 3.9773867881809705e-06, "loss": 1.5896763801574707, "step": 3480 }, { "epoch": 1.8282563025210083, "grad_norm": 8.495392722718748, "learning_rate": 3.9743955541388645e-06, "loss": 1.28151535987854, "step": 3481 }, { "epoch": 1.8287815126050422, "grad_norm": 11.6212349030481, "learning_rate": 3.971404703233137e-06, "loss": 2.096107006072998, "step": 3482 }, { "epoch": 1.8293067226890756, "grad_norm": 10.387461788306586, "learning_rate": 3.968414236581083e-06, "loss": 1.4754010438919067, "step": 3483 }, { "epoch": 1.8298319327731094, "grad_norm": 12.209172961359403, "learning_rate": 3.965424155299854e-06, "loss": 1.67756986618042, "step": 3484 }, { "epoch": 1.8303571428571428, "grad_norm": 6.670216689112044, "learning_rate": 3.962434460506459e-06, "loss": 1.754809021949768, "step": 3485 }, { "epoch": 1.8308823529411766, "grad_norm": 17.98808673936292, "learning_rate": 3.959445153317764e-06, "loss": 2.6715197563171387, "step": 3486 }, { "epoch": 1.83140756302521, "grad_norm": 13.205661357485651, "learning_rate": 3.956456234850487e-06, "loss": 1.5148085355758667, "step": 3487 }, { "epoch": 1.8319327731092439, "grad_norm": 8.802016780785296, "learning_rate": 3.953467706221202e-06, "loss": 1.7863610982894897, "step": 3488 }, { "epoch": 1.8324579831932772, "grad_norm": 10.038342880235064, "learning_rate": 3.950479568546336e-06, "loss": 0.9457213282585144, "step": 3489 }, { "epoch": 1.832983193277311, "grad_norm": 10.635568910590035, "learning_rate": 3.947491822942174e-06, "loss": 1.2064887285232544, "step": 3490 }, { "epoch": 1.8335084033613445, "grad_norm": 20.290554699412123, "learning_rate": 3.9445044705248525e-06, "loss": 1.7002025842666626, "step": 3491 }, { "epoch": 1.8340336134453783, "grad_norm": 12.039964517380843, "learning_rate": 3.941517512410357e-06, "loss": 1.3340742588043213, "step": 3492 }, { "epoch": 1.8345588235294117, "grad_norm": 8.364285713488247, "learning_rate": 3.938530949714533e-06, "loss": 1.5468111038208008, "step": 3493 }, { "epoch": 1.8350840336134455, "grad_norm": 8.638257087094814, "learning_rate": 3.935544783553072e-06, "loss": 1.4965511560440063, "step": 3494 }, { "epoch": 1.835609243697479, "grad_norm": 7.415632342102558, "learning_rate": 3.932559015041523e-06, "loss": 1.7324585914611816, "step": 3495 }, { "epoch": 1.8361344537815127, "grad_norm": 10.51269812806555, "learning_rate": 3.929573645295278e-06, "loss": 1.1609418392181396, "step": 3496 }, { "epoch": 1.8366596638655461, "grad_norm": 15.619137489213774, "learning_rate": 3.926588675429591e-06, "loss": 1.2084197998046875, "step": 3497 }, { "epoch": 1.83718487394958, "grad_norm": 12.066436472992145, "learning_rate": 3.9236041065595596e-06, "loss": 1.248793363571167, "step": 3498 }, { "epoch": 1.8377100840336134, "grad_norm": 9.580304526892963, "learning_rate": 3.920619939800131e-06, "loss": 1.2989832162857056, "step": 3499 }, { "epoch": 1.8382352941176472, "grad_norm": 6.726292567384109, "learning_rate": 3.917636176266105e-06, "loss": 1.5274391174316406, "step": 3500 }, { "epoch": 1.8387605042016806, "grad_norm": 6.147539842834988, "learning_rate": 3.914652817072132e-06, "loss": 1.577737808227539, "step": 3501 }, { "epoch": 1.8392857142857144, "grad_norm": 11.201507595836313, "learning_rate": 3.9116698633327076e-06, "loss": 1.575178861618042, "step": 3502 }, { "epoch": 1.8398109243697478, "grad_norm": 8.926186371349456, "learning_rate": 3.908687316162178e-06, "loss": 1.581723928451538, "step": 3503 }, { "epoch": 1.8403361344537816, "grad_norm": 11.629609827402959, "learning_rate": 3.905705176674736e-06, "loss": 0.8478154540061951, "step": 3504 }, { "epoch": 1.840861344537815, "grad_norm": 16.913352119962365, "learning_rate": 3.902723445984425e-06, "loss": 1.2822133302688599, "step": 3505 }, { "epoch": 1.8413865546218489, "grad_norm": 10.084626311786446, "learning_rate": 3.899742125205135e-06, "loss": 1.6672027111053467, "step": 3506 }, { "epoch": 1.8419117647058822, "grad_norm": 17.855317629102217, "learning_rate": 3.896761215450598e-06, "loss": 1.7304069995880127, "step": 3507 }, { "epoch": 1.842436974789916, "grad_norm": 11.73475424279721, "learning_rate": 3.8937807178344004e-06, "loss": 1.579456090927124, "step": 3508 }, { "epoch": 1.8429621848739495, "grad_norm": 9.635595518117883, "learning_rate": 3.890800633469968e-06, "loss": 2.3471951484680176, "step": 3509 }, { "epoch": 1.8434873949579833, "grad_norm": 18.969351341207172, "learning_rate": 3.887820963470575e-06, "loss": 2.481104850769043, "step": 3510 }, { "epoch": 1.8440126050420167, "grad_norm": 14.561536676382035, "learning_rate": 3.8848417089493416e-06, "loss": 1.7346417903900146, "step": 3511 }, { "epoch": 1.8445378151260505, "grad_norm": 14.412763962717026, "learning_rate": 3.881862871019232e-06, "loss": 1.4879249334335327, "step": 3512 }, { "epoch": 1.845063025210084, "grad_norm": 12.01648701659342, "learning_rate": 3.878884450793053e-06, "loss": 2.0206332206726074, "step": 3513 }, { "epoch": 1.8455882352941178, "grad_norm": 10.921786140963496, "learning_rate": 3.875906449383457e-06, "loss": 1.617268681526184, "step": 3514 }, { "epoch": 1.8461134453781511, "grad_norm": 13.363315665679083, "learning_rate": 3.872928867902941e-06, "loss": 1.9489585161209106, "step": 3515 }, { "epoch": 1.846638655462185, "grad_norm": 20.84039375418646, "learning_rate": 3.869951707463844e-06, "loss": 1.4352951049804688, "step": 3516 }, { "epoch": 1.8471638655462184, "grad_norm": 11.08121858346143, "learning_rate": 3.866974969178348e-06, "loss": 2.229334592819214, "step": 3517 }, { "epoch": 1.8476890756302522, "grad_norm": 9.248862539143877, "learning_rate": 3.863998654158473e-06, "loss": 1.4621502161026, "step": 3518 }, { "epoch": 1.8482142857142856, "grad_norm": 16.4704502139439, "learning_rate": 3.861022763516091e-06, "loss": 2.312044143676758, "step": 3519 }, { "epoch": 1.8487394957983194, "grad_norm": 8.787902743848047, "learning_rate": 3.858047298362905e-06, "loss": 1.7228615283966064, "step": 3520 }, { "epoch": 1.8492647058823528, "grad_norm": 9.96677635268189, "learning_rate": 3.855072259810465e-06, "loss": 1.3734462261199951, "step": 3521 }, { "epoch": 1.8497899159663866, "grad_norm": 10.009262130324313, "learning_rate": 3.852097648970159e-06, "loss": 2.19566011428833, "step": 3522 }, { "epoch": 1.85031512605042, "grad_norm": 7.524826516500948, "learning_rate": 3.849123466953217e-06, "loss": 1.8094148635864258, "step": 3523 }, { "epoch": 1.8508403361344539, "grad_norm": 6.928065305860581, "learning_rate": 3.846149714870709e-06, "loss": 1.797644019126892, "step": 3524 }, { "epoch": 1.8513655462184873, "grad_norm": 8.994073872711708, "learning_rate": 3.8431763938335415e-06, "loss": 1.77252197265625, "step": 3525 }, { "epoch": 1.851890756302521, "grad_norm": 12.96452628798218, "learning_rate": 3.840203504952462e-06, "loss": 1.1664371490478516, "step": 3526 }, { "epoch": 1.8524159663865545, "grad_norm": 18.83396205571419, "learning_rate": 3.837231049338057e-06, "loss": 1.7924728393554688, "step": 3527 }, { "epoch": 1.8529411764705883, "grad_norm": 15.45289613980103, "learning_rate": 3.834259028100753e-06, "loss": 1.0718178749084473, "step": 3528 }, { "epoch": 1.8534663865546217, "grad_norm": 16.842392788992143, "learning_rate": 3.831287442350806e-06, "loss": 1.0396767854690552, "step": 3529 }, { "epoch": 1.8539915966386555, "grad_norm": 8.299756594769104, "learning_rate": 3.828316293198321e-06, "loss": 1.5213674306869507, "step": 3530 }, { "epoch": 1.854516806722689, "grad_norm": 20.20417577357553, "learning_rate": 3.8253455817532305e-06, "loss": 1.7097625732421875, "step": 3531 }, { "epoch": 1.8550420168067228, "grad_norm": 6.797592140416188, "learning_rate": 3.822375309125309e-06, "loss": 1.6455717086791992, "step": 3532 }, { "epoch": 1.8555672268907561, "grad_norm": 19.2025588245244, "learning_rate": 3.819405476424164e-06, "loss": 1.8461918830871582, "step": 3533 }, { "epoch": 1.85609243697479, "grad_norm": 9.984185122289594, "learning_rate": 3.816436084759239e-06, "loss": 1.5979926586151123, "step": 3534 }, { "epoch": 1.8566176470588234, "grad_norm": 11.212003212034826, "learning_rate": 3.8134671352398157e-06, "loss": 2.200836181640625, "step": 3535 }, { "epoch": 1.8571428571428572, "grad_norm": 7.719172038204905, "learning_rate": 3.810498628975007e-06, "loss": 1.2535037994384766, "step": 3536 }, { "epoch": 1.8576680672268906, "grad_norm": 10.78374603181833, "learning_rate": 3.8075305670737605e-06, "loss": 1.2252781391143799, "step": 3537 }, { "epoch": 1.8581932773109244, "grad_norm": 21.675118208744227, "learning_rate": 3.804562950644861e-06, "loss": 1.6327319145202637, "step": 3538 }, { "epoch": 1.8587184873949578, "grad_norm": 10.943859360268, "learning_rate": 3.8015957807969247e-06, "loss": 1.930145502090454, "step": 3539 }, { "epoch": 1.8592436974789917, "grad_norm": 10.628317840655198, "learning_rate": 3.7986290586383995e-06, "loss": 1.1274309158325195, "step": 3540 }, { "epoch": 1.8597689075630253, "grad_norm": 14.694661522647797, "learning_rate": 3.795662785277568e-06, "loss": 1.229578971862793, "step": 3541 }, { "epoch": 1.8602941176470589, "grad_norm": 12.117851440355302, "learning_rate": 3.792696961822546e-06, "loss": 1.7704070806503296, "step": 3542 }, { "epoch": 1.8608193277310925, "grad_norm": 12.57542697006448, "learning_rate": 3.7897315893812796e-06, "loss": 1.3353325128555298, "step": 3543 }, { "epoch": 1.861344537815126, "grad_norm": 9.029109971955679, "learning_rate": 3.786766669061545e-06, "loss": 1.8322136402130127, "step": 3544 }, { "epoch": 1.8618697478991597, "grad_norm": 12.622751677546477, "learning_rate": 3.783802201970953e-06, "loss": 1.0906827449798584, "step": 3545 }, { "epoch": 1.8623949579831933, "grad_norm": 8.854469617171919, "learning_rate": 3.780838189216943e-06, "loss": 1.5446804761886597, "step": 3546 }, { "epoch": 1.862920168067227, "grad_norm": 14.372875903305284, "learning_rate": 3.7778746319067867e-06, "loss": 2.2716293334960938, "step": 3547 }, { "epoch": 1.8634453781512605, "grad_norm": 13.435472401663668, "learning_rate": 3.774911531147582e-06, "loss": 1.5395889282226562, "step": 3548 }, { "epoch": 1.8639705882352942, "grad_norm": 15.00366731481477, "learning_rate": 3.7719488880462596e-06, "loss": 1.8546674251556396, "step": 3549 }, { "epoch": 1.8644957983193278, "grad_norm": 11.27849678903175, "learning_rate": 3.7689867037095756e-06, "loss": 1.7650783061981201, "step": 3550 }, { "epoch": 1.8650210084033614, "grad_norm": 10.62881995178189, "learning_rate": 3.7660249792441197e-06, "loss": 1.1084260940551758, "step": 3551 }, { "epoch": 1.865546218487395, "grad_norm": 8.679418209805792, "learning_rate": 3.763063715756306e-06, "loss": 1.3064913749694824, "step": 3552 }, { "epoch": 1.8660714285714286, "grad_norm": 9.825975361259932, "learning_rate": 3.7601029143523767e-06, "loss": 1.6261317729949951, "step": 3553 }, { "epoch": 1.8665966386554622, "grad_norm": 13.022200849444895, "learning_rate": 3.7571425761384038e-06, "loss": 1.4149929285049438, "step": 3554 }, { "epoch": 1.8671218487394958, "grad_norm": 9.208151871527573, "learning_rate": 3.7541827022202838e-06, "loss": 1.4480232000350952, "step": 3555 }, { "epoch": 1.8676470588235294, "grad_norm": 20.116410208061595, "learning_rate": 3.751223293703741e-06, "loss": 1.8733075857162476, "step": 3556 }, { "epoch": 1.868172268907563, "grad_norm": 10.950879834930689, "learning_rate": 3.748264351694324e-06, "loss": 1.5732142925262451, "step": 3557 }, { "epoch": 1.8686974789915967, "grad_norm": 14.323168406084772, "learning_rate": 3.7453058772974115e-06, "loss": 2.260406732559204, "step": 3558 }, { "epoch": 1.8692226890756303, "grad_norm": 12.112730133012303, "learning_rate": 3.7423478716182026e-06, "loss": 1.5210075378417969, "step": 3559 }, { "epoch": 1.8697478991596639, "grad_norm": 9.728691238390134, "learning_rate": 3.7393903357617235e-06, "loss": 1.485840916633606, "step": 3560 }, { "epoch": 1.8702731092436975, "grad_norm": 7.182419658455595, "learning_rate": 3.7364332708328232e-06, "loss": 1.4217307567596436, "step": 3561 }, { "epoch": 1.870798319327731, "grad_norm": 17.93417450916989, "learning_rate": 3.7334766779361797e-06, "loss": 1.7334325313568115, "step": 3562 }, { "epoch": 1.8713235294117647, "grad_norm": 8.933173598649242, "learning_rate": 3.7305205581762895e-06, "loss": 1.5833280086517334, "step": 3563 }, { "epoch": 1.8718487394957983, "grad_norm": 11.260953763552077, "learning_rate": 3.727564912657472e-06, "loss": 1.8907787799835205, "step": 3564 }, { "epoch": 1.872373949579832, "grad_norm": 15.444795603802612, "learning_rate": 3.7246097424838746e-06, "loss": 1.2487103939056396, "step": 3565 }, { "epoch": 1.8728991596638656, "grad_norm": 20.851161433135722, "learning_rate": 3.721655048759464e-06, "loss": 1.5314972400665283, "step": 3566 }, { "epoch": 1.8734243697478992, "grad_norm": 10.964787905031088, "learning_rate": 3.718700832588027e-06, "loss": 1.34340238571167, "step": 3567 }, { "epoch": 1.8739495798319328, "grad_norm": 13.500889527296094, "learning_rate": 3.715747095073173e-06, "loss": 1.7862898111343384, "step": 3568 }, { "epoch": 1.8744747899159664, "grad_norm": 8.227690269926454, "learning_rate": 3.712793837318338e-06, "loss": 1.5169625282287598, "step": 3569 }, { "epoch": 1.875, "grad_norm": 11.56621403964026, "learning_rate": 3.709841060426771e-06, "loss": 2.2738161087036133, "step": 3570 }, { "epoch": 1.8755252100840336, "grad_norm": 22.653913600727332, "learning_rate": 3.706888765501545e-06, "loss": 1.3991615772247314, "step": 3571 }, { "epoch": 1.8760504201680672, "grad_norm": 9.38772465128983, "learning_rate": 3.7039369536455525e-06, "loss": 1.4856879711151123, "step": 3572 }, { "epoch": 1.8765756302521008, "grad_norm": 12.572089255662306, "learning_rate": 3.7009856259615074e-06, "loss": 1.2467734813690186, "step": 3573 }, { "epoch": 1.8771008403361344, "grad_norm": 12.134114511683737, "learning_rate": 3.698034783551939e-06, "loss": 1.200655460357666, "step": 3574 }, { "epoch": 1.877626050420168, "grad_norm": 10.027598369525876, "learning_rate": 3.6950844275191973e-06, "loss": 1.10594642162323, "step": 3575 }, { "epoch": 1.8781512605042017, "grad_norm": 13.63807690445431, "learning_rate": 3.6921345589654524e-06, "loss": 1.6851091384887695, "step": 3576 }, { "epoch": 1.8786764705882353, "grad_norm": 15.168541909229665, "learning_rate": 3.689185178992689e-06, "loss": 1.5337399244308472, "step": 3577 }, { "epoch": 1.879201680672269, "grad_norm": 12.826929763580086, "learning_rate": 3.686236288702712e-06, "loss": 1.040381908416748, "step": 3578 }, { "epoch": 1.8797268907563025, "grad_norm": 5.720647829190294, "learning_rate": 3.68328788919714e-06, "loss": 1.0834476947784424, "step": 3579 }, { "epoch": 1.8802521008403361, "grad_norm": 7.91033875802829, "learning_rate": 3.6803399815774133e-06, "loss": 1.9564454555511475, "step": 3580 }, { "epoch": 1.8807773109243697, "grad_norm": 9.930278788362486, "learning_rate": 3.677392566944783e-06, "loss": 1.725341558456421, "step": 3581 }, { "epoch": 1.8813025210084033, "grad_norm": 9.461795901718977, "learning_rate": 3.674445646400321e-06, "loss": 1.4589251279830933, "step": 3582 }, { "epoch": 1.881827731092437, "grad_norm": 12.069724162160428, "learning_rate": 3.6714992210449084e-06, "loss": 1.4926224946975708, "step": 3583 }, { "epoch": 1.8823529411764706, "grad_norm": 6.813433444255876, "learning_rate": 3.66855329197925e-06, "loss": 0.6729644536972046, "step": 3584 }, { "epoch": 1.8828781512605042, "grad_norm": 12.39938011433959, "learning_rate": 3.665607860303857e-06, "loss": 1.424481749534607, "step": 3585 }, { "epoch": 1.8834033613445378, "grad_norm": 12.043233977310381, "learning_rate": 3.6626629271190594e-06, "loss": 1.4678826332092285, "step": 3586 }, { "epoch": 1.8839285714285714, "grad_norm": 17.906264162364973, "learning_rate": 3.6597184935249986e-06, "loss": 1.7656259536743164, "step": 3587 }, { "epoch": 1.884453781512605, "grad_norm": 10.028723828386402, "learning_rate": 3.656774560621632e-06, "loss": 1.4614261388778687, "step": 3588 }, { "epoch": 1.8849789915966386, "grad_norm": 10.62126280003491, "learning_rate": 3.653831129508727e-06, "loss": 1.4474071264266968, "step": 3589 }, { "epoch": 1.8855042016806722, "grad_norm": 11.587557348981067, "learning_rate": 3.6508882012858647e-06, "loss": 0.8009591102600098, "step": 3590 }, { "epoch": 1.8860294117647058, "grad_norm": 9.23527206371919, "learning_rate": 3.6479457770524413e-06, "loss": 1.29331636428833, "step": 3591 }, { "epoch": 1.8865546218487395, "grad_norm": 11.342049018828623, "learning_rate": 3.6450038579076595e-06, "loss": 1.700790286064148, "step": 3592 }, { "epoch": 1.887079831932773, "grad_norm": 12.725170037155792, "learning_rate": 3.642062444950537e-06, "loss": 1.4710191488265991, "step": 3593 }, { "epoch": 1.8876050420168067, "grad_norm": 19.69353183373372, "learning_rate": 3.6391215392798994e-06, "loss": 1.856687068939209, "step": 3594 }, { "epoch": 1.8881302521008403, "grad_norm": 12.660195112849834, "learning_rate": 3.636181141994387e-06, "loss": 1.7518823146820068, "step": 3595 }, { "epoch": 1.888655462184874, "grad_norm": 13.469637240764241, "learning_rate": 3.6332412541924473e-06, "loss": 2.629429340362549, "step": 3596 }, { "epoch": 1.8891806722689075, "grad_norm": 14.229616913477427, "learning_rate": 3.630301876972337e-06, "loss": 1.3876407146453857, "step": 3597 }, { "epoch": 1.8897058823529411, "grad_norm": 11.029448677979465, "learning_rate": 3.6273630114321223e-06, "loss": 2.240974187850952, "step": 3598 }, { "epoch": 1.8902310924369747, "grad_norm": 10.6206510207668, "learning_rate": 3.624424658669682e-06, "loss": 1.4731332063674927, "step": 3599 }, { "epoch": 1.8907563025210083, "grad_norm": 12.444114612667324, "learning_rate": 3.621486819782698e-06, "loss": 1.0779138803482056, "step": 3600 }, { "epoch": 1.8912815126050422, "grad_norm": 8.349489876337945, "learning_rate": 3.618549495868662e-06, "loss": 1.559354305267334, "step": 3601 }, { "epoch": 1.8918067226890756, "grad_norm": 8.863323426779825, "learning_rate": 3.6156126880248765e-06, "loss": 1.8395235538482666, "step": 3602 }, { "epoch": 1.8923319327731094, "grad_norm": 15.85107512087656, "learning_rate": 3.612676397348447e-06, "loss": 1.1617748737335205, "step": 3603 }, { "epoch": 1.8928571428571428, "grad_norm": 15.80706900262539, "learning_rate": 3.6097406249362877e-06, "loss": 1.8298149108886719, "step": 3604 }, { "epoch": 1.8933823529411766, "grad_norm": 8.974253810745306, "learning_rate": 3.606805371885117e-06, "loss": 1.0484018325805664, "step": 3605 }, { "epoch": 1.89390756302521, "grad_norm": 11.802935511606998, "learning_rate": 3.6038706392914647e-06, "loss": 1.3665120601654053, "step": 3606 }, { "epoch": 1.8944327731092439, "grad_norm": 12.486120252985891, "learning_rate": 3.60093642825166e-06, "loss": 1.7603936195373535, "step": 3607 }, { "epoch": 1.8949579831932772, "grad_norm": 10.979063442675901, "learning_rate": 3.598002739861841e-06, "loss": 1.2447429895401, "step": 3608 }, { "epoch": 1.895483193277311, "grad_norm": 11.24906092027409, "learning_rate": 3.5950695752179487e-06, "loss": 2.0604703426361084, "step": 3609 }, { "epoch": 1.8960084033613445, "grad_norm": 9.453625736292222, "learning_rate": 3.5921369354157303e-06, "loss": 1.775797724723816, "step": 3610 }, { "epoch": 1.8965336134453783, "grad_norm": 7.1879898781777785, "learning_rate": 3.5892048215507356e-06, "loss": 1.567115306854248, "step": 3611 }, { "epoch": 1.8970588235294117, "grad_norm": 5.651610017591024, "learning_rate": 3.5862732347183165e-06, "loss": 1.0143564939498901, "step": 3612 }, { "epoch": 1.8975840336134455, "grad_norm": 8.618979134107734, "learning_rate": 3.5833421760136323e-06, "loss": 2.064939022064209, "step": 3613 }, { "epoch": 1.898109243697479, "grad_norm": 10.291790714204135, "learning_rate": 3.580411646531641e-06, "loss": 1.848331093788147, "step": 3614 }, { "epoch": 1.8986344537815127, "grad_norm": 7.4177590835040395, "learning_rate": 3.5774816473671037e-06, "loss": 1.7782447338104248, "step": 3615 }, { "epoch": 1.8991596638655461, "grad_norm": 10.985342350821906, "learning_rate": 3.574552179614584e-06, "loss": 1.1551049947738647, "step": 3616 }, { "epoch": 1.89968487394958, "grad_norm": 12.559700565722542, "learning_rate": 3.5716232443684486e-06, "loss": 1.6754170656204224, "step": 3617 }, { "epoch": 1.9002100840336134, "grad_norm": 10.86033585411204, "learning_rate": 3.568694842722863e-06, "loss": 1.5450774431228638, "step": 3618 }, { "epoch": 1.9007352941176472, "grad_norm": 11.226671306631834, "learning_rate": 3.5657669757717927e-06, "loss": 2.153855323791504, "step": 3619 }, { "epoch": 1.9012605042016806, "grad_norm": 21.87746772998472, "learning_rate": 3.562839644609005e-06, "loss": 1.87334406375885, "step": 3620 }, { "epoch": 1.9017857142857144, "grad_norm": 8.756738357423524, "learning_rate": 3.559912850328069e-06, "loss": 1.6386264562606812, "step": 3621 }, { "epoch": 1.9023109243697478, "grad_norm": 7.400197604584446, "learning_rate": 3.5569865940223492e-06, "loss": 0.8230345249176025, "step": 3622 }, { "epoch": 1.9028361344537816, "grad_norm": 12.103140883663276, "learning_rate": 3.5540608767850106e-06, "loss": 1.4310319423675537, "step": 3623 }, { "epoch": 1.903361344537815, "grad_norm": 7.910084852656171, "learning_rate": 3.5511356997090176e-06, "loss": 1.5238559246063232, "step": 3624 }, { "epoch": 1.9038865546218489, "grad_norm": 13.138582623231445, "learning_rate": 3.5482110638871325e-06, "loss": 1.183509111404419, "step": 3625 }, { "epoch": 1.9044117647058822, "grad_norm": 9.591732145752783, "learning_rate": 3.5452869704119156e-06, "loss": 1.066697120666504, "step": 3626 }, { "epoch": 1.904936974789916, "grad_norm": 12.416841644173525, "learning_rate": 3.5423634203757235e-06, "loss": 1.1212185621261597, "step": 3627 }, { "epoch": 1.9054621848739495, "grad_norm": 8.640970310864347, "learning_rate": 3.539440414870712e-06, "loss": 1.0890882015228271, "step": 3628 }, { "epoch": 1.9059873949579833, "grad_norm": 14.555649963630264, "learning_rate": 3.5365179549888306e-06, "loss": 1.2889267206192017, "step": 3629 }, { "epoch": 1.9065126050420167, "grad_norm": 11.4173030386502, "learning_rate": 3.5335960418218272e-06, "loss": 1.409904956817627, "step": 3630 }, { "epoch": 1.9070378151260505, "grad_norm": 8.089996616310701, "learning_rate": 3.5306746764612433e-06, "loss": 2.159165143966675, "step": 3631 }, { "epoch": 1.907563025210084, "grad_norm": 15.391522922837247, "learning_rate": 3.527753859998419e-06, "loss": 2.1786513328552246, "step": 3632 }, { "epoch": 1.9080882352941178, "grad_norm": 13.936825236031506, "learning_rate": 3.524833593524487e-06, "loss": 1.3346937894821167, "step": 3633 }, { "epoch": 1.9086134453781511, "grad_norm": 8.008434850996032, "learning_rate": 3.5219138781303743e-06, "loss": 1.6728880405426025, "step": 3634 }, { "epoch": 1.909138655462185, "grad_norm": 8.091000502435186, "learning_rate": 3.5189947149068028e-06, "loss": 1.7466754913330078, "step": 3635 }, { "epoch": 1.9096638655462184, "grad_norm": 14.247421381338212, "learning_rate": 3.5160761049442887e-06, "loss": 2.038893461227417, "step": 3636 }, { "epoch": 1.9101890756302522, "grad_norm": 26.21073194103894, "learning_rate": 3.513158049333141e-06, "loss": 1.2838835716247559, "step": 3637 }, { "epoch": 1.9107142857142856, "grad_norm": 21.439346341804875, "learning_rate": 3.51024054916346e-06, "loss": 2.3123278617858887, "step": 3638 }, { "epoch": 1.9112394957983194, "grad_norm": 14.333166847254372, "learning_rate": 3.5073236055251425e-06, "loss": 1.8338489532470703, "step": 3639 }, { "epoch": 1.9117647058823528, "grad_norm": 16.659338932005724, "learning_rate": 3.504407219507873e-06, "loss": 1.7070486545562744, "step": 3640 }, { "epoch": 1.9122899159663866, "grad_norm": 7.982948890385432, "learning_rate": 3.50149139220113e-06, "loss": 0.487301766872406, "step": 3641 }, { "epoch": 1.91281512605042, "grad_norm": 9.460227929108488, "learning_rate": 3.4985761246941825e-06, "loss": 2.13903546333313, "step": 3642 }, { "epoch": 1.9133403361344539, "grad_norm": 15.804121583915196, "learning_rate": 3.4956614180760918e-06, "loss": 1.2893824577331543, "step": 3643 }, { "epoch": 1.9138655462184873, "grad_norm": 12.95136446520394, "learning_rate": 3.492747273435708e-06, "loss": 2.1587581634521484, "step": 3644 }, { "epoch": 1.914390756302521, "grad_norm": 9.512014326052435, "learning_rate": 3.4898336918616726e-06, "loss": 1.3404757976531982, "step": 3645 }, { "epoch": 1.9149159663865545, "grad_norm": 10.877528867749788, "learning_rate": 3.4869206744424145e-06, "loss": 1.6557071208953857, "step": 3646 }, { "epoch": 1.9154411764705883, "grad_norm": 15.992389857564886, "learning_rate": 3.4840082222661543e-06, "loss": 1.7658555507659912, "step": 3647 }, { "epoch": 1.9159663865546217, "grad_norm": 16.751926277390957, "learning_rate": 3.481096336420901e-06, "loss": 1.9302045106887817, "step": 3648 }, { "epoch": 1.9164915966386555, "grad_norm": 11.168683662508885, "learning_rate": 3.47818501799445e-06, "loss": 1.350619912147522, "step": 3649 }, { "epoch": 1.917016806722689, "grad_norm": 10.95598176963718, "learning_rate": 3.475274268074388e-06, "loss": 1.9971766471862793, "step": 3650 }, { "epoch": 1.9175420168067228, "grad_norm": 8.588886644464463, "learning_rate": 3.4723640877480875e-06, "loss": 1.612987756729126, "step": 3651 }, { "epoch": 1.9180672268907561, "grad_norm": 7.961264639918314, "learning_rate": 3.4694544781027072e-06, "loss": 1.1771678924560547, "step": 3652 }, { "epoch": 1.91859243697479, "grad_norm": 16.40130897804457, "learning_rate": 3.4665454402251937e-06, "loss": 1.7718167304992676, "step": 3653 }, { "epoch": 1.9191176470588234, "grad_norm": 15.179397950212735, "learning_rate": 3.4636369752022814e-06, "loss": 2.4633116722106934, "step": 3654 }, { "epoch": 1.9196428571428572, "grad_norm": 16.019901833368834, "learning_rate": 3.460729084120488e-06, "loss": 1.489286184310913, "step": 3655 }, { "epoch": 1.9201680672268906, "grad_norm": 9.79445114832196, "learning_rate": 3.4578217680661197e-06, "loss": 0.9999767541885376, "step": 3656 }, { "epoch": 1.9206932773109244, "grad_norm": 12.70204532003881, "learning_rate": 3.4549150281252635e-06, "loss": 1.3212666511535645, "step": 3657 }, { "epoch": 1.9212184873949578, "grad_norm": 10.65781127575427, "learning_rate": 3.452008865383797e-06, "loss": 1.320206642150879, "step": 3658 }, { "epoch": 1.9217436974789917, "grad_norm": 8.639190660447014, "learning_rate": 3.4491032809273784e-06, "loss": 0.8942955136299133, "step": 3659 }, { "epoch": 1.9222689075630253, "grad_norm": 6.963753783884818, "learning_rate": 3.4461982758414492e-06, "loss": 1.3711848258972168, "step": 3660 }, { "epoch": 1.9227941176470589, "grad_norm": 9.051633566956067, "learning_rate": 3.443293851211237e-06, "loss": 1.5953480005264282, "step": 3661 }, { "epoch": 1.9233193277310925, "grad_norm": 11.05506440209341, "learning_rate": 3.440390008121751e-06, "loss": 1.541900634765625, "step": 3662 }, { "epoch": 1.923844537815126, "grad_norm": 8.016742561486975, "learning_rate": 3.437486747657785e-06, "loss": 1.6536552906036377, "step": 3663 }, { "epoch": 1.9243697478991597, "grad_norm": 15.532138363683787, "learning_rate": 3.4345840709039113e-06, "loss": 1.8288671970367432, "step": 3664 }, { "epoch": 1.9248949579831933, "grad_norm": 15.628526432519445, "learning_rate": 3.4316819789444893e-06, "loss": 1.838546872138977, "step": 3665 }, { "epoch": 1.925420168067227, "grad_norm": 9.284027106376742, "learning_rate": 3.428780472863656e-06, "loss": 1.7238937616348267, "step": 3666 }, { "epoch": 1.9259453781512605, "grad_norm": 6.815635948477541, "learning_rate": 3.4258795537453305e-06, "loss": 1.6593295335769653, "step": 3667 }, { "epoch": 1.9264705882352942, "grad_norm": 14.041982862594873, "learning_rate": 3.4229792226732124e-06, "loss": 1.732300043106079, "step": 3668 }, { "epoch": 1.9269957983193278, "grad_norm": 8.674614768493917, "learning_rate": 3.4200794807307834e-06, "loss": 1.424330472946167, "step": 3669 }, { "epoch": 1.9275210084033614, "grad_norm": 16.168015564998782, "learning_rate": 3.4171803290013038e-06, "loss": 1.5260021686553955, "step": 3670 }, { "epoch": 1.928046218487395, "grad_norm": 13.99685376929219, "learning_rate": 3.4142817685678128e-06, "loss": 1.8458993434906006, "step": 3671 }, { "epoch": 1.9285714285714286, "grad_norm": 10.859423445634478, "learning_rate": 3.4113838005131285e-06, "loss": 0.9052958488464355, "step": 3672 }, { "epoch": 1.9290966386554622, "grad_norm": 18.227962265786484, "learning_rate": 3.40848642591985e-06, "loss": 1.5667917728424072, "step": 3673 }, { "epoch": 1.9296218487394958, "grad_norm": 9.098874736763168, "learning_rate": 3.405589645870354e-06, "loss": 1.3228158950805664, "step": 3674 }, { "epoch": 1.9301470588235294, "grad_norm": 13.506506614700806, "learning_rate": 3.4026934614467916e-06, "loss": 1.3141610622406006, "step": 3675 }, { "epoch": 1.930672268907563, "grad_norm": 15.543212721499522, "learning_rate": 3.3997978737310964e-06, "loss": 1.7305514812469482, "step": 3676 }, { "epoch": 1.9311974789915967, "grad_norm": 12.347081280456088, "learning_rate": 3.3969028838049765e-06, "loss": 1.2748572826385498, "step": 3677 }, { "epoch": 1.9317226890756303, "grad_norm": 12.842481440834034, "learning_rate": 3.394008492749917e-06, "loss": 1.6242189407348633, "step": 3678 }, { "epoch": 1.9322478991596639, "grad_norm": 11.68316370010674, "learning_rate": 3.3911147016471784e-06, "loss": 1.629981517791748, "step": 3679 }, { "epoch": 1.9327731092436975, "grad_norm": 9.315271218726362, "learning_rate": 3.3882215115777995e-06, "loss": 1.5515351295471191, "step": 3680 }, { "epoch": 1.933298319327731, "grad_norm": 9.92727628547744, "learning_rate": 3.3853289236225917e-06, "loss": 1.4925764799118042, "step": 3681 }, { "epoch": 1.9338235294117647, "grad_norm": 14.633372091900995, "learning_rate": 3.3824369388621435e-06, "loss": 2.2880353927612305, "step": 3682 }, { "epoch": 1.9343487394957983, "grad_norm": 9.454464207090755, "learning_rate": 3.379545558376816e-06, "loss": 1.442641258239746, "step": 3683 }, { "epoch": 1.934873949579832, "grad_norm": 6.088166946496283, "learning_rate": 3.376654783246749e-06, "loss": 1.7409708499908447, "step": 3684 }, { "epoch": 1.9353991596638656, "grad_norm": 9.464198513423897, "learning_rate": 3.37376461455185e-06, "loss": 2.063614845275879, "step": 3685 }, { "epoch": 1.9359243697478992, "grad_norm": 19.974339072441122, "learning_rate": 3.3708750533718037e-06, "loss": 2.40267014503479, "step": 3686 }, { "epoch": 1.9364495798319328, "grad_norm": 15.103949152786987, "learning_rate": 3.367986100786069e-06, "loss": 1.6338622570037842, "step": 3687 }, { "epoch": 1.9369747899159664, "grad_norm": 9.64007048692211, "learning_rate": 3.365097757873874e-06, "loss": 1.9974913597106934, "step": 3688 }, { "epoch": 1.9375, "grad_norm": 11.082012391675923, "learning_rate": 3.362210025714222e-06, "loss": 1.8873059749603271, "step": 3689 }, { "epoch": 1.9380252100840336, "grad_norm": 11.607456697270916, "learning_rate": 3.3593229053858846e-06, "loss": 1.1598262786865234, "step": 3690 }, { "epoch": 1.9385504201680672, "grad_norm": 13.096992239545292, "learning_rate": 3.356436397967409e-06, "loss": 1.7868146896362305, "step": 3691 }, { "epoch": 1.9390756302521008, "grad_norm": 13.043105799818077, "learning_rate": 3.353550504537111e-06, "loss": 1.3639363050460815, "step": 3692 }, { "epoch": 1.9396008403361344, "grad_norm": 18.45181622734499, "learning_rate": 3.350665226173078e-06, "loss": 1.4068472385406494, "step": 3693 }, { "epoch": 1.940126050420168, "grad_norm": 9.678660726074634, "learning_rate": 3.347780563953165e-06, "loss": 1.2107820510864258, "step": 3694 }, { "epoch": 1.9406512605042017, "grad_norm": 13.992355103663318, "learning_rate": 3.344896518955002e-06, "loss": 1.444906234741211, "step": 3695 }, { "epoch": 1.9411764705882353, "grad_norm": 13.235624185392497, "learning_rate": 3.3420130922559848e-06, "loss": 2.010563611984253, "step": 3696 }, { "epoch": 1.941701680672269, "grad_norm": 12.480438335315137, "learning_rate": 3.339130284933276e-06, "loss": 1.2893167734146118, "step": 3697 }, { "epoch": 1.9422268907563025, "grad_norm": 12.135467993135597, "learning_rate": 3.336248098063812e-06, "loss": 2.6332337856292725, "step": 3698 }, { "epoch": 1.9427521008403361, "grad_norm": 11.024369575537296, "learning_rate": 3.3333665327242965e-06, "loss": 2.1515727043151855, "step": 3699 }, { "epoch": 1.9432773109243697, "grad_norm": 13.106453011431299, "learning_rate": 3.330485589991197e-06, "loss": 1.2165805101394653, "step": 3700 }, { "epoch": 1.9438025210084033, "grad_norm": 13.85414619376918, "learning_rate": 3.327605270940751e-06, "loss": 1.3357783555984497, "step": 3701 }, { "epoch": 1.944327731092437, "grad_norm": 9.926190940706347, "learning_rate": 3.324725576648965e-06, "loss": 2.4131393432617188, "step": 3702 }, { "epoch": 1.9448529411764706, "grad_norm": 11.354582720037861, "learning_rate": 3.321846508191609e-06, "loss": 1.106598138809204, "step": 3703 }, { "epoch": 1.9453781512605042, "grad_norm": 10.70693421051955, "learning_rate": 3.3189680666442205e-06, "loss": 1.2720839977264404, "step": 3704 }, { "epoch": 1.9459033613445378, "grad_norm": 10.698068482218543, "learning_rate": 3.316090253082101e-06, "loss": 1.9747552871704102, "step": 3705 }, { "epoch": 1.9464285714285714, "grad_norm": 11.248755062109401, "learning_rate": 3.31321306858032e-06, "loss": 1.2248642444610596, "step": 3706 }, { "epoch": 1.946953781512605, "grad_norm": 12.4722586013678, "learning_rate": 3.3103365142137128e-06, "loss": 1.333898663520813, "step": 3707 }, { "epoch": 1.9474789915966386, "grad_norm": 10.862660634097313, "learning_rate": 3.307460591056877e-06, "loss": 1.921349048614502, "step": 3708 }, { "epoch": 1.9480042016806722, "grad_norm": 20.307690767329017, "learning_rate": 3.304585300184173e-06, "loss": 1.4340145587921143, "step": 3709 }, { "epoch": 1.9485294117647058, "grad_norm": 38.48257168199678, "learning_rate": 3.3017106426697286e-06, "loss": 1.2401962280273438, "step": 3710 }, { "epoch": 1.9490546218487395, "grad_norm": 8.868820101035617, "learning_rate": 3.2988366195874335e-06, "loss": 1.4671754837036133, "step": 3711 }, { "epoch": 1.949579831932773, "grad_norm": 10.470972382528588, "learning_rate": 3.2959632320109385e-06, "loss": 1.9609804153442383, "step": 3712 }, { "epoch": 1.9501050420168067, "grad_norm": 9.193010059542639, "learning_rate": 3.293090481013661e-06, "loss": 1.9202802181243896, "step": 3713 }, { "epoch": 1.9506302521008403, "grad_norm": 10.5872646772965, "learning_rate": 3.290218367668775e-06, "loss": 1.6255062818527222, "step": 3714 }, { "epoch": 1.951155462184874, "grad_norm": 9.625391905012721, "learning_rate": 3.2873468930492232e-06, "loss": 1.5651859045028687, "step": 3715 }, { "epoch": 1.9516806722689075, "grad_norm": 11.509002659147203, "learning_rate": 3.2844760582277047e-06, "loss": 1.252101182937622, "step": 3716 }, { "epoch": 1.9522058823529411, "grad_norm": 11.362263113850556, "learning_rate": 3.28160586427668e-06, "loss": 1.657231092453003, "step": 3717 }, { "epoch": 1.9527310924369747, "grad_norm": 12.492346854863209, "learning_rate": 3.2787363122683714e-06, "loss": 1.4209221601486206, "step": 3718 }, { "epoch": 1.9532563025210083, "grad_norm": 10.78083298459032, "learning_rate": 3.275867403274763e-06, "loss": 1.3532744646072388, "step": 3719 }, { "epoch": 1.9537815126050422, "grad_norm": 10.980794342686636, "learning_rate": 3.2729991383675952e-06, "loss": 1.6606961488723755, "step": 3720 }, { "epoch": 1.9543067226890756, "grad_norm": 11.617088574682121, "learning_rate": 3.2701315186183692e-06, "loss": 1.364490270614624, "step": 3721 }, { "epoch": 1.9548319327731094, "grad_norm": 7.731508149813748, "learning_rate": 3.2672645450983465e-06, "loss": 0.9687061309814453, "step": 3722 }, { "epoch": 1.9553571428571428, "grad_norm": 14.167412966780217, "learning_rate": 3.2643982188785457e-06, "loss": 1.4106699228286743, "step": 3723 }, { "epoch": 1.9558823529411766, "grad_norm": 10.325230666905087, "learning_rate": 3.261532541029744e-06, "loss": 1.0961787700653076, "step": 3724 }, { "epoch": 1.95640756302521, "grad_norm": 12.380657626003757, "learning_rate": 3.258667512622475e-06, "loss": 2.1758525371551514, "step": 3725 }, { "epoch": 1.9569327731092439, "grad_norm": 12.375978678030158, "learning_rate": 3.2558031347270337e-06, "loss": 1.6618202924728394, "step": 3726 }, { "epoch": 1.9574579831932772, "grad_norm": 18.36931525171859, "learning_rate": 3.252939408413467e-06, "loss": 1.6743544340133667, "step": 3727 }, { "epoch": 1.957983193277311, "grad_norm": 13.844652681081724, "learning_rate": 3.250076334751583e-06, "loss": 1.5532047748565674, "step": 3728 }, { "epoch": 1.9585084033613445, "grad_norm": 11.717970592519013, "learning_rate": 3.2472139148109416e-06, "loss": 1.7142189741134644, "step": 3729 }, { "epoch": 1.9590336134453783, "grad_norm": 13.045361171349294, "learning_rate": 3.244352149660862e-06, "loss": 1.671049952507019, "step": 3730 }, { "epoch": 1.9595588235294117, "grad_norm": 8.762201229168596, "learning_rate": 3.241491040370418e-06, "loss": 1.460729956626892, "step": 3731 }, { "epoch": 1.9600840336134455, "grad_norm": 11.680399571413231, "learning_rate": 3.2386305880084362e-06, "loss": 1.543219804763794, "step": 3732 }, { "epoch": 1.960609243697479, "grad_norm": 14.670911846953633, "learning_rate": 3.2357707936435013e-06, "loss": 2.0265159606933594, "step": 3733 }, { "epoch": 1.9611344537815127, "grad_norm": 18.386961995691504, "learning_rate": 3.23291165834395e-06, "loss": 1.4027315378189087, "step": 3734 }, { "epoch": 1.9616596638655461, "grad_norm": 10.452503460247428, "learning_rate": 3.2300531831778726e-06, "loss": 1.4007822275161743, "step": 3735 }, { "epoch": 1.96218487394958, "grad_norm": 11.202651971830901, "learning_rate": 3.227195369213112e-06, "loss": 1.1638128757476807, "step": 3736 }, { "epoch": 1.9627100840336134, "grad_norm": 9.390953349282208, "learning_rate": 3.224338217517269e-06, "loss": 1.9747158288955688, "step": 3737 }, { "epoch": 1.9632352941176472, "grad_norm": 11.709132535983134, "learning_rate": 3.2214817291576905e-06, "loss": 1.9067872762680054, "step": 3738 }, { "epoch": 1.9637605042016806, "grad_norm": 8.790615053164244, "learning_rate": 3.2186259052014797e-06, "loss": 1.751824140548706, "step": 3739 }, { "epoch": 1.9642857142857144, "grad_norm": 9.402188962100931, "learning_rate": 3.2157707467154893e-06, "loss": 1.681583046913147, "step": 3740 }, { "epoch": 1.9648109243697478, "grad_norm": 11.439637967018207, "learning_rate": 3.212916254766326e-06, "loss": 2.8366539478302, "step": 3741 }, { "epoch": 1.9653361344537816, "grad_norm": 13.600069767601083, "learning_rate": 3.2100624304203463e-06, "loss": 1.9346435070037842, "step": 3742 }, { "epoch": 1.965861344537815, "grad_norm": 17.194659560760936, "learning_rate": 3.2072092747436546e-06, "loss": 1.8379788398742676, "step": 3743 }, { "epoch": 1.9663865546218489, "grad_norm": 10.341331457245321, "learning_rate": 3.204356788802111e-06, "loss": 1.3968738317489624, "step": 3744 }, { "epoch": 1.9669117647058822, "grad_norm": 15.955853952873733, "learning_rate": 3.20150497366132e-06, "loss": 1.8972653150558472, "step": 3745 }, { "epoch": 1.967436974789916, "grad_norm": 9.289206563836805, "learning_rate": 3.19865383038664e-06, "loss": 2.190624713897705, "step": 3746 }, { "epoch": 1.9679621848739495, "grad_norm": 12.037389036541798, "learning_rate": 3.1958033600431736e-06, "loss": 1.724454402923584, "step": 3747 }, { "epoch": 1.9684873949579833, "grad_norm": 8.235554202708748, "learning_rate": 3.1929535636957774e-06, "loss": 1.6073118448257446, "step": 3748 }, { "epoch": 1.9690126050420167, "grad_norm": 11.501803657406654, "learning_rate": 3.190104442409052e-06, "loss": 1.774906039237976, "step": 3749 }, { "epoch": 1.9695378151260505, "grad_norm": 11.422652668633384, "learning_rate": 3.1872559972473475e-06, "loss": 1.8382604122161865, "step": 3750 }, { "epoch": 1.970063025210084, "grad_norm": 24.28132793523804, "learning_rate": 3.18440822927476e-06, "loss": 1.1020464897155762, "step": 3751 }, { "epoch": 1.9705882352941178, "grad_norm": 7.63934910351465, "learning_rate": 3.1815611395551373e-06, "loss": 1.5953342914581299, "step": 3752 }, { "epoch": 1.9711134453781511, "grad_norm": 8.920141330418113, "learning_rate": 3.1787147291520675e-06, "loss": 1.7396833896636963, "step": 3753 }, { "epoch": 1.971638655462185, "grad_norm": 13.786573748934304, "learning_rate": 3.1758689991288886e-06, "loss": 1.9062604904174805, "step": 3754 }, { "epoch": 1.9721638655462184, "grad_norm": 9.332335856936437, "learning_rate": 3.1730239505486827e-06, "loss": 1.7307486534118652, "step": 3755 }, { "epoch": 1.9726890756302522, "grad_norm": 9.633958070288003, "learning_rate": 3.1701795844742806e-06, "loss": 1.846346378326416, "step": 3756 }, { "epoch": 1.9732142857142856, "grad_norm": 9.441284749946968, "learning_rate": 3.1673359019682538e-06, "loss": 1.3852074146270752, "step": 3757 }, { "epoch": 1.9737394957983194, "grad_norm": 17.471973480024587, "learning_rate": 3.164492904092921e-06, "loss": 1.958652377128601, "step": 3758 }, { "epoch": 1.9742647058823528, "grad_norm": 16.607085375072238, "learning_rate": 3.1616505919103446e-06, "loss": 1.3102972507476807, "step": 3759 }, { "epoch": 1.9747899159663866, "grad_norm": 8.699856897784624, "learning_rate": 3.158808966482331e-06, "loss": 1.1039776802062988, "step": 3760 }, { "epoch": 1.97531512605042, "grad_norm": 8.566075459963004, "learning_rate": 3.1559680288704297e-06, "loss": 1.9869599342346191, "step": 3761 }, { "epoch": 1.9758403361344539, "grad_norm": 10.829440072378784, "learning_rate": 3.1531277801359326e-06, "loss": 1.6302375793457031, "step": 3762 }, { "epoch": 1.9763655462184873, "grad_norm": 7.122397427214688, "learning_rate": 3.1502882213398776e-06, "loss": 0.7667158842086792, "step": 3763 }, { "epoch": 1.976890756302521, "grad_norm": 11.39418212477519, "learning_rate": 3.1474493535430408e-06, "loss": 1.8233389854431152, "step": 3764 }, { "epoch": 1.9774159663865545, "grad_norm": 11.53953144938256, "learning_rate": 3.1446111778059405e-06, "loss": 1.3942296504974365, "step": 3765 }, { "epoch": 1.9779411764705883, "grad_norm": 11.918470999286875, "learning_rate": 3.1417736951888385e-06, "loss": 1.4948136806488037, "step": 3766 }, { "epoch": 1.9784663865546217, "grad_norm": 13.96097017851373, "learning_rate": 3.1389369067517383e-06, "loss": 1.5201667547225952, "step": 3767 }, { "epoch": 1.9789915966386555, "grad_norm": 20.99552237339867, "learning_rate": 3.136100813554381e-06, "loss": 2.2616405487060547, "step": 3768 }, { "epoch": 1.979516806722689, "grad_norm": 14.868637286663587, "learning_rate": 3.1332654166562494e-06, "loss": 1.4352052211761475, "step": 3769 }, { "epoch": 1.9800420168067228, "grad_norm": 7.7878468995648324, "learning_rate": 3.1304307171165675e-06, "loss": 1.4993510246276855, "step": 3770 }, { "epoch": 1.9805672268907561, "grad_norm": 9.912002918695766, "learning_rate": 3.1275967159942976e-06, "loss": 1.2762047052383423, "step": 3771 }, { "epoch": 1.98109243697479, "grad_norm": 10.98363667889731, "learning_rate": 3.124763414348141e-06, "loss": 1.6206231117248535, "step": 3772 }, { "epoch": 1.9816176470588234, "grad_norm": 13.468199718917681, "learning_rate": 3.1219308132365365e-06, "loss": 1.6075819730758667, "step": 3773 }, { "epoch": 1.9821428571428572, "grad_norm": 12.48978364742403, "learning_rate": 3.1190989137176653e-06, "loss": 1.6008939743041992, "step": 3774 }, { "epoch": 1.9826680672268906, "grad_norm": 8.891890225396363, "learning_rate": 3.116267716849441e-06, "loss": 1.1342711448669434, "step": 3775 }, { "epoch": 1.9831932773109244, "grad_norm": 7.782369396395457, "learning_rate": 3.1134372236895193e-06, "loss": 1.7301146984100342, "step": 3776 }, { "epoch": 1.9837184873949578, "grad_norm": 14.291541152153517, "learning_rate": 3.110607435295289e-06, "loss": 1.9669077396392822, "step": 3777 }, { "epoch": 1.9842436974789917, "grad_norm": 11.545226440558016, "learning_rate": 3.1077783527238807e-06, "loss": 1.5780227184295654, "step": 3778 }, { "epoch": 1.9847689075630253, "grad_norm": 10.03545786660131, "learning_rate": 3.1049499770321572e-06, "loss": 1.5493851900100708, "step": 3779 }, { "epoch": 1.9852941176470589, "grad_norm": 9.881074989678645, "learning_rate": 3.102122309276717e-06, "loss": 1.2470885515213013, "step": 3780 }, { "epoch": 1.9858193277310925, "grad_norm": 10.085111739490527, "learning_rate": 3.099295350513898e-06, "loss": 1.616032361984253, "step": 3781 }, { "epoch": 1.986344537815126, "grad_norm": 8.1920549728376, "learning_rate": 3.09646910179977e-06, "loss": 1.7792916297912598, "step": 3782 }, { "epoch": 1.9868697478991597, "grad_norm": 10.285570410870244, "learning_rate": 3.093643564190138e-06, "loss": 2.3514113426208496, "step": 3783 }, { "epoch": 1.9873949579831933, "grad_norm": 11.833868373130606, "learning_rate": 3.0908187387405407e-06, "loss": 1.5242578983306885, "step": 3784 }, { "epoch": 1.987920168067227, "grad_norm": 7.152017643377308, "learning_rate": 3.087994626506254e-06, "loss": 1.1745996475219727, "step": 3785 }, { "epoch": 1.9884453781512605, "grad_norm": 7.9758913611007864, "learning_rate": 3.085171228542284e-06, "loss": 1.1497207880020142, "step": 3786 }, { "epoch": 1.9889705882352942, "grad_norm": 8.788039493597397, "learning_rate": 3.0823485459033707e-06, "loss": 1.6483783721923828, "step": 3787 }, { "epoch": 1.9894957983193278, "grad_norm": 23.501388834835307, "learning_rate": 3.0795265796439876e-06, "loss": 2.2067291736602783, "step": 3788 }, { "epoch": 1.9900210084033614, "grad_norm": 17.106029301572583, "learning_rate": 3.0767053308183416e-06, "loss": 1.5530177354812622, "step": 3789 }, { "epoch": 1.990546218487395, "grad_norm": 15.098012723446429, "learning_rate": 3.073884800480369e-06, "loss": 1.292409896850586, "step": 3790 }, { "epoch": 1.9910714285714286, "grad_norm": 16.860824491413464, "learning_rate": 3.0710649896837386e-06, "loss": 1.5788860321044922, "step": 3791 }, { "epoch": 1.9915966386554622, "grad_norm": 14.040446919933688, "learning_rate": 3.068245899481851e-06, "loss": 1.4469319581985474, "step": 3792 }, { "epoch": 1.9921218487394958, "grad_norm": 14.268241574021172, "learning_rate": 3.0654275309278382e-06, "loss": 1.6369696855545044, "step": 3793 }, { "epoch": 1.9926470588235294, "grad_norm": 10.859225606953428, "learning_rate": 3.0626098850745616e-06, "loss": 1.3132859468460083, "step": 3794 }, { "epoch": 1.993172268907563, "grad_norm": 12.100034108218185, "learning_rate": 3.0597929629746114e-06, "loss": 1.1590535640716553, "step": 3795 }, { "epoch": 1.9936974789915967, "grad_norm": 9.661986359853291, "learning_rate": 3.0569767656803106e-06, "loss": 1.2120416164398193, "step": 3796 }, { "epoch": 1.9942226890756303, "grad_norm": 20.096411091337497, "learning_rate": 3.0541612942437095e-06, "loss": 1.4346848726272583, "step": 3797 }, { "epoch": 1.9947478991596639, "grad_norm": 9.594464947917247, "learning_rate": 3.051346549716586e-06, "loss": 0.8941363096237183, "step": 3798 }, { "epoch": 1.9952731092436975, "grad_norm": 11.038374544376225, "learning_rate": 3.0485325331504477e-06, "loss": 1.2042505741119385, "step": 3799 }, { "epoch": 1.995798319327731, "grad_norm": 9.0076016644004, "learning_rate": 3.045719245596533e-06, "loss": 1.5936388969421387, "step": 3800 }, { "epoch": 1.9963235294117647, "grad_norm": 11.03543858156108, "learning_rate": 3.0429066881058036e-06, "loss": 0.7732980251312256, "step": 3801 }, { "epoch": 1.9968487394957983, "grad_norm": 12.590583200344826, "learning_rate": 3.04009486172895e-06, "loss": 1.8531079292297363, "step": 3802 }, { "epoch": 1.997373949579832, "grad_norm": 11.735574804351145, "learning_rate": 3.037283767516389e-06, "loss": 1.287428855895996, "step": 3803 }, { "epoch": 1.9978991596638656, "grad_norm": 10.496275378962904, "learning_rate": 3.034473406518268e-06, "loss": 1.0967342853546143, "step": 3804 }, { "epoch": 1.9984243697478992, "grad_norm": 9.232693829502468, "learning_rate": 3.031663779784454e-06, "loss": 1.1606006622314453, "step": 3805 }, { "epoch": 1.9989495798319328, "grad_norm": 18.46446368760907, "learning_rate": 3.028854888364543e-06, "loss": 1.6747987270355225, "step": 3806 }, { "epoch": 1.9994747899159664, "grad_norm": 8.51216303954961, "learning_rate": 3.026046733307859e-06, "loss": 1.3044161796569824, "step": 3807 }, { "epoch": 2.0, "grad_norm": 11.305158060912134, "learning_rate": 3.0232393156634455e-06, "loss": 1.2226500511169434, "step": 3808 }, { "epoch": 2.000525210084034, "grad_norm": 11.017549290867258, "learning_rate": 3.020432636480074e-06, "loss": 0.3734968304634094, "step": 3809 }, { "epoch": 2.0010504201680672, "grad_norm": 12.858661865707514, "learning_rate": 3.0176266968062386e-06, "loss": 0.8799101710319519, "step": 3810 }, { "epoch": 2.001575630252101, "grad_norm": 11.04993858918612, "learning_rate": 3.014821497690159e-06, "loss": 0.7394332885742188, "step": 3811 }, { "epoch": 2.0021008403361344, "grad_norm": 11.846825155028316, "learning_rate": 3.0120170401797765e-06, "loss": 0.5659482479095459, "step": 3812 }, { "epoch": 2.0026260504201683, "grad_norm": 14.090147360190999, "learning_rate": 3.0092133253227563e-06, "loss": 0.8160654306411743, "step": 3813 }, { "epoch": 2.0031512605042017, "grad_norm": 11.588286715579699, "learning_rate": 3.0064103541664837e-06, "loss": 0.4015316367149353, "step": 3814 }, { "epoch": 2.0036764705882355, "grad_norm": 12.505131624805335, "learning_rate": 3.0036081277580715e-06, "loss": 0.685259222984314, "step": 3815 }, { "epoch": 2.004201680672269, "grad_norm": 8.20036099369699, "learning_rate": 3.0008066471443486e-06, "loss": 0.28031444549560547, "step": 3816 }, { "epoch": 2.0047268907563027, "grad_norm": 13.69636019869882, "learning_rate": 2.9980059133718687e-06, "loss": 0.49320584535598755, "step": 3817 }, { "epoch": 2.005252100840336, "grad_norm": 7.201972156017342, "learning_rate": 2.995205927486905e-06, "loss": 0.5989927649497986, "step": 3818 }, { "epoch": 2.00577731092437, "grad_norm": 9.1127502561537, "learning_rate": 2.992406690535453e-06, "loss": 0.3774471879005432, "step": 3819 }, { "epoch": 2.0063025210084033, "grad_norm": 8.926901854429863, "learning_rate": 2.989608203563227e-06, "loss": 0.6756539344787598, "step": 3820 }, { "epoch": 2.006827731092437, "grad_norm": 6.096267081837672, "learning_rate": 2.986810467615659e-06, "loss": 0.23616181313991547, "step": 3821 }, { "epoch": 2.0073529411764706, "grad_norm": 6.715899318757637, "learning_rate": 2.984013483737906e-06, "loss": 0.5607936382293701, "step": 3822 }, { "epoch": 2.0078781512605044, "grad_norm": 10.095202426849127, "learning_rate": 2.9812172529748395e-06, "loss": 0.6747167706489563, "step": 3823 }, { "epoch": 2.008403361344538, "grad_norm": 6.2149130542882585, "learning_rate": 2.978421776371051e-06, "loss": 0.14100810885429382, "step": 3824 }, { "epoch": 2.0089285714285716, "grad_norm": 8.725547893562963, "learning_rate": 2.9756270549708497e-06, "loss": 0.9651519656181335, "step": 3825 }, { "epoch": 2.009453781512605, "grad_norm": 13.252615132261226, "learning_rate": 2.9728330898182654e-06, "loss": 0.5731452107429504, "step": 3826 }, { "epoch": 2.009978991596639, "grad_norm": 14.293661270700504, "learning_rate": 2.9700398819570414e-06, "loss": 1.022626280784607, "step": 3827 }, { "epoch": 2.0105042016806722, "grad_norm": 17.175073827609122, "learning_rate": 2.9672474324306407e-06, "loss": 0.6618038415908813, "step": 3828 }, { "epoch": 2.011029411764706, "grad_norm": 10.76623365226854, "learning_rate": 2.9644557422822406e-06, "loss": 0.47713351249694824, "step": 3829 }, { "epoch": 2.0115546218487395, "grad_norm": 18.209384971359377, "learning_rate": 2.9616648125547395e-06, "loss": 0.7910279035568237, "step": 3830 }, { "epoch": 2.0120798319327733, "grad_norm": 9.744248855130357, "learning_rate": 2.9588746442907467e-06, "loss": 0.2927781045436859, "step": 3831 }, { "epoch": 2.0126050420168067, "grad_norm": 9.067333591111359, "learning_rate": 2.9560852385325876e-06, "loss": 0.3623805642127991, "step": 3832 }, { "epoch": 2.0131302521008405, "grad_norm": 13.754858955605707, "learning_rate": 2.9532965963223076e-06, "loss": 0.8041249513626099, "step": 3833 }, { "epoch": 2.013655462184874, "grad_norm": 14.329204658941654, "learning_rate": 2.9505087187016614e-06, "loss": 0.711012065410614, "step": 3834 }, { "epoch": 2.0141806722689077, "grad_norm": 10.33204702661545, "learning_rate": 2.9477216067121196e-06, "loss": 0.4064977467060089, "step": 3835 }, { "epoch": 2.014705882352941, "grad_norm": 13.262578282956373, "learning_rate": 2.9449352613948667e-06, "loss": 0.38866499066352844, "step": 3836 }, { "epoch": 2.015231092436975, "grad_norm": 8.693634074278547, "learning_rate": 2.9421496837908036e-06, "loss": 0.48318976163864136, "step": 3837 }, { "epoch": 2.0157563025210083, "grad_norm": 10.649148028003987, "learning_rate": 2.939364874940541e-06, "loss": 1.2861061096191406, "step": 3838 }, { "epoch": 2.016281512605042, "grad_norm": 11.18722937697221, "learning_rate": 2.936580835884404e-06, "loss": 0.2546107769012451, "step": 3839 }, { "epoch": 2.0168067226890756, "grad_norm": 13.605156504134753, "learning_rate": 2.9337975676624276e-06, "loss": 0.42386680841445923, "step": 3840 }, { "epoch": 2.0173319327731094, "grad_norm": 11.149963058265799, "learning_rate": 2.9310150713143637e-06, "loss": 0.5541623830795288, "step": 3841 }, { "epoch": 2.017857142857143, "grad_norm": 10.713457863765639, "learning_rate": 2.928233347879671e-06, "loss": 0.7140313982963562, "step": 3842 }, { "epoch": 2.0183823529411766, "grad_norm": 10.1203504368144, "learning_rate": 2.9254523983975224e-06, "loss": 0.6277381777763367, "step": 3843 }, { "epoch": 2.01890756302521, "grad_norm": 12.302024743997311, "learning_rate": 2.9226722239068006e-06, "loss": 0.5380294322967529, "step": 3844 }, { "epoch": 2.019432773109244, "grad_norm": 13.075573617347679, "learning_rate": 2.9198928254461e-06, "loss": 0.42165398597717285, "step": 3845 }, { "epoch": 2.0199579831932772, "grad_norm": 10.209053773592924, "learning_rate": 2.9171142040537225e-06, "loss": 0.454216867685318, "step": 3846 }, { "epoch": 2.020483193277311, "grad_norm": 9.385643099394109, "learning_rate": 2.9143363607676824e-06, "loss": 0.4512168765068054, "step": 3847 }, { "epoch": 2.0210084033613445, "grad_norm": 16.04554905978146, "learning_rate": 2.911559296625701e-06, "loss": 0.5049360990524292, "step": 3848 }, { "epoch": 2.0215336134453783, "grad_norm": 9.503456802547229, "learning_rate": 2.908783012665209e-06, "loss": 0.7787807583808899, "step": 3849 }, { "epoch": 2.0220588235294117, "grad_norm": 8.08266768016149, "learning_rate": 2.9060075099233496e-06, "loss": 0.6721572279930115, "step": 3850 }, { "epoch": 2.0225840336134455, "grad_norm": 11.000241803723165, "learning_rate": 2.9032327894369693e-06, "loss": 0.45701268315315247, "step": 3851 }, { "epoch": 2.023109243697479, "grad_norm": 7.913564797158489, "learning_rate": 2.9004588522426235e-06, "loss": 0.3375888168811798, "step": 3852 }, { "epoch": 2.0236344537815127, "grad_norm": 14.779120786899409, "learning_rate": 2.8976856993765766e-06, "loss": 0.350841224193573, "step": 3853 }, { "epoch": 2.024159663865546, "grad_norm": 12.045942359753681, "learning_rate": 2.894913331874798e-06, "loss": 0.714718222618103, "step": 3854 }, { "epoch": 2.02468487394958, "grad_norm": 6.860206953796827, "learning_rate": 2.8921417507729644e-06, "loss": 0.5312122106552124, "step": 3855 }, { "epoch": 2.0252100840336134, "grad_norm": 18.674079867961407, "learning_rate": 2.8893709571064575e-06, "loss": 0.7049558162689209, "step": 3856 }, { "epoch": 2.025735294117647, "grad_norm": 9.550567764233456, "learning_rate": 2.8866009519103705e-06, "loss": 0.29532164335250854, "step": 3857 }, { "epoch": 2.0262605042016806, "grad_norm": 9.16837068693569, "learning_rate": 2.883831736219495e-06, "loss": 0.35209500789642334, "step": 3858 }, { "epoch": 2.0267857142857144, "grad_norm": 13.896448359397509, "learning_rate": 2.8810633110683312e-06, "loss": 0.6380374431610107, "step": 3859 }, { "epoch": 2.027310924369748, "grad_norm": 11.764312168328866, "learning_rate": 2.878295677491083e-06, "loss": 0.6972619891166687, "step": 3860 }, { "epoch": 2.0278361344537816, "grad_norm": 12.943753016303793, "learning_rate": 2.875528836521658e-06, "loss": 0.44367527961730957, "step": 3861 }, { "epoch": 2.028361344537815, "grad_norm": 11.529936458877117, "learning_rate": 2.8727627891936705e-06, "loss": 0.9527031779289246, "step": 3862 }, { "epoch": 2.028886554621849, "grad_norm": 11.630298224019956, "learning_rate": 2.869997536540435e-06, "loss": 0.5449413061141968, "step": 3863 }, { "epoch": 2.0294117647058822, "grad_norm": 9.075541387998747, "learning_rate": 2.8672330795949683e-06, "loss": 0.3742218017578125, "step": 3864 }, { "epoch": 2.029936974789916, "grad_norm": 15.437416550452978, "learning_rate": 2.864469419389997e-06, "loss": 1.433685064315796, "step": 3865 }, { "epoch": 2.0304621848739495, "grad_norm": 18.644197410513236, "learning_rate": 2.8617065569579406e-06, "loss": 1.8893550634384155, "step": 3866 }, { "epoch": 2.0309873949579833, "grad_norm": 8.28267567388336, "learning_rate": 2.858944493330929e-06, "loss": 0.27451810240745544, "step": 3867 }, { "epoch": 2.0315126050420167, "grad_norm": 8.978545368849838, "learning_rate": 2.856183229540789e-06, "loss": 0.4551701843738556, "step": 3868 }, { "epoch": 2.0320378151260505, "grad_norm": 12.2975837103672, "learning_rate": 2.8534227666190484e-06, "loss": 0.8021912574768066, "step": 3869 }, { "epoch": 2.032563025210084, "grad_norm": 7.900657783165174, "learning_rate": 2.850663105596937e-06, "loss": 0.3059179186820984, "step": 3870 }, { "epoch": 2.0330882352941178, "grad_norm": 10.982164455366245, "learning_rate": 2.8479042475053853e-06, "loss": 0.4565742015838623, "step": 3871 }, { "epoch": 2.033613445378151, "grad_norm": 9.12506877974996, "learning_rate": 2.8451461933750237e-06, "loss": 0.477339506149292, "step": 3872 }, { "epoch": 2.034138655462185, "grad_norm": 11.485943345713027, "learning_rate": 2.8423889442361797e-06, "loss": 0.8305515050888062, "step": 3873 }, { "epoch": 2.0346638655462184, "grad_norm": 10.011597215714765, "learning_rate": 2.8396325011188865e-06, "loss": 0.39306312799453735, "step": 3874 }, { "epoch": 2.035189075630252, "grad_norm": 12.329547083437253, "learning_rate": 2.83687686505287e-06, "loss": 0.4426557719707489, "step": 3875 }, { "epoch": 2.0357142857142856, "grad_norm": 10.078565663032956, "learning_rate": 2.834122037067556e-06, "loss": 0.3150191307067871, "step": 3876 }, { "epoch": 2.0362394957983194, "grad_norm": 13.17037838338679, "learning_rate": 2.831368018192071e-06, "loss": 0.9622311592102051, "step": 3877 }, { "epoch": 2.036764705882353, "grad_norm": 12.099593531796275, "learning_rate": 2.828614809455236e-06, "loss": 0.26061639189720154, "step": 3878 }, { "epoch": 2.0372899159663866, "grad_norm": 7.499008840841319, "learning_rate": 2.8258624118855704e-06, "loss": 0.4080360233783722, "step": 3879 }, { "epoch": 2.03781512605042, "grad_norm": 10.851118097126037, "learning_rate": 2.8231108265112905e-06, "loss": 0.43404507637023926, "step": 3880 }, { "epoch": 2.038340336134454, "grad_norm": 10.549301939240816, "learning_rate": 2.8203600543603116e-06, "loss": 0.273033082485199, "step": 3881 }, { "epoch": 2.0388655462184873, "grad_norm": 9.691882665058618, "learning_rate": 2.817610096460243e-06, "loss": 0.41321203112602234, "step": 3882 }, { "epoch": 2.039390756302521, "grad_norm": 9.53414614050278, "learning_rate": 2.814860953838389e-06, "loss": 0.6547293066978455, "step": 3883 }, { "epoch": 2.0399159663865545, "grad_norm": 11.465218009629313, "learning_rate": 2.8121126275217514e-06, "loss": 0.43598318099975586, "step": 3884 }, { "epoch": 2.0404411764705883, "grad_norm": 16.61752688347944, "learning_rate": 2.809365118537024e-06, "loss": 1.627102255821228, "step": 3885 }, { "epoch": 2.0409663865546217, "grad_norm": 14.211525186101717, "learning_rate": 2.8066184279106e-06, "loss": 1.2373404502868652, "step": 3886 }, { "epoch": 2.0414915966386555, "grad_norm": 9.062227856513767, "learning_rate": 2.8038725566685596e-06, "loss": 0.1954503208398819, "step": 3887 }, { "epoch": 2.042016806722689, "grad_norm": 13.707453553033107, "learning_rate": 2.801127505836687e-06, "loss": 0.5624500513076782, "step": 3888 }, { "epoch": 2.0425420168067228, "grad_norm": 8.663227093017563, "learning_rate": 2.7983832764404517e-06, "loss": 0.6806732416152954, "step": 3889 }, { "epoch": 2.043067226890756, "grad_norm": 10.834192493328237, "learning_rate": 2.79563986950502e-06, "loss": 0.5868386030197144, "step": 3890 }, { "epoch": 2.04359243697479, "grad_norm": 11.192581350434342, "learning_rate": 2.7928972860552482e-06, "loss": 0.6238075494766235, "step": 3891 }, { "epoch": 2.0441176470588234, "grad_norm": 12.320260608284606, "learning_rate": 2.7901555271156887e-06, "loss": 0.5835066437721252, "step": 3892 }, { "epoch": 2.044642857142857, "grad_norm": 10.049967753868659, "learning_rate": 2.787414593710583e-06, "loss": 0.6015195846557617, "step": 3893 }, { "epoch": 2.0451680672268906, "grad_norm": 13.849181672895295, "learning_rate": 2.784674486863865e-06, "loss": 0.6851204633712769, "step": 3894 }, { "epoch": 2.0456932773109244, "grad_norm": 7.702221390228503, "learning_rate": 2.781935207599158e-06, "loss": 0.1196926012635231, "step": 3895 }, { "epoch": 2.046218487394958, "grad_norm": 10.13467570800104, "learning_rate": 2.7791967569397815e-06, "loss": 0.35627737641334534, "step": 3896 }, { "epoch": 2.0467436974789917, "grad_norm": 14.420159313353103, "learning_rate": 2.7764591359087415e-06, "loss": 0.3981572389602661, "step": 3897 }, { "epoch": 2.047268907563025, "grad_norm": 12.415736623303292, "learning_rate": 2.7737223455287343e-06, "loss": 0.47580811381340027, "step": 3898 }, { "epoch": 2.047794117647059, "grad_norm": 8.037988546145193, "learning_rate": 2.770986386822145e-06, "loss": 0.4026325047016144, "step": 3899 }, { "epoch": 2.0483193277310923, "grad_norm": 12.677038294628572, "learning_rate": 2.76825126081105e-06, "loss": 0.589562177658081, "step": 3900 }, { "epoch": 2.048844537815126, "grad_norm": 14.783707895557464, "learning_rate": 2.7655169685172146e-06, "loss": 1.1209787130355835, "step": 3901 }, { "epoch": 2.0493697478991595, "grad_norm": 8.33452082081271, "learning_rate": 2.7627835109620886e-06, "loss": 0.2675451636314392, "step": 3902 }, { "epoch": 2.0498949579831933, "grad_norm": 7.664460906222028, "learning_rate": 2.760050889166818e-06, "loss": 0.24929499626159668, "step": 3903 }, { "epoch": 2.0504201680672267, "grad_norm": 9.846864581004077, "learning_rate": 2.7573191041522297e-06, "loss": 0.42551344633102417, "step": 3904 }, { "epoch": 2.0509453781512605, "grad_norm": 8.909887491294871, "learning_rate": 2.7545881569388404e-06, "loss": 0.5701692700386047, "step": 3905 }, { "epoch": 2.051470588235294, "grad_norm": 11.035772058186344, "learning_rate": 2.751858048546853e-06, "loss": 0.6184131503105164, "step": 3906 }, { "epoch": 2.0519957983193278, "grad_norm": 12.446621976916722, "learning_rate": 2.7491287799961586e-06, "loss": 0.32249441742897034, "step": 3907 }, { "epoch": 2.052521008403361, "grad_norm": 10.6240426881154, "learning_rate": 2.746400352306332e-06, "loss": 0.8945693969726562, "step": 3908 }, { "epoch": 2.053046218487395, "grad_norm": 9.27758123897025, "learning_rate": 2.7436727664966368e-06, "loss": 0.6663864850997925, "step": 3909 }, { "epoch": 2.0535714285714284, "grad_norm": 13.679583878748852, "learning_rate": 2.7409460235860185e-06, "loss": 0.34769487380981445, "step": 3910 }, { "epoch": 2.054096638655462, "grad_norm": 11.696669671157458, "learning_rate": 2.738220124593114e-06, "loss": 0.38508152961730957, "step": 3911 }, { "epoch": 2.0546218487394956, "grad_norm": 10.339399386513577, "learning_rate": 2.7354950705362384e-06, "loss": 0.8562803268432617, "step": 3912 }, { "epoch": 2.0551470588235294, "grad_norm": 9.77377633192539, "learning_rate": 2.7327708624333936e-06, "loss": 0.4563913941383362, "step": 3913 }, { "epoch": 2.055672268907563, "grad_norm": 11.50429196068152, "learning_rate": 2.7300475013022666e-06, "loss": 0.8931875824928284, "step": 3914 }, { "epoch": 2.0561974789915967, "grad_norm": 8.247738013539651, "learning_rate": 2.7273249881602257e-06, "loss": 0.39803647994995117, "step": 3915 }, { "epoch": 2.05672268907563, "grad_norm": 11.481771666631674, "learning_rate": 2.7246033240243246e-06, "loss": 0.6777544021606445, "step": 3916 }, { "epoch": 2.057247899159664, "grad_norm": 6.61136629234512, "learning_rate": 2.7218825099112966e-06, "loss": 0.31973081827163696, "step": 3917 }, { "epoch": 2.0577731092436973, "grad_norm": 13.368749614123901, "learning_rate": 2.7191625468375636e-06, "loss": 0.47313475608825684, "step": 3918 }, { "epoch": 2.058298319327731, "grad_norm": 14.178979557281286, "learning_rate": 2.7164434358192236e-06, "loss": 0.42458271980285645, "step": 3919 }, { "epoch": 2.0588235294117645, "grad_norm": 16.269726023071627, "learning_rate": 2.7137251778720587e-06, "loss": 0.7796242237091064, "step": 3920 }, { "epoch": 2.0593487394957983, "grad_norm": 13.820851522676165, "learning_rate": 2.7110077740115315e-06, "loss": 0.3527318239212036, "step": 3921 }, { "epoch": 2.0598739495798317, "grad_norm": 11.202078971691062, "learning_rate": 2.708291225252787e-06, "loss": 0.36367225646972656, "step": 3922 }, { "epoch": 2.0603991596638656, "grad_norm": 10.148155009117348, "learning_rate": 2.705575532610649e-06, "loss": 0.47870922088623047, "step": 3923 }, { "epoch": 2.060924369747899, "grad_norm": 6.3429649055401764, "learning_rate": 2.702860697099621e-06, "loss": 0.24576593935489655, "step": 3924 }, { "epoch": 2.0614495798319328, "grad_norm": 7.984679998337307, "learning_rate": 2.7001467197338905e-06, "loss": 0.3550044894218445, "step": 3925 }, { "epoch": 2.0619747899159666, "grad_norm": 8.67987105281844, "learning_rate": 2.6974336015273204e-06, "loss": 0.32794153690338135, "step": 3926 }, { "epoch": 2.0625, "grad_norm": 8.162135256358189, "learning_rate": 2.6947213434934537e-06, "loss": 0.3276750147342682, "step": 3927 }, { "epoch": 2.0630252100840334, "grad_norm": 14.703477596563303, "learning_rate": 2.692009946645513e-06, "loss": 0.5918416976928711, "step": 3928 }, { "epoch": 2.0635504201680672, "grad_norm": 7.921644943925436, "learning_rate": 2.6892994119963965e-06, "loss": 0.5117301940917969, "step": 3929 }, { "epoch": 2.064075630252101, "grad_norm": 8.833998811344054, "learning_rate": 2.686589740558683e-06, "loss": 0.325096070766449, "step": 3930 }, { "epoch": 2.0646008403361344, "grad_norm": 14.840585915945963, "learning_rate": 2.683880933344628e-06, "loss": 0.8907138109207153, "step": 3931 }, { "epoch": 2.0651260504201683, "grad_norm": 12.389823698067618, "learning_rate": 2.681172991366162e-06, "loss": 0.7546146512031555, "step": 3932 }, { "epoch": 2.0656512605042017, "grad_norm": 11.39509040830051, "learning_rate": 2.678465915634899e-06, "loss": 0.39673149585723877, "step": 3933 }, { "epoch": 2.0661764705882355, "grad_norm": 13.020760188029561, "learning_rate": 2.675759707162122e-06, "loss": 0.6929548978805542, "step": 3934 }, { "epoch": 2.066701680672269, "grad_norm": 7.541337829367985, "learning_rate": 2.6730543669587927e-06, "loss": 0.5798717737197876, "step": 3935 }, { "epoch": 2.0672268907563027, "grad_norm": 11.399322480534979, "learning_rate": 2.670349896035549e-06, "loss": 1.1540753841400146, "step": 3936 }, { "epoch": 2.067752100840336, "grad_norm": 7.5590983302190375, "learning_rate": 2.667646295402704e-06, "loss": 0.31627383828163147, "step": 3937 }, { "epoch": 2.06827731092437, "grad_norm": 16.506451660048146, "learning_rate": 2.664943566070244e-06, "loss": 0.3912052810192108, "step": 3938 }, { "epoch": 2.0688025210084033, "grad_norm": 17.581512777706713, "learning_rate": 2.6622417090478304e-06, "loss": 0.6730505228042603, "step": 3939 }, { "epoch": 2.069327731092437, "grad_norm": 9.1182455516819, "learning_rate": 2.6595407253448025e-06, "loss": 0.7248634099960327, "step": 3940 }, { "epoch": 2.0698529411764706, "grad_norm": 10.391271026062176, "learning_rate": 2.656840615970169e-06, "loss": 0.5894888043403625, "step": 3941 }, { "epoch": 2.0703781512605044, "grad_norm": 6.29550669571111, "learning_rate": 2.6541413819326123e-06, "loss": 0.5516920685768127, "step": 3942 }, { "epoch": 2.070903361344538, "grad_norm": 8.091673595762389, "learning_rate": 2.651443024240489e-06, "loss": 0.7397844195365906, "step": 3943 }, { "epoch": 2.0714285714285716, "grad_norm": 9.549603541141096, "learning_rate": 2.6487455439018295e-06, "loss": 0.4368700385093689, "step": 3944 }, { "epoch": 2.071953781512605, "grad_norm": 9.408959892705115, "learning_rate": 2.646048941924333e-06, "loss": 0.4290487766265869, "step": 3945 }, { "epoch": 2.072478991596639, "grad_norm": 9.45932152508389, "learning_rate": 2.643353219315374e-06, "loss": 0.44970598816871643, "step": 3946 }, { "epoch": 2.0730042016806722, "grad_norm": 8.10109812165019, "learning_rate": 2.6406583770819946e-06, "loss": 0.6798231601715088, "step": 3947 }, { "epoch": 2.073529411764706, "grad_norm": 10.04046574807137, "learning_rate": 2.6379644162309135e-06, "loss": 0.44904816150665283, "step": 3948 }, { "epoch": 2.0740546218487395, "grad_norm": 11.184632817122909, "learning_rate": 2.635271337768517e-06, "loss": 0.5745569467544556, "step": 3949 }, { "epoch": 2.0745798319327733, "grad_norm": 31.641268530029024, "learning_rate": 2.632579142700861e-06, "loss": 0.8730089664459229, "step": 3950 }, { "epoch": 2.0751050420168067, "grad_norm": 12.92673192821059, "learning_rate": 2.629887832033672e-06, "loss": 0.32680264115333557, "step": 3951 }, { "epoch": 2.0756302521008405, "grad_norm": 13.976098941662803, "learning_rate": 2.6271974067723464e-06, "loss": 0.3138732314109802, "step": 3952 }, { "epoch": 2.076155462184874, "grad_norm": 12.995950302631732, "learning_rate": 2.6245078679219503e-06, "loss": 0.6802486777305603, "step": 3953 }, { "epoch": 2.0766806722689077, "grad_norm": 8.30181281232033, "learning_rate": 2.621819216487216e-06, "loss": 0.7510164976119995, "step": 3954 }, { "epoch": 2.077205882352941, "grad_norm": 14.158626810720211, "learning_rate": 2.619131453472551e-06, "loss": 0.47994887828826904, "step": 3955 }, { "epoch": 2.077731092436975, "grad_norm": 32.55403575755407, "learning_rate": 2.6164445798820235e-06, "loss": 0.6356985569000244, "step": 3956 }, { "epoch": 2.0782563025210083, "grad_norm": 11.353833429346354, "learning_rate": 2.613758596719373e-06, "loss": 0.7022428512573242, "step": 3957 }, { "epoch": 2.078781512605042, "grad_norm": 10.912176236666568, "learning_rate": 2.6110735049880054e-06, "loss": 0.9900159239768982, "step": 3958 }, { "epoch": 2.0793067226890756, "grad_norm": 12.037717700323714, "learning_rate": 2.6083893056909937e-06, "loss": 0.3570025563240051, "step": 3959 }, { "epoch": 2.0798319327731094, "grad_norm": 10.470781339000474, "learning_rate": 2.6057059998310775e-06, "loss": 0.5455389022827148, "step": 3960 }, { "epoch": 2.080357142857143, "grad_norm": 14.065778481096347, "learning_rate": 2.603023588410662e-06, "loss": 0.4174801707267761, "step": 3961 }, { "epoch": 2.0808823529411766, "grad_norm": 15.10690979874097, "learning_rate": 2.6003420724318217e-06, "loss": 0.8283196091651917, "step": 3962 }, { "epoch": 2.08140756302521, "grad_norm": 6.956561241135267, "learning_rate": 2.597661452896293e-06, "loss": 0.2815828323364258, "step": 3963 }, { "epoch": 2.081932773109244, "grad_norm": 8.890531207086603, "learning_rate": 2.594981730805478e-06, "loss": 0.5109238624572754, "step": 3964 }, { "epoch": 2.0824579831932772, "grad_norm": 10.572296459698237, "learning_rate": 2.5923029071604443e-06, "loss": 0.5378514528274536, "step": 3965 }, { "epoch": 2.082983193277311, "grad_norm": 19.01366643481593, "learning_rate": 2.589624982961923e-06, "loss": 0.4682660698890686, "step": 3966 }, { "epoch": 2.0835084033613445, "grad_norm": 16.041941215634868, "learning_rate": 2.5869479592103096e-06, "loss": 0.8170668482780457, "step": 3967 }, { "epoch": 2.0840336134453783, "grad_norm": 14.90220450351714, "learning_rate": 2.584271836905664e-06, "loss": 2.5968964099884033, "step": 3968 }, { "epoch": 2.0845588235294117, "grad_norm": 10.322535502652263, "learning_rate": 2.5815966170477065e-06, "loss": 0.36919355392456055, "step": 3969 }, { "epoch": 2.0850840336134455, "grad_norm": 12.3053493625888, "learning_rate": 2.5789223006358257e-06, "loss": 0.52881920337677, "step": 3970 }, { "epoch": 2.085609243697479, "grad_norm": 10.821405748592051, "learning_rate": 2.5762488886690682e-06, "loss": 0.5289602279663086, "step": 3971 }, { "epoch": 2.0861344537815127, "grad_norm": 9.47215839458051, "learning_rate": 2.5735763821461436e-06, "loss": 0.5808546543121338, "step": 3972 }, { "epoch": 2.086659663865546, "grad_norm": 21.32001531849674, "learning_rate": 2.5709047820654236e-06, "loss": 2.2433700561523438, "step": 3973 }, { "epoch": 2.08718487394958, "grad_norm": 10.004597607684564, "learning_rate": 2.5682340894249413e-06, "loss": 0.4893033802509308, "step": 3974 }, { "epoch": 2.0877100840336134, "grad_norm": 8.013118652559115, "learning_rate": 2.5655643052223915e-06, "loss": 0.24237364530563354, "step": 3975 }, { "epoch": 2.088235294117647, "grad_norm": 10.646044773671434, "learning_rate": 2.5628954304551263e-06, "loss": 0.8329358100891113, "step": 3976 }, { "epoch": 2.0887605042016806, "grad_norm": 12.6636279499299, "learning_rate": 2.5602274661201643e-06, "loss": 0.3509142994880676, "step": 3977 }, { "epoch": 2.0892857142857144, "grad_norm": 7.382820939104313, "learning_rate": 2.5575604132141792e-06, "loss": 0.6189857721328735, "step": 3978 }, { "epoch": 2.089810924369748, "grad_norm": 9.669628833459093, "learning_rate": 2.5548942727335046e-06, "loss": 0.6151308417320251, "step": 3979 }, { "epoch": 2.0903361344537816, "grad_norm": 13.405455885657206, "learning_rate": 2.552229045674135e-06, "loss": 0.37452220916748047, "step": 3980 }, { "epoch": 2.090861344537815, "grad_norm": 7.3374594189967866, "learning_rate": 2.549564733031722e-06, "loss": 0.7517160773277283, "step": 3981 }, { "epoch": 2.091386554621849, "grad_norm": 8.72194658983393, "learning_rate": 2.5469013358015765e-06, "loss": 0.4801388680934906, "step": 3982 }, { "epoch": 2.0919117647058822, "grad_norm": 7.948986903005862, "learning_rate": 2.5442388549786668e-06, "loss": 0.08267168700695038, "step": 3983 }, { "epoch": 2.092436974789916, "grad_norm": 20.216837402667025, "learning_rate": 2.5415772915576186e-06, "loss": 0.5310105681419373, "step": 3984 }, { "epoch": 2.0929621848739495, "grad_norm": 8.345302185716593, "learning_rate": 2.538916646532718e-06, "loss": 0.5747807025909424, "step": 3985 }, { "epoch": 2.0934873949579833, "grad_norm": 14.033375458667514, "learning_rate": 2.536256920897905e-06, "loss": 0.3640735149383545, "step": 3986 }, { "epoch": 2.0940126050420167, "grad_norm": 9.844116766559278, "learning_rate": 2.5335981156467755e-06, "loss": 0.4546549916267395, "step": 3987 }, { "epoch": 2.0945378151260505, "grad_norm": 10.084119867347672, "learning_rate": 2.530940231772584e-06, "loss": 0.3367902636528015, "step": 3988 }, { "epoch": 2.095063025210084, "grad_norm": 11.81220005335899, "learning_rate": 2.528283270268238e-06, "loss": 0.48261862993240356, "step": 3989 }, { "epoch": 2.0955882352941178, "grad_norm": 8.260811024275789, "learning_rate": 2.5256272321263037e-06, "loss": 0.5103731155395508, "step": 3990 }, { "epoch": 2.096113445378151, "grad_norm": 10.929352655377272, "learning_rate": 2.5229721183389975e-06, "loss": 0.3871062994003296, "step": 3991 }, { "epoch": 2.096638655462185, "grad_norm": 13.228202794028162, "learning_rate": 2.5203179298981996e-06, "loss": 0.9053060412406921, "step": 3992 }, { "epoch": 2.0971638655462184, "grad_norm": 8.237409376816085, "learning_rate": 2.517664667795434e-06, "loss": 0.64473956823349, "step": 3993 }, { "epoch": 2.097689075630252, "grad_norm": 9.071831782893424, "learning_rate": 2.5150123330218846e-06, "loss": 0.22496303915977478, "step": 3994 }, { "epoch": 2.0982142857142856, "grad_norm": 24.95086648414235, "learning_rate": 2.512360926568388e-06, "loss": 1.565203309059143, "step": 3995 }, { "epoch": 2.0987394957983194, "grad_norm": 12.279797914154937, "learning_rate": 2.509710449425432e-06, "loss": 0.6211872100830078, "step": 3996 }, { "epoch": 2.099264705882353, "grad_norm": 7.942257640316419, "learning_rate": 2.5070609025831605e-06, "loss": 0.6003269553184509, "step": 3997 }, { "epoch": 2.0997899159663866, "grad_norm": 13.367055926203495, "learning_rate": 2.5044122870313647e-06, "loss": 0.5250446200370789, "step": 3998 }, { "epoch": 2.10031512605042, "grad_norm": 14.042664302189854, "learning_rate": 2.5017646037594967e-06, "loss": 0.4142884314060211, "step": 3999 }, { "epoch": 2.100840336134454, "grad_norm": 11.8651253965398, "learning_rate": 2.4991178537566514e-06, "loss": 0.5125176310539246, "step": 4000 }, { "epoch": 2.1013655462184873, "grad_norm": 11.943873585595433, "learning_rate": 2.49647203801158e-06, "loss": 0.7816375494003296, "step": 4001 }, { "epoch": 2.101890756302521, "grad_norm": 21.191938443320442, "learning_rate": 2.4938271575126827e-06, "loss": 0.5782123804092407, "step": 4002 }, { "epoch": 2.1024159663865545, "grad_norm": 8.34295660762861, "learning_rate": 2.4911832132480108e-06, "loss": 0.4950450658798218, "step": 4003 }, { "epoch": 2.1029411764705883, "grad_norm": 11.826622056411919, "learning_rate": 2.4885402062052665e-06, "loss": 0.5004026889801025, "step": 4004 }, { "epoch": 2.1034663865546217, "grad_norm": 12.242468251597597, "learning_rate": 2.4858981373718006e-06, "loss": 0.3473156690597534, "step": 4005 }, { "epoch": 2.1039915966386555, "grad_norm": 10.386770019916467, "learning_rate": 2.483257007734613e-06, "loss": 0.9702704548835754, "step": 4006 }, { "epoch": 2.104516806722689, "grad_norm": 17.081894558842556, "learning_rate": 2.480616818280357e-06, "loss": 0.6458576917648315, "step": 4007 }, { "epoch": 2.1050420168067228, "grad_norm": 9.706915956311411, "learning_rate": 2.4779775699953303e-06, "loss": 0.6217638254165649, "step": 4008 }, { "epoch": 2.105567226890756, "grad_norm": 12.262829190544315, "learning_rate": 2.47533926386548e-06, "loss": 0.5689232349395752, "step": 4009 }, { "epoch": 2.10609243697479, "grad_norm": 16.265738021721162, "learning_rate": 2.472701900876402e-06, "loss": 0.7930733561515808, "step": 4010 }, { "epoch": 2.1066176470588234, "grad_norm": 13.850093582157232, "learning_rate": 2.4700654820133385e-06, "loss": 0.6801016330718994, "step": 4011 }, { "epoch": 2.107142857142857, "grad_norm": 13.226655277762632, "learning_rate": 2.4674300082611804e-06, "loss": 0.48762497305870056, "step": 4012 }, { "epoch": 2.1076680672268906, "grad_norm": 13.586369828045024, "learning_rate": 2.4647954806044633e-06, "loss": 0.5265434980392456, "step": 4013 }, { "epoch": 2.1081932773109244, "grad_norm": 12.633515021762046, "learning_rate": 2.4621619000273746e-06, "loss": 0.33787739276885986, "step": 4014 }, { "epoch": 2.108718487394958, "grad_norm": 10.953664180727404, "learning_rate": 2.4595292675137427e-06, "loss": 0.8921220898628235, "step": 4015 }, { "epoch": 2.1092436974789917, "grad_norm": 12.859362953856833, "learning_rate": 2.4568975840470432e-06, "loss": 0.3590652644634247, "step": 4016 }, { "epoch": 2.109768907563025, "grad_norm": 10.683680488236497, "learning_rate": 2.454266850610398e-06, "loss": 0.5140031576156616, "step": 4017 }, { "epoch": 2.110294117647059, "grad_norm": 13.1882645947248, "learning_rate": 2.451637068186573e-06, "loss": 0.6276907920837402, "step": 4018 }, { "epoch": 2.1108193277310923, "grad_norm": 14.307210559448931, "learning_rate": 2.4490082377579805e-06, "loss": 0.5477755069732666, "step": 4019 }, { "epoch": 2.111344537815126, "grad_norm": 8.849757383111541, "learning_rate": 2.4463803603066745e-06, "loss": 0.45354244112968445, "step": 4020 }, { "epoch": 2.1118697478991595, "grad_norm": 28.449778496920423, "learning_rate": 2.443753436814354e-06, "loss": 0.5317434668540955, "step": 4021 }, { "epoch": 2.1123949579831933, "grad_norm": 12.821823751423944, "learning_rate": 2.4411274682623654e-06, "loss": 0.5347519516944885, "step": 4022 }, { "epoch": 2.1129201680672267, "grad_norm": 11.66289792100645, "learning_rate": 2.4385024556316928e-06, "loss": 0.3007144629955292, "step": 4023 }, { "epoch": 2.1134453781512605, "grad_norm": 15.17286235663046, "learning_rate": 2.4358783999029667e-06, "loss": 0.960580587387085, "step": 4024 }, { "epoch": 2.113970588235294, "grad_norm": 19.732052043929922, "learning_rate": 2.433255302056458e-06, "loss": 1.1223571300506592, "step": 4025 }, { "epoch": 2.1144957983193278, "grad_norm": 14.03611980370771, "learning_rate": 2.4306331630720814e-06, "loss": 1.2803820371627808, "step": 4026 }, { "epoch": 2.115021008403361, "grad_norm": 10.570645410263367, "learning_rate": 2.428011983929391e-06, "loss": 0.344987154006958, "step": 4027 }, { "epoch": 2.115546218487395, "grad_norm": 9.515052114336722, "learning_rate": 2.425391765607587e-06, "loss": 0.7571660280227661, "step": 4028 }, { "epoch": 2.1160714285714284, "grad_norm": 22.800482756931466, "learning_rate": 2.4227725090855063e-06, "loss": 0.772080659866333, "step": 4029 }, { "epoch": 2.116596638655462, "grad_norm": 13.531342528451523, "learning_rate": 2.4201542153416264e-06, "loss": 0.3090960383415222, "step": 4030 }, { "epoch": 2.1171218487394956, "grad_norm": 8.453552692084463, "learning_rate": 2.41753688535407e-06, "loss": 0.36753419041633606, "step": 4031 }, { "epoch": 2.1176470588235294, "grad_norm": 13.202252424696026, "learning_rate": 2.4149205201005943e-06, "loss": 0.3277924060821533, "step": 4032 }, { "epoch": 2.118172268907563, "grad_norm": 9.125245980593691, "learning_rate": 2.412305120558599e-06, "loss": 0.8689071536064148, "step": 4033 }, { "epoch": 2.1186974789915967, "grad_norm": 13.216372617169977, "learning_rate": 2.4096906877051217e-06, "loss": 0.2384854257106781, "step": 4034 }, { "epoch": 2.11922268907563, "grad_norm": 8.202640191300103, "learning_rate": 2.40707722251684e-06, "loss": 0.42244455218315125, "step": 4035 }, { "epoch": 2.119747899159664, "grad_norm": 11.09092881260831, "learning_rate": 2.4044647259700686e-06, "loss": 0.6288996934890747, "step": 4036 }, { "epoch": 2.1202731092436973, "grad_norm": 10.894244829347828, "learning_rate": 2.40185319904076e-06, "loss": 0.8319982290267944, "step": 4037 }, { "epoch": 2.120798319327731, "grad_norm": 9.945751874907838, "learning_rate": 2.3992426427045096e-06, "loss": 0.2343321442604065, "step": 4038 }, { "epoch": 2.1213235294117645, "grad_norm": 10.185372354706145, "learning_rate": 2.3966330579365436e-06, "loss": 0.3003634810447693, "step": 4039 }, { "epoch": 2.1218487394957983, "grad_norm": 10.997731967945157, "learning_rate": 2.3940244457117295e-06, "loss": 0.41671162843704224, "step": 4040 }, { "epoch": 2.1223739495798317, "grad_norm": 19.279236304933864, "learning_rate": 2.391416807004568e-06, "loss": 0.6006914377212524, "step": 4041 }, { "epoch": 2.1228991596638656, "grad_norm": 10.69244743177686, "learning_rate": 2.3888101427891997e-06, "loss": 0.3275656998157501, "step": 4042 }, { "epoch": 2.123424369747899, "grad_norm": 7.268202003336679, "learning_rate": 2.386204454039398e-06, "loss": 0.17830371856689453, "step": 4043 }, { "epoch": 2.1239495798319328, "grad_norm": 19.345509501951057, "learning_rate": 2.3835997417285726e-06, "loss": 0.5082678198814392, "step": 4044 }, { "epoch": 2.1244747899159666, "grad_norm": 12.21970607249505, "learning_rate": 2.3809960068297732e-06, "loss": 0.5014134645462036, "step": 4045 }, { "epoch": 2.125, "grad_norm": 11.723162376756218, "learning_rate": 2.3783932503156776e-06, "loss": 0.44408175349235535, "step": 4046 }, { "epoch": 2.1255252100840334, "grad_norm": 11.893135065516487, "learning_rate": 2.375791473158602e-06, "loss": 0.3746313154697418, "step": 4047 }, { "epoch": 2.1260504201680672, "grad_norm": 15.624052700547455, "learning_rate": 2.373190676330494e-06, "loss": 0.2957392930984497, "step": 4048 }, { "epoch": 2.126575630252101, "grad_norm": 9.96269591888791, "learning_rate": 2.370590860802938e-06, "loss": 0.48576468229293823, "step": 4049 }, { "epoch": 2.1271008403361344, "grad_norm": 13.965649108367241, "learning_rate": 2.3679920275471507e-06, "loss": 0.9501616358757019, "step": 4050 }, { "epoch": 2.127626050420168, "grad_norm": 12.50214804848332, "learning_rate": 2.3653941775339808e-06, "loss": 0.8665844202041626, "step": 4051 }, { "epoch": 2.1281512605042017, "grad_norm": 10.905922472614874, "learning_rate": 2.362797311733909e-06, "loss": 0.4295923113822937, "step": 4052 }, { "epoch": 2.1286764705882355, "grad_norm": 11.523552109263122, "learning_rate": 2.3602014311170524e-06, "loss": 0.8264093995094299, "step": 4053 }, { "epoch": 2.129201680672269, "grad_norm": 16.55913585959276, "learning_rate": 2.3576065366531574e-06, "loss": 0.42858362197875977, "step": 4054 }, { "epoch": 2.1297268907563027, "grad_norm": 9.302406598385065, "learning_rate": 2.355012629311601e-06, "loss": 0.26310187578201294, "step": 4055 }, { "epoch": 2.130252100840336, "grad_norm": 11.31262283218092, "learning_rate": 2.3524197100613928e-06, "loss": 0.4573161005973816, "step": 4056 }, { "epoch": 2.13077731092437, "grad_norm": 9.631517556209394, "learning_rate": 2.3498277798711725e-06, "loss": 0.34721511602401733, "step": 4057 }, { "epoch": 2.1313025210084033, "grad_norm": 14.51207449185493, "learning_rate": 2.3472368397092123e-06, "loss": 0.6342383027076721, "step": 4058 }, { "epoch": 2.131827731092437, "grad_norm": 14.037058443061177, "learning_rate": 2.3446468905434093e-06, "loss": 0.6850436925888062, "step": 4059 }, { "epoch": 2.1323529411764706, "grad_norm": 14.932047042158299, "learning_rate": 2.342057933341299e-06, "loss": 0.3733258843421936, "step": 4060 }, { "epoch": 2.1328781512605044, "grad_norm": 14.33288445780609, "learning_rate": 2.3394699690700395e-06, "loss": 1.062628984451294, "step": 4061 }, { "epoch": 2.133403361344538, "grad_norm": 11.434354174339296, "learning_rate": 2.33688299869642e-06, "loss": 0.665610134601593, "step": 4062 }, { "epoch": 2.1339285714285716, "grad_norm": 12.173854876245024, "learning_rate": 2.3342970231868573e-06, "loss": 0.3131663203239441, "step": 4063 }, { "epoch": 2.134453781512605, "grad_norm": 14.851744130736751, "learning_rate": 2.3317120435073992e-06, "loss": 0.5863408446311951, "step": 4064 }, { "epoch": 2.134978991596639, "grad_norm": 8.659845983449085, "learning_rate": 2.3291280606237186e-06, "loss": 0.5293826460838318, "step": 4065 }, { "epoch": 2.1355042016806722, "grad_norm": 11.008841469881911, "learning_rate": 2.326545075501119e-06, "loss": 0.4050210416316986, "step": 4066 }, { "epoch": 2.136029411764706, "grad_norm": 12.752113588452234, "learning_rate": 2.3239630891045255e-06, "loss": 0.8473905920982361, "step": 4067 }, { "epoch": 2.1365546218487395, "grad_norm": 11.676075587505613, "learning_rate": 2.3213821023984994e-06, "loss": 0.6905754804611206, "step": 4068 }, { "epoch": 2.1370798319327733, "grad_norm": 6.487112493209409, "learning_rate": 2.3188021163472206e-06, "loss": 0.18758584558963776, "step": 4069 }, { "epoch": 2.1376050420168067, "grad_norm": 8.909557989873372, "learning_rate": 2.3162231319144984e-06, "loss": 0.19892190396785736, "step": 4070 }, { "epoch": 2.1381302521008405, "grad_norm": 8.227572511150989, "learning_rate": 2.313645150063767e-06, "loss": 0.3256494104862213, "step": 4071 }, { "epoch": 2.138655462184874, "grad_norm": 14.821989068500018, "learning_rate": 2.3110681717580856e-06, "loss": 0.4866800308227539, "step": 4072 }, { "epoch": 2.1391806722689077, "grad_norm": 7.70065568985152, "learning_rate": 2.308492197960141e-06, "loss": 0.3043510913848877, "step": 4073 }, { "epoch": 2.139705882352941, "grad_norm": 22.49326411043284, "learning_rate": 2.30591722963224e-06, "loss": 0.5373920202255249, "step": 4074 }, { "epoch": 2.140231092436975, "grad_norm": 9.318984538849298, "learning_rate": 2.303343267736321e-06, "loss": 0.2898182272911072, "step": 4075 }, { "epoch": 2.1407563025210083, "grad_norm": 15.944258917000777, "learning_rate": 2.3007703132339406e-06, "loss": 0.4407224953174591, "step": 4076 }, { "epoch": 2.141281512605042, "grad_norm": 9.669606729200078, "learning_rate": 2.2981983670862796e-06, "loss": 0.43881750106811523, "step": 4077 }, { "epoch": 2.1418067226890756, "grad_norm": 9.714564211649812, "learning_rate": 2.295627430254145e-06, "loss": 0.23658540844917297, "step": 4078 }, { "epoch": 2.1423319327731094, "grad_norm": 10.424042513988447, "learning_rate": 2.2930575036979634e-06, "loss": 0.676588237285614, "step": 4079 }, { "epoch": 2.142857142857143, "grad_norm": 10.492214268235987, "learning_rate": 2.2904885883777865e-06, "loss": 0.4801306128501892, "step": 4080 }, { "epoch": 2.1433823529411766, "grad_norm": 13.9587215633948, "learning_rate": 2.2879206852532854e-06, "loss": 0.2784840166568756, "step": 4081 }, { "epoch": 2.14390756302521, "grad_norm": 10.142025722393312, "learning_rate": 2.2853537952837577e-06, "loss": 0.35729077458381653, "step": 4082 }, { "epoch": 2.144432773109244, "grad_norm": 9.805524248752027, "learning_rate": 2.2827879194281196e-06, "loss": 0.30716753005981445, "step": 4083 }, { "epoch": 2.1449579831932772, "grad_norm": 12.828334694590527, "learning_rate": 2.2802230586449074e-06, "loss": 0.6840190887451172, "step": 4084 }, { "epoch": 2.145483193277311, "grad_norm": 14.622043020032047, "learning_rate": 2.2776592138922806e-06, "loss": 0.46795228123664856, "step": 4085 }, { "epoch": 2.1460084033613445, "grad_norm": 11.523832735496942, "learning_rate": 2.275096386128016e-06, "loss": 0.42503461241722107, "step": 4086 }, { "epoch": 2.1465336134453783, "grad_norm": 10.337690850053468, "learning_rate": 2.2725345763095146e-06, "loss": 0.6719300150871277, "step": 4087 }, { "epoch": 2.1470588235294117, "grad_norm": 10.17049504323966, "learning_rate": 2.269973785393794e-06, "loss": 0.48671677708625793, "step": 4088 }, { "epoch": 2.1475840336134455, "grad_norm": 11.769018661108722, "learning_rate": 2.2674140143374904e-06, "loss": 0.46760737895965576, "step": 4089 }, { "epoch": 2.148109243697479, "grad_norm": 12.905616796952135, "learning_rate": 2.2648552640968646e-06, "loss": 0.5618797540664673, "step": 4090 }, { "epoch": 2.1486344537815127, "grad_norm": 10.39339343948446, "learning_rate": 2.2622975356277903e-06, "loss": 0.36223387718200684, "step": 4091 }, { "epoch": 2.149159663865546, "grad_norm": 9.077655547989146, "learning_rate": 2.2597408298857613e-06, "loss": 0.3210272192955017, "step": 4092 }, { "epoch": 2.14968487394958, "grad_norm": 12.806312458390245, "learning_rate": 2.2571851478258903e-06, "loss": 0.33161550760269165, "step": 4093 }, { "epoch": 2.1502100840336134, "grad_norm": 9.361168044152002, "learning_rate": 2.2546304904029055e-06, "loss": 0.3387778699398041, "step": 4094 }, { "epoch": 2.150735294117647, "grad_norm": 12.34021327672009, "learning_rate": 2.2520768585711544e-06, "loss": 0.2693478465080261, "step": 4095 }, { "epoch": 2.1512605042016806, "grad_norm": 14.200250639417026, "learning_rate": 2.249524253284598e-06, "loss": 1.022866129875183, "step": 4096 }, { "epoch": 2.1517857142857144, "grad_norm": 15.085975732395568, "learning_rate": 2.2469726754968207e-06, "loss": 0.6444025039672852, "step": 4097 }, { "epoch": 2.152310924369748, "grad_norm": 19.904778098906103, "learning_rate": 2.244422126161017e-06, "loss": 0.5390111804008484, "step": 4098 }, { "epoch": 2.1528361344537816, "grad_norm": 9.404188201411367, "learning_rate": 2.241872606229998e-06, "loss": 0.6100510358810425, "step": 4099 }, { "epoch": 2.153361344537815, "grad_norm": 12.534920979878125, "learning_rate": 2.239324116656192e-06, "loss": 0.20052313804626465, "step": 4100 }, { "epoch": 2.153886554621849, "grad_norm": 7.030972536094845, "learning_rate": 2.236776658391641e-06, "loss": 0.27668434381484985, "step": 4101 }, { "epoch": 2.1544117647058822, "grad_norm": 17.569630042939462, "learning_rate": 2.2342302323880026e-06, "loss": 0.413688987493515, "step": 4102 }, { "epoch": 2.154936974789916, "grad_norm": 13.881738834149553, "learning_rate": 2.2316848395965483e-06, "loss": 0.5751949548721313, "step": 4103 }, { "epoch": 2.1554621848739495, "grad_norm": 16.076818408765543, "learning_rate": 2.2291404809681627e-06, "loss": 0.9522318243980408, "step": 4104 }, { "epoch": 2.1559873949579833, "grad_norm": 13.406280337945816, "learning_rate": 2.2265971574533474e-06, "loss": 0.40907883644104004, "step": 4105 }, { "epoch": 2.1565126050420167, "grad_norm": 11.330533682546829, "learning_rate": 2.224054870002214e-06, "loss": 0.6507560014724731, "step": 4106 }, { "epoch": 2.1570378151260505, "grad_norm": 13.994750393715663, "learning_rate": 2.2215136195644884e-06, "loss": 0.541474461555481, "step": 4107 }, { "epoch": 2.157563025210084, "grad_norm": 11.129845575276443, "learning_rate": 2.2189734070895086e-06, "loss": 0.3323579728603363, "step": 4108 }, { "epoch": 2.1580882352941178, "grad_norm": 13.908236890450672, "learning_rate": 2.2164342335262244e-06, "loss": 0.2566412091255188, "step": 4109 }, { "epoch": 2.158613445378151, "grad_norm": 9.421555202556819, "learning_rate": 2.2138960998231983e-06, "loss": 0.34640175104141235, "step": 4110 }, { "epoch": 2.159138655462185, "grad_norm": 15.501626790134464, "learning_rate": 2.2113590069286033e-06, "loss": 0.5923685431480408, "step": 4111 }, { "epoch": 2.1596638655462184, "grad_norm": 8.407652272330782, "learning_rate": 2.208822955790228e-06, "loss": 0.35512563586235046, "step": 4112 }, { "epoch": 2.160189075630252, "grad_norm": 13.002258015880964, "learning_rate": 2.2062879473554654e-06, "loss": 0.5846668481826782, "step": 4113 }, { "epoch": 2.1607142857142856, "grad_norm": 13.773114956142704, "learning_rate": 2.203753982571322e-06, "loss": 0.6553114652633667, "step": 4114 }, { "epoch": 2.1612394957983194, "grad_norm": 9.131268909989592, "learning_rate": 2.2012210623844155e-06, "loss": 0.5892950296401978, "step": 4115 }, { "epoch": 2.161764705882353, "grad_norm": 8.048857099303655, "learning_rate": 2.198689187740972e-06, "loss": 0.46284598112106323, "step": 4116 }, { "epoch": 2.1622899159663866, "grad_norm": 11.92598926356154, "learning_rate": 2.1961583595868253e-06, "loss": 0.3618340790271759, "step": 4117 }, { "epoch": 2.16281512605042, "grad_norm": 10.369574199628978, "learning_rate": 2.1936285788674204e-06, "loss": 0.34268003702163696, "step": 4118 }, { "epoch": 2.163340336134454, "grad_norm": 9.203339303925619, "learning_rate": 2.191099846527813e-06, "loss": 0.27453872561454773, "step": 4119 }, { "epoch": 2.1638655462184873, "grad_norm": 15.470084126350917, "learning_rate": 2.1885721635126646e-06, "loss": 0.5603208541870117, "step": 4120 }, { "epoch": 2.164390756302521, "grad_norm": 16.989271796983516, "learning_rate": 2.186045530766244e-06, "loss": 1.4984362125396729, "step": 4121 }, { "epoch": 2.1649159663865545, "grad_norm": 9.931917013371637, "learning_rate": 2.183519949232428e-06, "loss": 0.5418696403503418, "step": 4122 }, { "epoch": 2.1654411764705883, "grad_norm": 10.138879609496065, "learning_rate": 2.180995419854703e-06, "loss": 0.2958468198776245, "step": 4123 }, { "epoch": 2.1659663865546217, "grad_norm": 11.93201177102272, "learning_rate": 2.17847194357616e-06, "loss": 0.2131626009941101, "step": 4124 }, { "epoch": 2.1664915966386555, "grad_norm": 14.206516666505092, "learning_rate": 2.1759495213394965e-06, "loss": 1.0808008909225464, "step": 4125 }, { "epoch": 2.167016806722689, "grad_norm": 9.416428725696495, "learning_rate": 2.1734281540870158e-06, "loss": 0.390267014503479, "step": 4126 }, { "epoch": 2.1675420168067228, "grad_norm": 8.661698657424344, "learning_rate": 2.1709078427606323e-06, "loss": 0.29766106605529785, "step": 4127 }, { "epoch": 2.168067226890756, "grad_norm": 11.100999631680564, "learning_rate": 2.1683885883018596e-06, "loss": 0.6078053116798401, "step": 4128 }, { "epoch": 2.16859243697479, "grad_norm": 7.787858276858203, "learning_rate": 2.165870391651819e-06, "loss": 0.23034285008907318, "step": 4129 }, { "epoch": 2.1691176470588234, "grad_norm": 7.997285454490383, "learning_rate": 2.1633532537512374e-06, "loss": 0.2559571862220764, "step": 4130 }, { "epoch": 2.169642857142857, "grad_norm": 11.804350834609446, "learning_rate": 2.160837175540444e-06, "loss": 0.7174216508865356, "step": 4131 }, { "epoch": 2.1701680672268906, "grad_norm": 13.936453134967207, "learning_rate": 2.158322157959373e-06, "loss": 0.2922583222389221, "step": 4132 }, { "epoch": 2.1706932773109244, "grad_norm": 10.251391658667258, "learning_rate": 2.155808201947563e-06, "loss": 0.829541802406311, "step": 4133 }, { "epoch": 2.171218487394958, "grad_norm": 10.23807325621361, "learning_rate": 2.1532953084441575e-06, "loss": 1.2362592220306396, "step": 4134 }, { "epoch": 2.1717436974789917, "grad_norm": 10.838998585342512, "learning_rate": 2.1507834783879007e-06, "loss": 0.530997097492218, "step": 4135 }, { "epoch": 2.172268907563025, "grad_norm": 8.553686636875732, "learning_rate": 2.1482727127171395e-06, "loss": 0.3646453022956848, "step": 4136 }, { "epoch": 2.172794117647059, "grad_norm": 11.613822231051532, "learning_rate": 2.145763012369824e-06, "loss": 0.7255064249038696, "step": 4137 }, { "epoch": 2.1733193277310923, "grad_norm": 11.098132735933739, "learning_rate": 2.1432543782835064e-06, "loss": 0.7276588082313538, "step": 4138 }, { "epoch": 2.173844537815126, "grad_norm": 25.505741451033256, "learning_rate": 2.14074681139534e-06, "loss": 0.5854189991950989, "step": 4139 }, { "epoch": 2.1743697478991595, "grad_norm": 7.537161132561563, "learning_rate": 2.1382403126420804e-06, "loss": 0.7636845111846924, "step": 4140 }, { "epoch": 2.1748949579831933, "grad_norm": 10.668134187294704, "learning_rate": 2.1357348829600816e-06, "loss": 0.6803666949272156, "step": 4141 }, { "epoch": 2.1754201680672267, "grad_norm": 5.653673343197226, "learning_rate": 2.1332305232853036e-06, "loss": 0.2199927717447281, "step": 4142 }, { "epoch": 2.1759453781512605, "grad_norm": 12.552287838561119, "learning_rate": 2.130727234553301e-06, "loss": 0.34859079122543335, "step": 4143 }, { "epoch": 2.176470588235294, "grad_norm": 9.069673498422354, "learning_rate": 2.128225017699232e-06, "loss": 0.36638444662094116, "step": 4144 }, { "epoch": 2.1769957983193278, "grad_norm": 22.65302641357171, "learning_rate": 2.125723873657852e-06, "loss": 0.9880443811416626, "step": 4145 }, { "epoch": 2.177521008403361, "grad_norm": 12.297120435804487, "learning_rate": 2.123223803363516e-06, "loss": 0.49851250648498535, "step": 4146 }, { "epoch": 2.178046218487395, "grad_norm": 31.083247431672014, "learning_rate": 2.1207248077501796e-06, "loss": 0.7122225165367126, "step": 4147 }, { "epoch": 2.1785714285714284, "grad_norm": 18.436865021595267, "learning_rate": 2.118226887751394e-06, "loss": 0.505310595035553, "step": 4148 }, { "epoch": 2.179096638655462, "grad_norm": 12.294063749377852, "learning_rate": 2.115730044300313e-06, "loss": 0.4264427721500397, "step": 4149 }, { "epoch": 2.1796218487394956, "grad_norm": 21.35135405565712, "learning_rate": 2.113234278329685e-06, "loss": 0.2829148769378662, "step": 4150 }, { "epoch": 2.1801470588235294, "grad_norm": 11.512959775518773, "learning_rate": 2.110739590771856e-06, "loss": 0.2545013427734375, "step": 4151 }, { "epoch": 2.180672268907563, "grad_norm": 10.334224531936984, "learning_rate": 2.10824598255877e-06, "loss": 0.6813156008720398, "step": 4152 }, { "epoch": 2.1811974789915967, "grad_norm": 12.117580299201101, "learning_rate": 2.105753454621966e-06, "loss": 0.5756422281265259, "step": 4153 }, { "epoch": 2.18172268907563, "grad_norm": 10.307502170621538, "learning_rate": 2.103262007892583e-06, "loss": 0.3108716905117035, "step": 4154 }, { "epoch": 2.182247899159664, "grad_norm": 9.379900244197216, "learning_rate": 2.100771643301351e-06, "loss": 0.3554327189922333, "step": 4155 }, { "epoch": 2.1827731092436973, "grad_norm": 12.52549367725084, "learning_rate": 2.0982823617786017e-06, "loss": 1.1059948205947876, "step": 4156 }, { "epoch": 2.183298319327731, "grad_norm": 9.56281993182365, "learning_rate": 2.095794164254259e-06, "loss": 0.36582398414611816, "step": 4157 }, { "epoch": 2.1838235294117645, "grad_norm": 8.05420997514092, "learning_rate": 2.0933070516578407e-06, "loss": 0.3737989664077759, "step": 4158 }, { "epoch": 2.1843487394957983, "grad_norm": 9.748343049586008, "learning_rate": 2.090821024918462e-06, "loss": 0.6534856557846069, "step": 4159 }, { "epoch": 2.184873949579832, "grad_norm": 9.150306654086352, "learning_rate": 2.0883360849648294e-06, "loss": 0.5786707997322083, "step": 4160 }, { "epoch": 2.1853991596638656, "grad_norm": 10.123184561076345, "learning_rate": 2.0858522327252467e-06, "loss": 0.5475946664810181, "step": 4161 }, { "epoch": 2.185924369747899, "grad_norm": 9.814161672103774, "learning_rate": 2.0833694691276093e-06, "loss": 0.3300240933895111, "step": 4162 }, { "epoch": 2.1864495798319328, "grad_norm": 12.620510761109097, "learning_rate": 2.0808877950994037e-06, "loss": 0.6013469696044922, "step": 4163 }, { "epoch": 2.1869747899159666, "grad_norm": 9.001212339683478, "learning_rate": 2.078407211567717e-06, "loss": 0.5732584595680237, "step": 4164 }, { "epoch": 2.1875, "grad_norm": 10.71379548020254, "learning_rate": 2.0759277194592208e-06, "loss": 0.30177998542785645, "step": 4165 }, { "epoch": 2.1880252100840334, "grad_norm": 16.394340703005806, "learning_rate": 2.073449319700184e-06, "loss": 0.5181938409805298, "step": 4166 }, { "epoch": 2.1885504201680672, "grad_norm": 10.618518841939864, "learning_rate": 2.070972013216464e-06, "loss": 0.3636229336261749, "step": 4167 }, { "epoch": 2.189075630252101, "grad_norm": 9.175274377354294, "learning_rate": 2.0684958009335122e-06, "loss": 0.3279910087585449, "step": 4168 }, { "epoch": 2.1896008403361344, "grad_norm": 17.577912285931166, "learning_rate": 2.06602068377637e-06, "loss": 0.3370909094810486, "step": 4169 }, { "epoch": 2.190126050420168, "grad_norm": 12.503618346397172, "learning_rate": 2.0635466626696688e-06, "loss": 0.3615824580192566, "step": 4170 }, { "epoch": 2.1906512605042017, "grad_norm": 16.875652296761626, "learning_rate": 2.061073738537635e-06, "loss": 0.5076624155044556, "step": 4171 }, { "epoch": 2.1911764705882355, "grad_norm": 23.41322683525522, "learning_rate": 2.05860191230408e-06, "loss": 0.5695816278457642, "step": 4172 }, { "epoch": 2.191701680672269, "grad_norm": 8.037363880378786, "learning_rate": 2.0561311848924082e-06, "loss": 0.5932033061981201, "step": 4173 }, { "epoch": 2.1922268907563027, "grad_norm": 9.483592587045889, "learning_rate": 2.053661557225611e-06, "loss": 0.5292115211486816, "step": 4174 }, { "epoch": 2.192752100840336, "grad_norm": 15.771638520172871, "learning_rate": 2.0511930302262724e-06, "loss": 0.8906204700469971, "step": 4175 }, { "epoch": 2.19327731092437, "grad_norm": 6.953253941111415, "learning_rate": 2.048725604816561e-06, "loss": 0.506250262260437, "step": 4176 }, { "epoch": 2.1938025210084033, "grad_norm": 7.579622931622611, "learning_rate": 2.0462592819182377e-06, "loss": 0.5802386999130249, "step": 4177 }, { "epoch": 2.194327731092437, "grad_norm": 12.317691620413207, "learning_rate": 2.043794062452647e-06, "loss": 0.6706414222717285, "step": 4178 }, { "epoch": 2.1948529411764706, "grad_norm": 9.34852734409619, "learning_rate": 2.0413299473407285e-06, "loss": 0.6208543181419373, "step": 4179 }, { "epoch": 2.1953781512605044, "grad_norm": 9.086832000084609, "learning_rate": 2.0388669375030024e-06, "loss": 0.3323096036911011, "step": 4180 }, { "epoch": 2.195903361344538, "grad_norm": 11.767961058541344, "learning_rate": 2.0364050338595792e-06, "loss": 0.5645102262496948, "step": 4181 }, { "epoch": 2.1964285714285716, "grad_norm": 18.284140518390636, "learning_rate": 2.0339442373301548e-06, "loss": 0.8418555855751038, "step": 4182 }, { "epoch": 2.196953781512605, "grad_norm": 14.155789682869628, "learning_rate": 2.031484548834013e-06, "loss": 0.3883611559867859, "step": 4183 }, { "epoch": 2.197478991596639, "grad_norm": 12.515288844892636, "learning_rate": 2.0290259692900216e-06, "loss": 0.6071314811706543, "step": 4184 }, { "epoch": 2.1980042016806722, "grad_norm": 17.769203705565968, "learning_rate": 2.0265684996166345e-06, "loss": 1.4243782758712769, "step": 4185 }, { "epoch": 2.198529411764706, "grad_norm": 15.035137568837966, "learning_rate": 2.024112140731895e-06, "loss": 0.4408267140388489, "step": 4186 }, { "epoch": 2.1990546218487395, "grad_norm": 10.29686943582839, "learning_rate": 2.021656893553427e-06, "loss": 0.5268247723579407, "step": 4187 }, { "epoch": 2.1995798319327733, "grad_norm": 11.977760038537417, "learning_rate": 2.0192027589984377e-06, "loss": 0.49362415075302124, "step": 4188 }, { "epoch": 2.2001050420168067, "grad_norm": 9.706659921209024, "learning_rate": 2.0167497379837254e-06, "loss": 0.27251413464546204, "step": 4189 }, { "epoch": 2.2006302521008405, "grad_norm": 22.27753361545864, "learning_rate": 2.014297831425666e-06, "loss": 0.7175557017326355, "step": 4190 }, { "epoch": 2.201155462184874, "grad_norm": 14.991979543110716, "learning_rate": 2.0118470402402223e-06, "loss": 0.33612674474716187, "step": 4191 }, { "epoch": 2.2016806722689077, "grad_norm": 9.789047134908518, "learning_rate": 2.009397365342939e-06, "loss": 0.27348434925079346, "step": 4192 }, { "epoch": 2.202205882352941, "grad_norm": 10.804283172794818, "learning_rate": 2.0069488076489445e-06, "loss": 0.19717592000961304, "step": 4193 }, { "epoch": 2.202731092436975, "grad_norm": 11.918979990452257, "learning_rate": 2.0045013680729477e-06, "loss": 0.4407789707183838, "step": 4194 }, { "epoch": 2.2032563025210083, "grad_norm": 9.358364885066807, "learning_rate": 2.0020550475292456e-06, "loss": 0.369744211435318, "step": 4195 }, { "epoch": 2.203781512605042, "grad_norm": 10.05960294577804, "learning_rate": 1.999609846931711e-06, "loss": 0.6133785247802734, "step": 4196 }, { "epoch": 2.2043067226890756, "grad_norm": 13.33911712392511, "learning_rate": 1.997165767193801e-06, "loss": 0.5213902592658997, "step": 4197 }, { "epoch": 2.2048319327731094, "grad_norm": 19.503275644843303, "learning_rate": 1.994722809228554e-06, "loss": 0.9305657148361206, "step": 4198 }, { "epoch": 2.205357142857143, "grad_norm": 9.868231682672182, "learning_rate": 1.9922809739485883e-06, "loss": 0.7982741594314575, "step": 4199 }, { "epoch": 2.2058823529411766, "grad_norm": 10.228297500346645, "learning_rate": 1.9898402622661036e-06, "loss": 0.7370745539665222, "step": 4200 }, { "epoch": 2.20640756302521, "grad_norm": 20.771341640684224, "learning_rate": 1.9874006750928783e-06, "loss": 0.5306645035743713, "step": 4201 }, { "epoch": 2.206932773109244, "grad_norm": 9.833383007286246, "learning_rate": 1.9849622133402753e-06, "loss": 0.37817084789276123, "step": 4202 }, { "epoch": 2.2074579831932772, "grad_norm": 9.19511118204492, "learning_rate": 1.9825248779192323e-06, "loss": 0.6270235776901245, "step": 4203 }, { "epoch": 2.207983193277311, "grad_norm": 12.791260823791497, "learning_rate": 1.9800886697402684e-06, "loss": 1.0493865013122559, "step": 4204 }, { "epoch": 2.2085084033613445, "grad_norm": 9.811316938051316, "learning_rate": 1.97765358971348e-06, "loss": 0.7131880521774292, "step": 4205 }, { "epoch": 2.2090336134453783, "grad_norm": 14.093480674583617, "learning_rate": 1.9752196387485434e-06, "loss": 0.5840585231781006, "step": 4206 }, { "epoch": 2.2095588235294117, "grad_norm": 7.911030532639583, "learning_rate": 1.9727868177547126e-06, "loss": 0.33531811833381653, "step": 4207 }, { "epoch": 2.2100840336134455, "grad_norm": 15.014794444530551, "learning_rate": 1.9703551276408204e-06, "loss": 0.7113238573074341, "step": 4208 }, { "epoch": 2.210609243697479, "grad_norm": 12.543505964644165, "learning_rate": 1.967924569315275e-06, "loss": 0.45716071128845215, "step": 4209 }, { "epoch": 2.2111344537815127, "grad_norm": 8.88840872635617, "learning_rate": 1.9654951436860653e-06, "loss": 0.5780990123748779, "step": 4210 }, { "epoch": 2.211659663865546, "grad_norm": 8.302707224640224, "learning_rate": 1.9630668516607543e-06, "loss": 0.43058717250823975, "step": 4211 }, { "epoch": 2.21218487394958, "grad_norm": 11.32072871138526, "learning_rate": 1.9606396941464818e-06, "loss": 0.2899579405784607, "step": 4212 }, { "epoch": 2.2127100840336134, "grad_norm": 12.49695712166815, "learning_rate": 1.958213672049964e-06, "loss": 0.9015559554100037, "step": 4213 }, { "epoch": 2.213235294117647, "grad_norm": 10.767827231610696, "learning_rate": 1.9557887862774932e-06, "loss": 0.6473977565765381, "step": 4214 }, { "epoch": 2.2137605042016806, "grad_norm": 9.096515865985335, "learning_rate": 1.9533650377349374e-06, "loss": 0.27973464131355286, "step": 4215 }, { "epoch": 2.2142857142857144, "grad_norm": 12.690723019200991, "learning_rate": 1.950942427327737e-06, "loss": 0.7399759292602539, "step": 4216 }, { "epoch": 2.214810924369748, "grad_norm": 14.477758723294201, "learning_rate": 1.9485209559609148e-06, "loss": 0.24219633638858795, "step": 4217 }, { "epoch": 2.2153361344537816, "grad_norm": 7.656369932489473, "learning_rate": 1.9461006245390594e-06, "loss": 0.14525285363197327, "step": 4218 }, { "epoch": 2.215861344537815, "grad_norm": 8.932338220927106, "learning_rate": 1.943681433966338e-06, "loss": 0.3175009489059448, "step": 4219 }, { "epoch": 2.216386554621849, "grad_norm": 7.723831645123156, "learning_rate": 1.9412633851464905e-06, "loss": 0.41556933522224426, "step": 4220 }, { "epoch": 2.2169117647058822, "grad_norm": 17.498494880239466, "learning_rate": 1.9388464789828316e-06, "loss": 0.7165228128433228, "step": 4221 }, { "epoch": 2.217436974789916, "grad_norm": 10.458573195068976, "learning_rate": 1.9364307163782466e-06, "loss": 0.4890228509902954, "step": 4222 }, { "epoch": 2.2179621848739495, "grad_norm": 11.304200533087775, "learning_rate": 1.9340160982351937e-06, "loss": 0.6948677897453308, "step": 4223 }, { "epoch": 2.2184873949579833, "grad_norm": 11.930371566472163, "learning_rate": 1.9316026254557083e-06, "loss": 0.372274249792099, "step": 4224 }, { "epoch": 2.2190126050420167, "grad_norm": 9.730098144466242, "learning_rate": 1.9291902989413935e-06, "loss": 0.3224791884422302, "step": 4225 }, { "epoch": 2.2195378151260505, "grad_norm": 28.299366470610863, "learning_rate": 1.926779119593424e-06, "loss": 1.8382922410964966, "step": 4226 }, { "epoch": 2.220063025210084, "grad_norm": 23.461126414762614, "learning_rate": 1.9243690883125495e-06, "loss": 0.3881704807281494, "step": 4227 }, { "epoch": 2.2205882352941178, "grad_norm": 11.216692612225211, "learning_rate": 1.9219602059990855e-06, "loss": 1.0345499515533447, "step": 4228 }, { "epoch": 2.221113445378151, "grad_norm": 12.991118298538602, "learning_rate": 1.9195524735529237e-06, "loss": 0.2979498505592346, "step": 4229 }, { "epoch": 2.221638655462185, "grad_norm": 12.497677517668397, "learning_rate": 1.917145891873522e-06, "loss": 0.293893426656723, "step": 4230 }, { "epoch": 2.2221638655462184, "grad_norm": 10.544509646480991, "learning_rate": 1.91474046185991e-06, "loss": 0.34756922721862793, "step": 4231 }, { "epoch": 2.222689075630252, "grad_norm": 14.556717169054096, "learning_rate": 1.9123361844106897e-06, "loss": 0.5735061168670654, "step": 4232 }, { "epoch": 2.2232142857142856, "grad_norm": 11.593487925413427, "learning_rate": 1.909933060424029e-06, "loss": 0.4957185387611389, "step": 4233 }, { "epoch": 2.2237394957983194, "grad_norm": 14.976538125462485, "learning_rate": 1.9075310907976665e-06, "loss": 0.5994662046432495, "step": 4234 }, { "epoch": 2.224264705882353, "grad_norm": 15.837535116480117, "learning_rate": 1.9051302764289075e-06, "loss": 0.3711355924606323, "step": 4235 }, { "epoch": 2.2247899159663866, "grad_norm": 10.753597804787082, "learning_rate": 1.9027306182146287e-06, "loss": 0.7939506769180298, "step": 4236 }, { "epoch": 2.22531512605042, "grad_norm": 12.043672496085208, "learning_rate": 1.9003321170512728e-06, "loss": 0.6818105578422546, "step": 4237 }, { "epoch": 2.225840336134454, "grad_norm": 13.476801970533435, "learning_rate": 1.8979347738348498e-06, "loss": 0.4162781834602356, "step": 4238 }, { "epoch": 2.2263655462184873, "grad_norm": 9.75946868817556, "learning_rate": 1.8955385894609414e-06, "loss": 0.40535199642181396, "step": 4239 }, { "epoch": 2.226890756302521, "grad_norm": 12.048971044153054, "learning_rate": 1.8931435648246916e-06, "loss": 0.3999428451061249, "step": 4240 }, { "epoch": 2.2274159663865545, "grad_norm": 8.310005672894276, "learning_rate": 1.890749700820813e-06, "loss": 0.3226167559623718, "step": 4241 }, { "epoch": 2.2279411764705883, "grad_norm": 10.69961698436323, "learning_rate": 1.8883569983435846e-06, "loss": 0.7567530870437622, "step": 4242 }, { "epoch": 2.2284663865546217, "grad_norm": 12.533105232512634, "learning_rate": 1.8859654582868508e-06, "loss": 0.6056888103485107, "step": 4243 }, { "epoch": 2.2289915966386555, "grad_norm": 10.63696061089627, "learning_rate": 1.8835750815440223e-06, "loss": 0.967621386051178, "step": 4244 }, { "epoch": 2.229516806722689, "grad_norm": 11.261256743839713, "learning_rate": 1.8811858690080764e-06, "loss": 0.5182070136070251, "step": 4245 }, { "epoch": 2.2300420168067228, "grad_norm": 11.348583071333728, "learning_rate": 1.8787978215715513e-06, "loss": 0.5857005715370178, "step": 4246 }, { "epoch": 2.230567226890756, "grad_norm": 11.668928252340686, "learning_rate": 1.8764109401265567e-06, "loss": 0.5006313920021057, "step": 4247 }, { "epoch": 2.23109243697479, "grad_norm": 9.570223735853158, "learning_rate": 1.8740252255647616e-06, "loss": 0.9233022928237915, "step": 4248 }, { "epoch": 2.2316176470588234, "grad_norm": 14.529213295709033, "learning_rate": 1.8716406787774e-06, "loss": 0.5809304118156433, "step": 4249 }, { "epoch": 2.232142857142857, "grad_norm": 9.445726587664717, "learning_rate": 1.8692573006552712e-06, "loss": 0.34896859526634216, "step": 4250 }, { "epoch": 2.2326680672268906, "grad_norm": 10.016974712584476, "learning_rate": 1.866875092088735e-06, "loss": 0.4887806177139282, "step": 4251 }, { "epoch": 2.2331932773109244, "grad_norm": 22.837821621436888, "learning_rate": 1.864494053967718e-06, "loss": 1.0725637674331665, "step": 4252 }, { "epoch": 2.233718487394958, "grad_norm": 10.952656545944258, "learning_rate": 1.862114187181705e-06, "loss": 0.6504073739051819, "step": 4253 }, { "epoch": 2.2342436974789917, "grad_norm": 29.280790993162977, "learning_rate": 1.8597354926197492e-06, "loss": 2.4205195903778076, "step": 4254 }, { "epoch": 2.234768907563025, "grad_norm": 12.430858344818144, "learning_rate": 1.8573579711704615e-06, "loss": 1.1163129806518555, "step": 4255 }, { "epoch": 2.235294117647059, "grad_norm": 18.441813056922744, "learning_rate": 1.8549816237220153e-06, "loss": 0.4767477512359619, "step": 4256 }, { "epoch": 2.2358193277310923, "grad_norm": 13.193167198146616, "learning_rate": 1.8526064511621455e-06, "loss": 0.45058536529541016, "step": 4257 }, { "epoch": 2.236344537815126, "grad_norm": 11.797257836622302, "learning_rate": 1.850232454378149e-06, "loss": 0.4441848695278168, "step": 4258 }, { "epoch": 2.2368697478991595, "grad_norm": 11.28502299388834, "learning_rate": 1.8478596342568827e-06, "loss": 0.2549547553062439, "step": 4259 }, { "epoch": 2.2373949579831933, "grad_norm": 9.977389844623083, "learning_rate": 1.8454879916847619e-06, "loss": 0.5016900300979614, "step": 4260 }, { "epoch": 2.2379201680672267, "grad_norm": 13.518901266693817, "learning_rate": 1.843117527547768e-06, "loss": 0.2857413589954376, "step": 4261 }, { "epoch": 2.2384453781512605, "grad_norm": 10.638081977113426, "learning_rate": 1.8407482427314366e-06, "loss": 0.6297985911369324, "step": 4262 }, { "epoch": 2.238970588235294, "grad_norm": 13.721575737834797, "learning_rate": 1.8383801381208644e-06, "loss": 0.5709312558174133, "step": 4263 }, { "epoch": 2.2394957983193278, "grad_norm": 8.553145652845286, "learning_rate": 1.8360132146007077e-06, "loss": 0.36588770151138306, "step": 4264 }, { "epoch": 2.240021008403361, "grad_norm": 9.595777530635301, "learning_rate": 1.8336474730551807e-06, "loss": 0.6568527817726135, "step": 4265 }, { "epoch": 2.240546218487395, "grad_norm": 9.879017482166368, "learning_rate": 1.8312829143680562e-06, "loss": 0.3603191375732422, "step": 4266 }, { "epoch": 2.2410714285714284, "grad_norm": 6.578677773465254, "learning_rate": 1.828919539422666e-06, "loss": 0.4119417667388916, "step": 4267 }, { "epoch": 2.241596638655462, "grad_norm": 11.091435318143251, "learning_rate": 1.8265573491018978e-06, "loss": 0.6172628998756409, "step": 4268 }, { "epoch": 2.2421218487394956, "grad_norm": 28.052879120220954, "learning_rate": 1.8241963442882005e-06, "loss": 0.5042892694473267, "step": 4269 }, { "epoch": 2.2426470588235294, "grad_norm": 10.741007294378209, "learning_rate": 1.8218365258635767e-06, "loss": 0.8626440763473511, "step": 4270 }, { "epoch": 2.243172268907563, "grad_norm": 8.08388356503716, "learning_rate": 1.8194778947095866e-06, "loss": 0.8154909014701843, "step": 4271 }, { "epoch": 2.2436974789915967, "grad_norm": 15.921824884484678, "learning_rate": 1.8171204517073472e-06, "loss": 0.3045297861099243, "step": 4272 }, { "epoch": 2.24422268907563, "grad_norm": 11.139114130473832, "learning_rate": 1.8147641977375313e-06, "loss": 0.18273624777793884, "step": 4273 }, { "epoch": 2.244747899159664, "grad_norm": 12.013231223046976, "learning_rate": 1.8124091336803684e-06, "loss": 0.6754957437515259, "step": 4274 }, { "epoch": 2.2452731092436973, "grad_norm": 11.867861241802702, "learning_rate": 1.81005526041564e-06, "loss": 0.8179190158843994, "step": 4275 }, { "epoch": 2.245798319327731, "grad_norm": 16.47379659801115, "learning_rate": 1.8077025788226898e-06, "loss": 0.37153083086013794, "step": 4276 }, { "epoch": 2.2463235294117645, "grad_norm": 17.569367020613644, "learning_rate": 1.8053510897804105e-06, "loss": 0.4909835457801819, "step": 4277 }, { "epoch": 2.2468487394957983, "grad_norm": 13.35263942734109, "learning_rate": 1.80300079416725e-06, "loss": 0.47448334097862244, "step": 4278 }, { "epoch": 2.247373949579832, "grad_norm": 9.428658728702898, "learning_rate": 1.8006516928612121e-06, "loss": 0.8697469234466553, "step": 4279 }, { "epoch": 2.2478991596638656, "grad_norm": 9.434113994014082, "learning_rate": 1.798303786739854e-06, "loss": 0.44476377964019775, "step": 4280 }, { "epoch": 2.248424369747899, "grad_norm": 13.118989511658329, "learning_rate": 1.7959570766802847e-06, "loss": 0.7967353463172913, "step": 4281 }, { "epoch": 2.2489495798319328, "grad_norm": 13.719742825553485, "learning_rate": 1.7936115635591684e-06, "loss": 0.8126027584075928, "step": 4282 }, { "epoch": 2.2494747899159666, "grad_norm": 7.911725621639151, "learning_rate": 1.79126724825272e-06, "loss": 0.3285817801952362, "step": 4283 }, { "epoch": 2.25, "grad_norm": 8.437431585657757, "learning_rate": 1.7889241316367112e-06, "loss": 0.3654254078865051, "step": 4284 }, { "epoch": 2.2505252100840334, "grad_norm": 6.008724218610319, "learning_rate": 1.786582214586462e-06, "loss": 0.3179757297039032, "step": 4285 }, { "epoch": 2.2510504201680672, "grad_norm": 9.53388500267815, "learning_rate": 1.7842414979768453e-06, "loss": 0.5743989944458008, "step": 4286 }, { "epoch": 2.251575630252101, "grad_norm": 7.160110305296361, "learning_rate": 1.7819019826822853e-06, "loss": 0.37717652320861816, "step": 4287 }, { "epoch": 2.2521008403361344, "grad_norm": 9.154946871408706, "learning_rate": 1.7795636695767582e-06, "loss": 0.204721137881279, "step": 4288 }, { "epoch": 2.252626050420168, "grad_norm": 14.832979058980008, "learning_rate": 1.77722655953379e-06, "loss": 0.6357308626174927, "step": 4289 }, { "epoch": 2.2531512605042017, "grad_norm": 12.381302350771634, "learning_rate": 1.7748906534264565e-06, "loss": 0.3569576144218445, "step": 4290 }, { "epoch": 2.2536764705882355, "grad_norm": 11.748537287955768, "learning_rate": 1.7725559521273887e-06, "loss": 0.5137014389038086, "step": 4291 }, { "epoch": 2.254201680672269, "grad_norm": 8.644959563734952, "learning_rate": 1.7702224565087629e-06, "loss": 0.6743233799934387, "step": 4292 }, { "epoch": 2.2547268907563023, "grad_norm": 7.622560298300759, "learning_rate": 1.7678901674423044e-06, "loss": 0.7572541832923889, "step": 4293 }, { "epoch": 2.255252100840336, "grad_norm": 9.916966544236823, "learning_rate": 1.765559085799291e-06, "loss": 0.5181331634521484, "step": 4294 }, { "epoch": 2.25577731092437, "grad_norm": 10.883205033840762, "learning_rate": 1.7632292124505474e-06, "loss": 0.7161169648170471, "step": 4295 }, { "epoch": 2.2563025210084033, "grad_norm": 6.951025062262527, "learning_rate": 1.7609005482664472e-06, "loss": 0.2906675338745117, "step": 4296 }, { "epoch": 2.2568277310924367, "grad_norm": 14.973160304477666, "learning_rate": 1.7585730941169105e-06, "loss": 0.6282626986503601, "step": 4297 }, { "epoch": 2.2573529411764706, "grad_norm": 8.911523746568987, "learning_rate": 1.7562468508714115e-06, "loss": 0.40257585048675537, "step": 4298 }, { "epoch": 2.2578781512605044, "grad_norm": 14.840054397955528, "learning_rate": 1.753921819398966e-06, "loss": 0.8833602666854858, "step": 4299 }, { "epoch": 2.258403361344538, "grad_norm": 15.331435867589901, "learning_rate": 1.7515980005681383e-06, "loss": 0.3591822385787964, "step": 4300 }, { "epoch": 2.2589285714285716, "grad_norm": 9.454405703715667, "learning_rate": 1.7492753952470415e-06, "loss": 0.36576658487319946, "step": 4301 }, { "epoch": 2.259453781512605, "grad_norm": 12.652525405012328, "learning_rate": 1.7469540043033335e-06, "loss": 0.3571898639202118, "step": 4302 }, { "epoch": 2.259978991596639, "grad_norm": 8.532771320904967, "learning_rate": 1.7446338286042196e-06, "loss": 0.5041860342025757, "step": 4303 }, { "epoch": 2.2605042016806722, "grad_norm": 16.742160510813598, "learning_rate": 1.7423148690164505e-06, "loss": 0.4292744994163513, "step": 4304 }, { "epoch": 2.261029411764706, "grad_norm": 8.742113836470347, "learning_rate": 1.739997126406322e-06, "loss": 0.13389720022678375, "step": 4305 }, { "epoch": 2.2615546218487395, "grad_norm": 12.027532236582896, "learning_rate": 1.7376806016396786e-06, "loss": 0.7063834071159363, "step": 4306 }, { "epoch": 2.2620798319327733, "grad_norm": 15.170850761266513, "learning_rate": 1.7353652955819067e-06, "loss": 0.2498408555984497, "step": 4307 }, { "epoch": 2.2626050420168067, "grad_norm": 10.825787617819445, "learning_rate": 1.7330512090979372e-06, "loss": 0.6601535081863403, "step": 4308 }, { "epoch": 2.2631302521008405, "grad_norm": 13.445506801696927, "learning_rate": 1.7307383430522474e-06, "loss": 0.7677643299102783, "step": 4309 }, { "epoch": 2.263655462184874, "grad_norm": 10.403545817284883, "learning_rate": 1.7284266983088565e-06, "loss": 0.3086925446987152, "step": 4310 }, { "epoch": 2.2641806722689077, "grad_norm": 6.837787702603852, "learning_rate": 1.7261162757313299e-06, "loss": 0.15964150428771973, "step": 4311 }, { "epoch": 2.264705882352941, "grad_norm": 16.02081449997177, "learning_rate": 1.7238070761827725e-06, "loss": 0.5171663165092468, "step": 4312 }, { "epoch": 2.265231092436975, "grad_norm": 15.381150793292788, "learning_rate": 1.7214991005258386e-06, "loss": 0.7723196744918823, "step": 4313 }, { "epoch": 2.2657563025210083, "grad_norm": 15.114327333104479, "learning_rate": 1.7191923496227203e-06, "loss": 0.3939249813556671, "step": 4314 }, { "epoch": 2.266281512605042, "grad_norm": 15.411326455149124, "learning_rate": 1.7168868243351532e-06, "loss": 0.36170852184295654, "step": 4315 }, { "epoch": 2.2668067226890756, "grad_norm": 10.25117781012011, "learning_rate": 1.7145825255244153e-06, "loss": 0.47441792488098145, "step": 4316 }, { "epoch": 2.2673319327731094, "grad_norm": 7.4714144047658735, "learning_rate": 1.7122794540513265e-06, "loss": 0.44890767335891724, "step": 4317 }, { "epoch": 2.267857142857143, "grad_norm": 16.228638107624416, "learning_rate": 1.7099776107762483e-06, "loss": 0.7477537393569946, "step": 4318 }, { "epoch": 2.2683823529411766, "grad_norm": 11.69767304812106, "learning_rate": 1.7076769965590834e-06, "loss": 0.4765632152557373, "step": 4319 }, { "epoch": 2.26890756302521, "grad_norm": 9.308848236540593, "learning_rate": 1.705377612259273e-06, "loss": 0.2893710136413574, "step": 4320 }, { "epoch": 2.269432773109244, "grad_norm": 8.555800306382825, "learning_rate": 1.703079458735805e-06, "loss": 0.3118830621242523, "step": 4321 }, { "epoch": 2.2699579831932772, "grad_norm": 13.776481389860628, "learning_rate": 1.700782536847202e-06, "loss": 0.7710450291633606, "step": 4322 }, { "epoch": 2.270483193277311, "grad_norm": 10.577928239660647, "learning_rate": 1.698486847451527e-06, "loss": 0.47262483835220337, "step": 4323 }, { "epoch": 2.2710084033613445, "grad_norm": 13.069959610631283, "learning_rate": 1.696192391406385e-06, "loss": 0.5836432576179504, "step": 4324 }, { "epoch": 2.2715336134453783, "grad_norm": 10.966580417486627, "learning_rate": 1.6938991695689184e-06, "loss": 0.6819464564323425, "step": 4325 }, { "epoch": 2.2720588235294117, "grad_norm": 13.005761213457445, "learning_rate": 1.6916071827958087e-06, "loss": 0.33721476793289185, "step": 4326 }, { "epoch": 2.2725840336134455, "grad_norm": 13.135660336334526, "learning_rate": 1.6893164319432748e-06, "loss": 0.5102297067642212, "step": 4327 }, { "epoch": 2.273109243697479, "grad_norm": 8.064421370615745, "learning_rate": 1.6870269178670795e-06, "loss": 0.3952482342720032, "step": 4328 }, { "epoch": 2.2736344537815127, "grad_norm": 10.792512616267555, "learning_rate": 1.684738641422517e-06, "loss": 0.7456547021865845, "step": 4329 }, { "epoch": 2.274159663865546, "grad_norm": 15.020122157114749, "learning_rate": 1.6824516034644217e-06, "loss": 0.5874942541122437, "step": 4330 }, { "epoch": 2.27468487394958, "grad_norm": 10.032837792324347, "learning_rate": 1.6801658048471658e-06, "loss": 0.6321033835411072, "step": 4331 }, { "epoch": 2.2752100840336134, "grad_norm": 11.303584435137592, "learning_rate": 1.677881246424658e-06, "loss": 0.450790673494339, "step": 4332 }, { "epoch": 2.275735294117647, "grad_norm": 9.847503120573618, "learning_rate": 1.6755979290503437e-06, "loss": 0.404061496257782, "step": 4333 }, { "epoch": 2.2762605042016806, "grad_norm": 15.949321649438565, "learning_rate": 1.6733158535772031e-06, "loss": 0.3456595242023468, "step": 4334 }, { "epoch": 2.2767857142857144, "grad_norm": 14.872070372220424, "learning_rate": 1.671035020857757e-06, "loss": 0.34505870938301086, "step": 4335 }, { "epoch": 2.277310924369748, "grad_norm": 7.214427980667019, "learning_rate": 1.6687554317440575e-06, "loss": 0.3279365301132202, "step": 4336 }, { "epoch": 2.2778361344537816, "grad_norm": 10.78146979921209, "learning_rate": 1.666477087087694e-06, "loss": 0.532626211643219, "step": 4337 }, { "epoch": 2.278361344537815, "grad_norm": 7.522589992997947, "learning_rate": 1.6641999877397903e-06, "loss": 0.45383769273757935, "step": 4338 }, { "epoch": 2.278886554621849, "grad_norm": 7.990300738402109, "learning_rate": 1.6619241345510057e-06, "loss": 0.3599814772605896, "step": 4339 }, { "epoch": 2.2794117647058822, "grad_norm": 10.848762924233993, "learning_rate": 1.659649528371533e-06, "loss": 0.36083564162254333, "step": 4340 }, { "epoch": 2.279936974789916, "grad_norm": 12.54704117742708, "learning_rate": 1.6573761700511004e-06, "loss": 0.3345023989677429, "step": 4341 }, { "epoch": 2.2804621848739495, "grad_norm": 7.564376089304036, "learning_rate": 1.6551040604389674e-06, "loss": 0.3725637197494507, "step": 4342 }, { "epoch": 2.2809873949579833, "grad_norm": 14.54050293277215, "learning_rate": 1.6528332003839325e-06, "loss": 0.3416447639465332, "step": 4343 }, { "epoch": 2.2815126050420167, "grad_norm": 10.163078332156777, "learning_rate": 1.6505635907343214e-06, "loss": 0.39350181818008423, "step": 4344 }, { "epoch": 2.2820378151260505, "grad_norm": 11.154794151475356, "learning_rate": 1.6482952323379958e-06, "loss": 0.7113596200942993, "step": 4345 }, { "epoch": 2.282563025210084, "grad_norm": 14.418055790581414, "learning_rate": 1.6460281260423495e-06, "loss": 0.3519514203071594, "step": 4346 }, { "epoch": 2.2830882352941178, "grad_norm": 14.520078892222791, "learning_rate": 1.6437622726943076e-06, "loss": 1.4245522022247314, "step": 4347 }, { "epoch": 2.283613445378151, "grad_norm": 8.285158276756874, "learning_rate": 1.6414976731403265e-06, "loss": 0.41295862197875977, "step": 4348 }, { "epoch": 2.284138655462185, "grad_norm": 7.240487739459167, "learning_rate": 1.639234328226399e-06, "loss": 0.28958454728126526, "step": 4349 }, { "epoch": 2.2846638655462184, "grad_norm": 8.628130571430734, "learning_rate": 1.6369722387980442e-06, "loss": 0.29284539818763733, "step": 4350 }, { "epoch": 2.285189075630252, "grad_norm": 6.934118703501691, "learning_rate": 1.6347114057003116e-06, "loss": 0.30714741349220276, "step": 4351 }, { "epoch": 2.2857142857142856, "grad_norm": 9.935739273690379, "learning_rate": 1.6324518297777875e-06, "loss": 0.3020477592945099, "step": 4352 }, { "epoch": 2.2862394957983194, "grad_norm": 10.933111882023026, "learning_rate": 1.6301935118745826e-06, "loss": 0.3865164518356323, "step": 4353 }, { "epoch": 2.286764705882353, "grad_norm": 11.428036489735486, "learning_rate": 1.62793645283434e-06, "loss": 0.6368634104728699, "step": 4354 }, { "epoch": 2.2872899159663866, "grad_norm": 16.517205459997992, "learning_rate": 1.6256806535002312e-06, "loss": 1.2036032676696777, "step": 4355 }, { "epoch": 2.28781512605042, "grad_norm": 9.146910147728873, "learning_rate": 1.6234261147149594e-06, "loss": 0.28254234790802, "step": 4356 }, { "epoch": 2.288340336134454, "grad_norm": 14.054019769884457, "learning_rate": 1.621172837320754e-06, "loss": 0.5170860290527344, "step": 4357 }, { "epoch": 2.2888655462184873, "grad_norm": 13.45097323662491, "learning_rate": 1.618920822159375e-06, "loss": 0.32772767543792725, "step": 4358 }, { "epoch": 2.289390756302521, "grad_norm": 13.691649496391307, "learning_rate": 1.6166700700721121e-06, "loss": 0.38849198818206787, "step": 4359 }, { "epoch": 2.2899159663865545, "grad_norm": 12.1222124753655, "learning_rate": 1.614420581899781e-06, "loss": 0.3882126808166504, "step": 4360 }, { "epoch": 2.2904411764705883, "grad_norm": 11.678439052954568, "learning_rate": 1.6121723584827259e-06, "loss": 0.5290379524230957, "step": 4361 }, { "epoch": 2.2909663865546217, "grad_norm": 12.6188254512893, "learning_rate": 1.6099254006608183e-06, "loss": 0.19659966230392456, "step": 4362 }, { "epoch": 2.2914915966386555, "grad_norm": 12.166327804570395, "learning_rate": 1.6076797092734575e-06, "loss": 0.3771727681159973, "step": 4363 }, { "epoch": 2.292016806722689, "grad_norm": 15.895343917997305, "learning_rate": 1.6054352851595684e-06, "loss": 0.42266786098480225, "step": 4364 }, { "epoch": 2.2925420168067228, "grad_norm": 13.12300565247586, "learning_rate": 1.6031921291576048e-06, "loss": 0.8594948649406433, "step": 4365 }, { "epoch": 2.293067226890756, "grad_norm": 9.409440418091075, "learning_rate": 1.6009502421055423e-06, "loss": 0.3741492033004761, "step": 4366 }, { "epoch": 2.29359243697479, "grad_norm": 19.970440709074943, "learning_rate": 1.5987096248408896e-06, "loss": 0.780980110168457, "step": 4367 }, { "epoch": 2.2941176470588234, "grad_norm": 6.582649351131381, "learning_rate": 1.5964702782006753e-06, "loss": 0.20653480291366577, "step": 4368 }, { "epoch": 2.294642857142857, "grad_norm": 15.809918568473499, "learning_rate": 1.5942322030214547e-06, "loss": 0.6179236173629761, "step": 4369 }, { "epoch": 2.2951680672268906, "grad_norm": 12.307168966328556, "learning_rate": 1.591995400139309e-06, "loss": 0.4987037479877472, "step": 4370 }, { "epoch": 2.2956932773109244, "grad_norm": 12.33854650104722, "learning_rate": 1.5897598703898432e-06, "loss": 0.6230236887931824, "step": 4371 }, { "epoch": 2.296218487394958, "grad_norm": 14.145721467951539, "learning_rate": 1.5875256146081868e-06, "loss": 0.4220436215400696, "step": 4372 }, { "epoch": 2.2967436974789917, "grad_norm": 13.867589347346152, "learning_rate": 1.5852926336289926e-06, "loss": 0.3709479570388794, "step": 4373 }, { "epoch": 2.297268907563025, "grad_norm": 9.87653617656067, "learning_rate": 1.5830609282864412e-06, "loss": 0.3221050500869751, "step": 4374 }, { "epoch": 2.297794117647059, "grad_norm": 9.950679674557886, "learning_rate": 1.5808304994142315e-06, "loss": 0.4723668396472931, "step": 4375 }, { "epoch": 2.2983193277310923, "grad_norm": 12.8159080605527, "learning_rate": 1.578601347845588e-06, "loss": 0.5835788249969482, "step": 4376 }, { "epoch": 2.298844537815126, "grad_norm": 10.218066774260638, "learning_rate": 1.5763734744132587e-06, "loss": 0.3129946291446686, "step": 4377 }, { "epoch": 2.2993697478991595, "grad_norm": 9.852644863950895, "learning_rate": 1.5741468799495112e-06, "loss": 0.25117218494415283, "step": 4378 }, { "epoch": 2.2998949579831933, "grad_norm": 14.071437665169947, "learning_rate": 1.571921565286139e-06, "loss": 0.5350000858306885, "step": 4379 }, { "epoch": 2.3004201680672267, "grad_norm": 16.766135653846202, "learning_rate": 1.5696975312544532e-06, "loss": 0.3731675148010254, "step": 4380 }, { "epoch": 2.3009453781512605, "grad_norm": 12.037650977090113, "learning_rate": 1.5674747786852935e-06, "loss": 0.43751031160354614, "step": 4381 }, { "epoch": 2.301470588235294, "grad_norm": 11.089661880105295, "learning_rate": 1.5652533084090126e-06, "loss": 0.4016155004501343, "step": 4382 }, { "epoch": 2.3019957983193278, "grad_norm": 10.943201933863106, "learning_rate": 1.5630331212554906e-06, "loss": 0.8298485279083252, "step": 4383 }, { "epoch": 2.302521008403361, "grad_norm": 9.14803865518618, "learning_rate": 1.5608142180541236e-06, "loss": 0.25823837518692017, "step": 4384 }, { "epoch": 2.303046218487395, "grad_norm": 8.04483506782103, "learning_rate": 1.5585965996338314e-06, "loss": 0.3800016939640045, "step": 4385 }, { "epoch": 2.3035714285714284, "grad_norm": 10.034853433108301, "learning_rate": 1.5563802668230522e-06, "loss": 0.48947906494140625, "step": 4386 }, { "epoch": 2.304096638655462, "grad_norm": 12.286004901127377, "learning_rate": 1.5541652204497443e-06, "loss": 0.7934751510620117, "step": 4387 }, { "epoch": 2.3046218487394956, "grad_norm": 9.313521210545806, "learning_rate": 1.5519514613413832e-06, "loss": 0.3479290306568146, "step": 4388 }, { "epoch": 2.3051470588235294, "grad_norm": 9.705513405157093, "learning_rate": 1.5497389903249705e-06, "loss": 0.5358251333236694, "step": 4389 }, { "epoch": 2.3056722689075633, "grad_norm": 10.167095469659124, "learning_rate": 1.5475278082270185e-06, "loss": 0.33251112699508667, "step": 4390 }, { "epoch": 2.3061974789915967, "grad_norm": 12.219389713798087, "learning_rate": 1.5453179158735626e-06, "loss": 0.6672162413597107, "step": 4391 }, { "epoch": 2.30672268907563, "grad_norm": 17.70724727180953, "learning_rate": 1.5431093140901548e-06, "loss": 0.3237547278404236, "step": 4392 }, { "epoch": 2.307247899159664, "grad_norm": 8.29131756283089, "learning_rate": 1.5409020037018652e-06, "loss": 0.4832186996936798, "step": 4393 }, { "epoch": 2.3077731092436977, "grad_norm": 11.148583584479832, "learning_rate": 1.538695985533281e-06, "loss": 0.8950933218002319, "step": 4394 }, { "epoch": 2.308298319327731, "grad_norm": 15.460688906601312, "learning_rate": 1.536491260408507e-06, "loss": 0.5262115597724915, "step": 4395 }, { "epoch": 2.3088235294117645, "grad_norm": 16.68986140644925, "learning_rate": 1.5342878291511675e-06, "loss": 1.0260121822357178, "step": 4396 }, { "epoch": 2.3093487394957983, "grad_norm": 7.571106561965695, "learning_rate": 1.5320856925843997e-06, "loss": 0.1923689842224121, "step": 4397 }, { "epoch": 2.309873949579832, "grad_norm": 11.044530209518856, "learning_rate": 1.5298848515308584e-06, "loss": 0.31885623931884766, "step": 4398 }, { "epoch": 2.3103991596638656, "grad_norm": 8.849088027270751, "learning_rate": 1.527685306812715e-06, "loss": 0.4185711741447449, "step": 4399 }, { "epoch": 2.310924369747899, "grad_norm": 11.187801148523693, "learning_rate": 1.5254870592516569e-06, "loss": 0.4159872233867645, "step": 4400 }, { "epoch": 2.3114495798319328, "grad_norm": 9.827097352712677, "learning_rate": 1.5232901096688847e-06, "loss": 0.3779459595680237, "step": 4401 }, { "epoch": 2.3119747899159666, "grad_norm": 12.91796680900067, "learning_rate": 1.5210944588851168e-06, "loss": 0.44369253516197205, "step": 4402 }, { "epoch": 2.3125, "grad_norm": 11.462757793812512, "learning_rate": 1.5189001077205835e-06, "loss": 0.39783555269241333, "step": 4403 }, { "epoch": 2.3130252100840334, "grad_norm": 8.897084326786889, "learning_rate": 1.5167070569950344e-06, "loss": 0.46692177653312683, "step": 4404 }, { "epoch": 2.3135504201680672, "grad_norm": 14.093689957647248, "learning_rate": 1.5145153075277286e-06, "loss": 0.7288868427276611, "step": 4405 }, { "epoch": 2.314075630252101, "grad_norm": 10.454799310162347, "learning_rate": 1.5123248601374413e-06, "loss": 0.615166425704956, "step": 4406 }, { "epoch": 2.3146008403361344, "grad_norm": 7.12706837617487, "learning_rate": 1.5101357156424601e-06, "loss": 0.20782826840877533, "step": 4407 }, { "epoch": 2.315126050420168, "grad_norm": 11.74774688766439, "learning_rate": 1.5079478748605874e-06, "loss": 0.3646509051322937, "step": 4408 }, { "epoch": 2.3156512605042017, "grad_norm": 11.905856892317097, "learning_rate": 1.505761338609137e-06, "loss": 0.47605645656585693, "step": 4409 }, { "epoch": 2.3161764705882355, "grad_norm": 15.026179011873387, "learning_rate": 1.5035761077049344e-06, "loss": 0.7398310303688049, "step": 4410 }, { "epoch": 2.316701680672269, "grad_norm": 10.281670323887347, "learning_rate": 1.501392182964323e-06, "loss": 0.20230919122695923, "step": 4411 }, { "epoch": 2.3172268907563023, "grad_norm": 11.672565647048748, "learning_rate": 1.4992095652031518e-06, "loss": 0.8576836585998535, "step": 4412 }, { "epoch": 2.317752100840336, "grad_norm": 15.679845450616638, "learning_rate": 1.4970282552367854e-06, "loss": 0.3027660846710205, "step": 4413 }, { "epoch": 2.31827731092437, "grad_norm": 10.589700262780413, "learning_rate": 1.4948482538800974e-06, "loss": 0.28417763113975525, "step": 4414 }, { "epoch": 2.3188025210084033, "grad_norm": 8.405692327071685, "learning_rate": 1.4926695619474747e-06, "loss": 0.3037428855895996, "step": 4415 }, { "epoch": 2.3193277310924367, "grad_norm": 13.642124447538226, "learning_rate": 1.4904921802528133e-06, "loss": 0.52169269323349, "step": 4416 }, { "epoch": 2.3198529411764706, "grad_norm": 12.377939837406828, "learning_rate": 1.4883161096095189e-06, "loss": 0.9864881038665771, "step": 4417 }, { "epoch": 2.3203781512605044, "grad_norm": 10.41779799698978, "learning_rate": 1.4861413508305128e-06, "loss": 0.4198512136936188, "step": 4418 }, { "epoch": 2.320903361344538, "grad_norm": 10.179067127791503, "learning_rate": 1.4839679047282206e-06, "loss": 0.34290653467178345, "step": 4419 }, { "epoch": 2.3214285714285716, "grad_norm": 7.925017466820183, "learning_rate": 1.4817957721145793e-06, "loss": 0.7075372934341431, "step": 4420 }, { "epoch": 2.321953781512605, "grad_norm": 6.103286752754161, "learning_rate": 1.4796249538010354e-06, "loss": 0.3007611334323883, "step": 4421 }, { "epoch": 2.322478991596639, "grad_norm": 12.25961755977851, "learning_rate": 1.477455450598544e-06, "loss": 0.4300242066383362, "step": 4422 }, { "epoch": 2.3230042016806722, "grad_norm": 8.699434886780793, "learning_rate": 1.4752872633175691e-06, "loss": 0.4099404811859131, "step": 4423 }, { "epoch": 2.323529411764706, "grad_norm": 10.741312045169979, "learning_rate": 1.4731203927680842e-06, "loss": 0.5882526636123657, "step": 4424 }, { "epoch": 2.3240546218487395, "grad_norm": 9.297351370478692, "learning_rate": 1.4709548397595674e-06, "loss": 0.5598904490470886, "step": 4425 }, { "epoch": 2.3245798319327733, "grad_norm": 13.838742636903982, "learning_rate": 1.4687906051010103e-06, "loss": 0.6112613677978516, "step": 4426 }, { "epoch": 2.3251050420168067, "grad_norm": 6.7243179871563425, "learning_rate": 1.4666276896009079e-06, "loss": 0.2332310527563095, "step": 4427 }, { "epoch": 2.3256302521008405, "grad_norm": 10.971361557829512, "learning_rate": 1.4644660940672628e-06, "loss": 0.3349398970603943, "step": 4428 }, { "epoch": 2.326155462184874, "grad_norm": 9.07198473960286, "learning_rate": 1.4623058193075852e-06, "loss": 1.245658040046692, "step": 4429 }, { "epoch": 2.3266806722689077, "grad_norm": 15.482122106635144, "learning_rate": 1.460146866128892e-06, "loss": 0.2873179316520691, "step": 4430 }, { "epoch": 2.327205882352941, "grad_norm": 11.523410821172185, "learning_rate": 1.4579892353377055e-06, "loss": 0.5670019388198853, "step": 4431 }, { "epoch": 2.327731092436975, "grad_norm": 8.663789278131366, "learning_rate": 1.4558329277400535e-06, "loss": 0.11815594136714935, "step": 4432 }, { "epoch": 2.3282563025210083, "grad_norm": 10.147117036298072, "learning_rate": 1.453677944141474e-06, "loss": 0.18382048606872559, "step": 4433 }, { "epoch": 2.328781512605042, "grad_norm": 11.139442297776682, "learning_rate": 1.4515242853470047e-06, "loss": 1.187211275100708, "step": 4434 }, { "epoch": 2.3293067226890756, "grad_norm": 7.022775976264377, "learning_rate": 1.449371952161191e-06, "loss": 0.26034629344940186, "step": 4435 }, { "epoch": 2.3298319327731094, "grad_norm": 8.971793472401474, "learning_rate": 1.4472209453880831e-06, "loss": 0.992083728313446, "step": 4436 }, { "epoch": 2.330357142857143, "grad_norm": 14.57673993175187, "learning_rate": 1.4450712658312356e-06, "loss": 0.3485121726989746, "step": 4437 }, { "epoch": 2.3308823529411766, "grad_norm": 10.92881401042016, "learning_rate": 1.4429229142937062e-06, "loss": 0.29710543155670166, "step": 4438 }, { "epoch": 2.33140756302521, "grad_norm": 5.696441023658666, "learning_rate": 1.4407758915780578e-06, "loss": 0.19231635332107544, "step": 4439 }, { "epoch": 2.331932773109244, "grad_norm": 12.725226143271817, "learning_rate": 1.4386301984863548e-06, "loss": 0.7229375839233398, "step": 4440 }, { "epoch": 2.3324579831932772, "grad_norm": 11.279211075275676, "learning_rate": 1.43648583582017e-06, "loss": 0.4464646577835083, "step": 4441 }, { "epoch": 2.332983193277311, "grad_norm": 10.586498248344075, "learning_rate": 1.4343428043805734e-06, "loss": 0.2856397330760956, "step": 4442 }, { "epoch": 2.3335084033613445, "grad_norm": 8.610565290612234, "learning_rate": 1.432201104968141e-06, "loss": 0.6283878087997437, "step": 4443 }, { "epoch": 2.3340336134453783, "grad_norm": 12.3250343639634, "learning_rate": 1.4300607383829495e-06, "loss": 0.3797636032104492, "step": 4444 }, { "epoch": 2.3345588235294117, "grad_norm": 15.96257785335536, "learning_rate": 1.4279217054245793e-06, "loss": 0.6095945835113525, "step": 4445 }, { "epoch": 2.3350840336134455, "grad_norm": 10.161222958965807, "learning_rate": 1.4257840068921103e-06, "loss": 0.2816503942012787, "step": 4446 }, { "epoch": 2.335609243697479, "grad_norm": 13.669656851961088, "learning_rate": 1.423647643584125e-06, "loss": 0.26294994354248047, "step": 4447 }, { "epoch": 2.3361344537815127, "grad_norm": 12.89727894304432, "learning_rate": 1.4215126162987097e-06, "loss": 0.49291765689849854, "step": 4448 }, { "epoch": 2.336659663865546, "grad_norm": 10.492355561706045, "learning_rate": 1.4193789258334485e-06, "loss": 0.24138548970222473, "step": 4449 }, { "epoch": 2.33718487394958, "grad_norm": 8.957404872028208, "learning_rate": 1.4172465729854262e-06, "loss": 0.5285111665725708, "step": 4450 }, { "epoch": 2.3377100840336134, "grad_norm": 5.983758652579017, "learning_rate": 1.4151155585512288e-06, "loss": 0.13447096943855286, "step": 4451 }, { "epoch": 2.338235294117647, "grad_norm": 7.098674025642296, "learning_rate": 1.4129858833269422e-06, "loss": 0.288321852684021, "step": 4452 }, { "epoch": 2.3387605042016806, "grad_norm": 18.10725020355562, "learning_rate": 1.4108575481081522e-06, "loss": 0.5799267888069153, "step": 4453 }, { "epoch": 2.3392857142857144, "grad_norm": 8.769690651248249, "learning_rate": 1.4087305536899415e-06, "loss": 0.7320141792297363, "step": 4454 }, { "epoch": 2.339810924369748, "grad_norm": 8.790205240868099, "learning_rate": 1.406604900866898e-06, "loss": 0.8309720158576965, "step": 4455 }, { "epoch": 2.3403361344537816, "grad_norm": 11.898972273798018, "learning_rate": 1.404480590433102e-06, "loss": 0.8555180430412292, "step": 4456 }, { "epoch": 2.340861344537815, "grad_norm": 8.95813991738779, "learning_rate": 1.4023576231821362e-06, "loss": 0.5222865343093872, "step": 4457 }, { "epoch": 2.341386554621849, "grad_norm": 9.160181655448328, "learning_rate": 1.4002359999070797e-06, "loss": 0.6912654638290405, "step": 4458 }, { "epoch": 2.3419117647058822, "grad_norm": 9.851244147965717, "learning_rate": 1.3981157214005098e-06, "loss": 0.36685875058174133, "step": 4459 }, { "epoch": 2.342436974789916, "grad_norm": 8.409085857554166, "learning_rate": 1.3959967884545016e-06, "loss": 0.16158661246299744, "step": 4460 }, { "epoch": 2.3429621848739495, "grad_norm": 10.50898618550427, "learning_rate": 1.3938792018606278e-06, "loss": 0.6709229946136475, "step": 4461 }, { "epoch": 2.3434873949579833, "grad_norm": 11.521710950342648, "learning_rate": 1.391762962409957e-06, "loss": 0.36330825090408325, "step": 4462 }, { "epoch": 2.3440126050420167, "grad_norm": 10.648231431598703, "learning_rate": 1.3896480708930576e-06, "loss": 0.45184430480003357, "step": 4463 }, { "epoch": 2.3445378151260505, "grad_norm": 10.719388883213727, "learning_rate": 1.3875345280999913e-06, "loss": 0.6399360299110413, "step": 4464 }, { "epoch": 2.345063025210084, "grad_norm": 11.089626478412704, "learning_rate": 1.3854223348203171e-06, "loss": 0.660666823387146, "step": 4465 }, { "epoch": 2.3455882352941178, "grad_norm": 8.80264843882407, "learning_rate": 1.3833114918430896e-06, "loss": 0.2146662175655365, "step": 4466 }, { "epoch": 2.346113445378151, "grad_norm": 10.304086170519128, "learning_rate": 1.3812019999568588e-06, "loss": 0.5874398350715637, "step": 4467 }, { "epoch": 2.346638655462185, "grad_norm": 10.695658890377118, "learning_rate": 1.3790938599496712e-06, "loss": 0.37619832158088684, "step": 4468 }, { "epoch": 2.3471638655462184, "grad_norm": 12.79180398066583, "learning_rate": 1.376987072609065e-06, "loss": 1.54884934425354, "step": 4469 }, { "epoch": 2.347689075630252, "grad_norm": 16.510955416054422, "learning_rate": 1.3748816387220787e-06, "loss": 0.5533466339111328, "step": 4470 }, { "epoch": 2.3482142857142856, "grad_norm": 11.831295081158444, "learning_rate": 1.3727775590752413e-06, "loss": 0.44226396083831787, "step": 4471 }, { "epoch": 2.3487394957983194, "grad_norm": 12.238349594637782, "learning_rate": 1.370674834454575e-06, "loss": 0.7285857796669006, "step": 4472 }, { "epoch": 2.349264705882353, "grad_norm": 7.36240551837595, "learning_rate": 1.368573465645599e-06, "loss": 0.19481465220451355, "step": 4473 }, { "epoch": 2.3497899159663866, "grad_norm": 13.509107523405708, "learning_rate": 1.366473453433323e-06, "loss": 0.27436989545822144, "step": 4474 }, { "epoch": 2.35031512605042, "grad_norm": 11.84939142092099, "learning_rate": 1.3643747986022521e-06, "loss": 0.5133126974105835, "step": 4475 }, { "epoch": 2.350840336134454, "grad_norm": 8.796568434767028, "learning_rate": 1.3622775019363827e-06, "loss": 0.4648074209690094, "step": 4476 }, { "epoch": 2.3513655462184873, "grad_norm": 9.094964859815823, "learning_rate": 1.360181564219204e-06, "loss": 0.4042467772960663, "step": 4477 }, { "epoch": 2.351890756302521, "grad_norm": 8.319322170075091, "learning_rate": 1.358086986233701e-06, "loss": 0.5059428215026855, "step": 4478 }, { "epoch": 2.3524159663865545, "grad_norm": 9.77888075804398, "learning_rate": 1.3559937687623458e-06, "loss": 0.6230376362800598, "step": 4479 }, { "epoch": 2.3529411764705883, "grad_norm": 10.28907790673492, "learning_rate": 1.3539019125871057e-06, "loss": 0.6566630005836487, "step": 4480 }, { "epoch": 2.3534663865546217, "grad_norm": 11.750925105401295, "learning_rate": 1.351811418489436e-06, "loss": 0.41594552993774414, "step": 4481 }, { "epoch": 2.3539915966386555, "grad_norm": 15.97060569749755, "learning_rate": 1.3497222872502868e-06, "loss": 0.23841039836406708, "step": 4482 }, { "epoch": 2.354516806722689, "grad_norm": 9.445741091086223, "learning_rate": 1.3476345196500978e-06, "loss": 0.5826966166496277, "step": 4483 }, { "epoch": 2.3550420168067228, "grad_norm": 10.5760152407604, "learning_rate": 1.3455481164687967e-06, "loss": 0.32878589630126953, "step": 4484 }, { "epoch": 2.355567226890756, "grad_norm": 7.098752200527344, "learning_rate": 1.3434630784858067e-06, "loss": 0.5044801235198975, "step": 4485 }, { "epoch": 2.35609243697479, "grad_norm": 14.220339734493516, "learning_rate": 1.3413794064800373e-06, "loss": 0.45580434799194336, "step": 4486 }, { "epoch": 2.3566176470588234, "grad_norm": 8.36565755090009, "learning_rate": 1.3392971012298883e-06, "loss": 0.37136155366897583, "step": 4487 }, { "epoch": 2.357142857142857, "grad_norm": 10.728025326411071, "learning_rate": 1.3372161635132486e-06, "loss": 0.2327875792980194, "step": 4488 }, { "epoch": 2.3576680672268906, "grad_norm": 9.83698742983316, "learning_rate": 1.335136594107498e-06, "loss": 0.36251547932624817, "step": 4489 }, { "epoch": 2.3581932773109244, "grad_norm": 10.826874145764695, "learning_rate": 1.333058393789503e-06, "loss": 0.24773839116096497, "step": 4490 }, { "epoch": 2.358718487394958, "grad_norm": 13.734248774530457, "learning_rate": 1.3309815633356181e-06, "loss": 0.5755770206451416, "step": 4491 }, { "epoch": 2.3592436974789917, "grad_norm": 12.563413992488053, "learning_rate": 1.328906103521691e-06, "loss": 0.4366951882839203, "step": 4492 }, { "epoch": 2.359768907563025, "grad_norm": 8.78774750837823, "learning_rate": 1.3268320151230518e-06, "loss": 1.001713514328003, "step": 4493 }, { "epoch": 2.360294117647059, "grad_norm": 15.69595928553154, "learning_rate": 1.3247592989145213e-06, "loss": 1.3247722387313843, "step": 4494 }, { "epoch": 2.3608193277310923, "grad_norm": 7.494338259169056, "learning_rate": 1.322687955670406e-06, "loss": 0.443425714969635, "step": 4495 }, { "epoch": 2.361344537815126, "grad_norm": 12.271734786939172, "learning_rate": 1.3206179861645003e-06, "loss": 0.8458099365234375, "step": 4496 }, { "epoch": 2.3618697478991595, "grad_norm": 8.775425752530367, "learning_rate": 1.3185493911700854e-06, "loss": 0.4446178674697876, "step": 4497 }, { "epoch": 2.3623949579831933, "grad_norm": 16.956798719350704, "learning_rate": 1.3164821714599296e-06, "loss": 1.2176728248596191, "step": 4498 }, { "epoch": 2.3629201680672267, "grad_norm": 12.932713028329136, "learning_rate": 1.3144163278062848e-06, "loss": 0.21005836129188538, "step": 4499 }, { "epoch": 2.3634453781512605, "grad_norm": 11.801717284369776, "learning_rate": 1.3123518609808938e-06, "loss": 0.6239449381828308, "step": 4500 }, { "epoch": 2.363970588235294, "grad_norm": 8.704409848054516, "learning_rate": 1.3102887717549812e-06, "loss": 0.33125782012939453, "step": 4501 }, { "epoch": 2.3644957983193278, "grad_norm": 10.073395539635998, "learning_rate": 1.308227060899257e-06, "loss": 0.4214559495449066, "step": 4502 }, { "epoch": 2.365021008403361, "grad_norm": 17.29668995113468, "learning_rate": 1.3061667291839182e-06, "loss": 0.3144763112068176, "step": 4503 }, { "epoch": 2.365546218487395, "grad_norm": 12.387739143754072, "learning_rate": 1.3041077773786448e-06, "loss": 0.39141130447387695, "step": 4504 }, { "epoch": 2.3660714285714284, "grad_norm": 9.140243501641983, "learning_rate": 1.302050206252602e-06, "loss": 0.39588499069213867, "step": 4505 }, { "epoch": 2.366596638655462, "grad_norm": 8.744832052666892, "learning_rate": 1.299994016574439e-06, "loss": 0.3863181471824646, "step": 4506 }, { "epoch": 2.3671218487394956, "grad_norm": 10.049890083498987, "learning_rate": 1.297939209112291e-06, "loss": 0.3938748240470886, "step": 4507 }, { "epoch": 2.3676470588235294, "grad_norm": 9.192739529161791, "learning_rate": 1.295885784633774e-06, "loss": 0.7387629151344299, "step": 4508 }, { "epoch": 2.3681722689075633, "grad_norm": 11.25074840431641, "learning_rate": 1.2938337439059868e-06, "loss": 0.9342288374900818, "step": 4509 }, { "epoch": 2.3686974789915967, "grad_norm": 8.787393353785076, "learning_rate": 1.2917830876955161e-06, "loss": 0.4711243808269501, "step": 4510 }, { "epoch": 2.36922268907563, "grad_norm": 10.92742341460336, "learning_rate": 1.289733816768427e-06, "loss": 0.3783642649650574, "step": 4511 }, { "epoch": 2.369747899159664, "grad_norm": 10.595865389158499, "learning_rate": 1.2876859318902673e-06, "loss": 0.4414080083370209, "step": 4512 }, { "epoch": 2.3702731092436977, "grad_norm": 9.818670326151684, "learning_rate": 1.2856394338260691e-06, "loss": 0.4777085483074188, "step": 4513 }, { "epoch": 2.370798319327731, "grad_norm": 8.044330388133467, "learning_rate": 1.2835943233403448e-06, "loss": 0.37345853447914124, "step": 4514 }, { "epoch": 2.3713235294117645, "grad_norm": 8.44086601901999, "learning_rate": 1.2815506011970874e-06, "loss": 0.2127484381198883, "step": 4515 }, { "epoch": 2.3718487394957983, "grad_norm": 7.6709255750210525, "learning_rate": 1.2795082681597753e-06, "loss": 0.5472914576530457, "step": 4516 }, { "epoch": 2.372373949579832, "grad_norm": 14.223232908912985, "learning_rate": 1.2774673249913656e-06, "loss": 0.49846890568733215, "step": 4517 }, { "epoch": 2.3728991596638656, "grad_norm": 9.019624708036671, "learning_rate": 1.2754277724542945e-06, "loss": 0.5183377265930176, "step": 4518 }, { "epoch": 2.373424369747899, "grad_norm": 15.58524676012771, "learning_rate": 1.273389611310481e-06, "loss": 0.34386664628982544, "step": 4519 }, { "epoch": 2.3739495798319328, "grad_norm": 14.45139815052486, "learning_rate": 1.2713528423213235e-06, "loss": 0.4957372546195984, "step": 4520 }, { "epoch": 2.3744747899159666, "grad_norm": 12.608215426959852, "learning_rate": 1.2693174662477003e-06, "loss": 0.7115473747253418, "step": 4521 }, { "epoch": 2.375, "grad_norm": 9.654962039546547, "learning_rate": 1.2672834838499699e-06, "loss": 0.2595762014389038, "step": 4522 }, { "epoch": 2.3755252100840334, "grad_norm": 9.931027827541035, "learning_rate": 1.2652508958879671e-06, "loss": 0.34179964661598206, "step": 4523 }, { "epoch": 2.3760504201680672, "grad_norm": 7.314600864411473, "learning_rate": 1.263219703121013e-06, "loss": 0.21406862139701843, "step": 4524 }, { "epoch": 2.376575630252101, "grad_norm": 13.579760846219973, "learning_rate": 1.2611899063079002e-06, "loss": 0.3097609281539917, "step": 4525 }, { "epoch": 2.3771008403361344, "grad_norm": 13.948578777970463, "learning_rate": 1.259161506206903e-06, "loss": 0.3892877399921417, "step": 4526 }, { "epoch": 2.377626050420168, "grad_norm": 12.728954798673547, "learning_rate": 1.257134503575773e-06, "loss": 0.49235785007476807, "step": 4527 }, { "epoch": 2.3781512605042017, "grad_norm": 9.33542032962396, "learning_rate": 1.2551088991717409e-06, "loss": 0.5548625588417053, "step": 4528 }, { "epoch": 2.3786764705882355, "grad_norm": 14.105576588004832, "learning_rate": 1.253084693751514e-06, "loss": 0.32942134141921997, "step": 4529 }, { "epoch": 2.379201680672269, "grad_norm": 15.890278297736177, "learning_rate": 1.2510618880712755e-06, "loss": 0.7341816425323486, "step": 4530 }, { "epoch": 2.3797268907563023, "grad_norm": 14.638612778437102, "learning_rate": 1.2490404828866914e-06, "loss": 1.1487144231796265, "step": 4531 }, { "epoch": 2.380252100840336, "grad_norm": 10.822677428034597, "learning_rate": 1.2470204789528983e-06, "loss": 0.27511245012283325, "step": 4532 }, { "epoch": 2.38077731092437, "grad_norm": 10.529355342309076, "learning_rate": 1.245001877024512e-06, "loss": 0.5962315201759338, "step": 4533 }, { "epoch": 2.3813025210084033, "grad_norm": 14.016859086216845, "learning_rate": 1.2429846778556242e-06, "loss": 0.5010058879852295, "step": 4534 }, { "epoch": 2.3818277310924367, "grad_norm": 13.89031281850282, "learning_rate": 1.2409688821998022e-06, "loss": 0.5231708884239197, "step": 4535 }, { "epoch": 2.3823529411764706, "grad_norm": 10.77777049343026, "learning_rate": 1.2389544908100902e-06, "loss": 0.617600679397583, "step": 4536 }, { "epoch": 2.3828781512605044, "grad_norm": 7.056643902412701, "learning_rate": 1.2369415044390055e-06, "loss": 0.27999234199523926, "step": 4537 }, { "epoch": 2.383403361344538, "grad_norm": 20.01932814320385, "learning_rate": 1.2349299238385442e-06, "loss": 0.8885968327522278, "step": 4538 }, { "epoch": 2.3839285714285716, "grad_norm": 12.602010743357127, "learning_rate": 1.2329197497601742e-06, "loss": 0.41816869378089905, "step": 4539 }, { "epoch": 2.384453781512605, "grad_norm": 10.61641237261424, "learning_rate": 1.2309109829548382e-06, "loss": 0.5384302139282227, "step": 4540 }, { "epoch": 2.384978991596639, "grad_norm": 24.207108774787446, "learning_rate": 1.228903624172954e-06, "loss": 0.5781921148300171, "step": 4541 }, { "epoch": 2.3855042016806722, "grad_norm": 13.753831129571772, "learning_rate": 1.226897674164414e-06, "loss": 0.3146056532859802, "step": 4542 }, { "epoch": 2.386029411764706, "grad_norm": 21.067414772277196, "learning_rate": 1.224893133678583e-06, "loss": 0.46929800510406494, "step": 4543 }, { "epoch": 2.3865546218487395, "grad_norm": 19.178368373358886, "learning_rate": 1.2228900034642992e-06, "loss": 0.4543416202068329, "step": 4544 }, { "epoch": 2.3870798319327733, "grad_norm": 8.590587353420862, "learning_rate": 1.220888284269874e-06, "loss": 0.2502267360687256, "step": 4545 }, { "epoch": 2.3876050420168067, "grad_norm": 8.267644715887172, "learning_rate": 1.218887976843095e-06, "loss": 0.403756707906723, "step": 4546 }, { "epoch": 2.3881302521008405, "grad_norm": 27.172483857051382, "learning_rate": 1.2168890819312173e-06, "loss": 1.945648431777954, "step": 4547 }, { "epoch": 2.388655462184874, "grad_norm": 11.871262692990832, "learning_rate": 1.2148916002809719e-06, "loss": 0.4905507266521454, "step": 4548 }, { "epoch": 2.3891806722689077, "grad_norm": 4.4824523843108945, "learning_rate": 1.2128955326385595e-06, "loss": 0.2541502118110657, "step": 4549 }, { "epoch": 2.389705882352941, "grad_norm": 8.654776147893612, "learning_rate": 1.210900879749654e-06, "loss": 0.37291058897972107, "step": 4550 }, { "epoch": 2.390231092436975, "grad_norm": 8.544907980266823, "learning_rate": 1.208907642359401e-06, "loss": 0.19303686916828156, "step": 4551 }, { "epoch": 2.3907563025210083, "grad_norm": 9.245516836008381, "learning_rate": 1.2069158212124148e-06, "loss": 0.36492592096328735, "step": 4552 }, { "epoch": 2.391281512605042, "grad_norm": 11.486559035082562, "learning_rate": 1.2049254170527857e-06, "loss": 0.5990746021270752, "step": 4553 }, { "epoch": 2.3918067226890756, "grad_norm": 10.642291366806228, "learning_rate": 1.2029364306240703e-06, "loss": 0.34983572363853455, "step": 4554 }, { "epoch": 2.3923319327731094, "grad_norm": 14.700654848807085, "learning_rate": 1.200948862669296e-06, "loss": 0.3403421640396118, "step": 4555 }, { "epoch": 2.392857142857143, "grad_norm": 10.654595923642477, "learning_rate": 1.1989627139309617e-06, "loss": 0.3918571472167969, "step": 4556 }, { "epoch": 2.3933823529411766, "grad_norm": 10.624715948046202, "learning_rate": 1.196977985151036e-06, "loss": 0.6162440776824951, "step": 4557 }, { "epoch": 2.39390756302521, "grad_norm": 10.207329269807822, "learning_rate": 1.1949946770709558e-06, "loss": 0.32177025079727173, "step": 4558 }, { "epoch": 2.394432773109244, "grad_norm": 12.335230457637099, "learning_rate": 1.1930127904316286e-06, "loss": 1.0117385387420654, "step": 4559 }, { "epoch": 2.3949579831932772, "grad_norm": 8.243611243776725, "learning_rate": 1.1910323259734286e-06, "loss": 0.35751795768737793, "step": 4560 }, { "epoch": 2.395483193277311, "grad_norm": 8.818503755815009, "learning_rate": 1.1890532844362035e-06, "loss": 0.3382008671760559, "step": 4561 }, { "epoch": 2.3960084033613445, "grad_norm": 11.812357760685854, "learning_rate": 1.1870756665592648e-06, "loss": 0.3039571940898895, "step": 4562 }, { "epoch": 2.3965336134453783, "grad_norm": 8.267801937979563, "learning_rate": 1.1850994730813937e-06, "loss": 0.5322685837745667, "step": 4563 }, { "epoch": 2.3970588235294117, "grad_norm": 11.86240748917353, "learning_rate": 1.1831247047408396e-06, "loss": 0.6107098460197449, "step": 4564 }, { "epoch": 2.3975840336134455, "grad_norm": 9.478992360920858, "learning_rate": 1.1811513622753196e-06, "loss": 0.3338160812854767, "step": 4565 }, { "epoch": 2.398109243697479, "grad_norm": 16.787931283767268, "learning_rate": 1.1791794464220169e-06, "loss": 0.7742102742195129, "step": 4566 }, { "epoch": 2.3986344537815127, "grad_norm": 7.128087325207843, "learning_rate": 1.1772089579175816e-06, "loss": 0.2856733798980713, "step": 4567 }, { "epoch": 2.399159663865546, "grad_norm": 10.84834945215509, "learning_rate": 1.175239897498135e-06, "loss": 1.2364510297775269, "step": 4568 }, { "epoch": 2.39968487394958, "grad_norm": 14.669079873648037, "learning_rate": 1.1732722658992597e-06, "loss": 0.47522950172424316, "step": 4569 }, { "epoch": 2.4002100840336134, "grad_norm": 10.01277105949357, "learning_rate": 1.171306063856006e-06, "loss": 0.9380142688751221, "step": 4570 }, { "epoch": 2.400735294117647, "grad_norm": 8.870561127927482, "learning_rate": 1.1693412921028913e-06, "loss": 0.34390684962272644, "step": 4571 }, { "epoch": 2.4012605042016806, "grad_norm": 8.102666776910846, "learning_rate": 1.167377951373897e-06, "loss": 0.3068695664405823, "step": 4572 }, { "epoch": 2.4017857142857144, "grad_norm": 11.733510445020583, "learning_rate": 1.1654160424024718e-06, "loss": 0.3494885563850403, "step": 4573 }, { "epoch": 2.402310924369748, "grad_norm": 9.119536859268015, "learning_rate": 1.1634555659215268e-06, "loss": 0.4633587598800659, "step": 4574 }, { "epoch": 2.4028361344537816, "grad_norm": 11.788540750458916, "learning_rate": 1.1614965226634423e-06, "loss": 0.3058873414993286, "step": 4575 }, { "epoch": 2.403361344537815, "grad_norm": 8.634211487623894, "learning_rate": 1.1595389133600594e-06, "loss": 0.3870164155960083, "step": 4576 }, { "epoch": 2.403886554621849, "grad_norm": 13.009369084404737, "learning_rate": 1.1575827387426846e-06, "loss": 0.619822084903717, "step": 4577 }, { "epoch": 2.4044117647058822, "grad_norm": 7.596472587253989, "learning_rate": 1.1556279995420888e-06, "loss": 0.35728439688682556, "step": 4578 }, { "epoch": 2.404936974789916, "grad_norm": 8.100305393677553, "learning_rate": 1.1536746964885065e-06, "loss": 0.3426452875137329, "step": 4579 }, { "epoch": 2.4054621848739495, "grad_norm": 10.260781417810852, "learning_rate": 1.1517228303116356e-06, "loss": 0.28849175572395325, "step": 4580 }, { "epoch": 2.4059873949579833, "grad_norm": 10.602548146202897, "learning_rate": 1.149772401740637e-06, "loss": 0.3924471139907837, "step": 4581 }, { "epoch": 2.4065126050420167, "grad_norm": 8.276072794012705, "learning_rate": 1.1478234115041332e-06, "loss": 0.3332470655441284, "step": 4582 }, { "epoch": 2.4070378151260505, "grad_norm": 13.10012865056916, "learning_rate": 1.1458758603302145e-06, "loss": 0.7749272584915161, "step": 4583 }, { "epoch": 2.407563025210084, "grad_norm": 7.66078655500123, "learning_rate": 1.1439297489464285e-06, "loss": 0.6607871055603027, "step": 4584 }, { "epoch": 2.4080882352941178, "grad_norm": 20.841498297644005, "learning_rate": 1.1419850780797864e-06, "loss": 0.9104949235916138, "step": 4585 }, { "epoch": 2.408613445378151, "grad_norm": 24.58967149440676, "learning_rate": 1.1400418484567615e-06, "loss": 0.4393938481807709, "step": 4586 }, { "epoch": 2.409138655462185, "grad_norm": 19.30629632993457, "learning_rate": 1.1381000608032883e-06, "loss": 0.5510975122451782, "step": 4587 }, { "epoch": 2.4096638655462184, "grad_norm": 9.060418499495135, "learning_rate": 1.1361597158447634e-06, "loss": 0.6518533229827881, "step": 4588 }, { "epoch": 2.410189075630252, "grad_norm": 11.550628576725419, "learning_rate": 1.1342208143060423e-06, "loss": 0.3659060001373291, "step": 4589 }, { "epoch": 2.4107142857142856, "grad_norm": 10.536082007500719, "learning_rate": 1.1322833569114461e-06, "loss": 0.6478267312049866, "step": 4590 }, { "epoch": 2.4112394957983194, "grad_norm": 10.03293638787172, "learning_rate": 1.1303473443847507e-06, "loss": 0.6590147018432617, "step": 4591 }, { "epoch": 2.411764705882353, "grad_norm": 8.406678997050797, "learning_rate": 1.1284127774491965e-06, "loss": 0.23670372366905212, "step": 4592 }, { "epoch": 2.4122899159663866, "grad_norm": 11.042492722263283, "learning_rate": 1.1264796568274811e-06, "loss": 0.46212291717529297, "step": 4593 }, { "epoch": 2.41281512605042, "grad_norm": 8.492544898311726, "learning_rate": 1.1245479832417628e-06, "loss": 0.7500928044319153, "step": 4594 }, { "epoch": 2.413340336134454, "grad_norm": 51.10833416309785, "learning_rate": 1.1226177574136598e-06, "loss": 0.5376368165016174, "step": 4595 }, { "epoch": 2.4138655462184873, "grad_norm": 15.47020436339206, "learning_rate": 1.120688980064249e-06, "loss": 0.9305119514465332, "step": 4596 }, { "epoch": 2.414390756302521, "grad_norm": 12.071511895735837, "learning_rate": 1.118761651914065e-06, "loss": 0.3205832839012146, "step": 4597 }, { "epoch": 2.4149159663865545, "grad_norm": 11.168123193679575, "learning_rate": 1.1168357736831042e-06, "loss": 0.3182224929332733, "step": 4598 }, { "epoch": 2.4154411764705883, "grad_norm": 10.301001919075295, "learning_rate": 1.114911346090819e-06, "loss": 0.24900300800800323, "step": 4599 }, { "epoch": 2.4159663865546217, "grad_norm": 10.871850965335218, "learning_rate": 1.11298836985612e-06, "loss": 0.5075720548629761, "step": 4600 }, { "epoch": 2.4164915966386555, "grad_norm": 14.646679590428588, "learning_rate": 1.1110668456973761e-06, "loss": 0.4689010679721832, "step": 4601 }, { "epoch": 2.417016806722689, "grad_norm": 23.360825741897084, "learning_rate": 1.109146774332413e-06, "loss": 0.6588277220726013, "step": 4602 }, { "epoch": 2.4175420168067228, "grad_norm": 23.480203977740334, "learning_rate": 1.1072281564785148e-06, "loss": 0.6603044271469116, "step": 4603 }, { "epoch": 2.418067226890756, "grad_norm": 13.877326349427491, "learning_rate": 1.1053109928524208e-06, "loss": 0.37659794092178345, "step": 4604 }, { "epoch": 2.41859243697479, "grad_norm": 7.630531509101829, "learning_rate": 1.10339528417033e-06, "loss": 0.6668751239776611, "step": 4605 }, { "epoch": 2.4191176470588234, "grad_norm": 9.484671306100047, "learning_rate": 1.1014810311478956e-06, "loss": 0.6734082102775574, "step": 4606 }, { "epoch": 2.419642857142857, "grad_norm": 10.68041361148424, "learning_rate": 1.0995682345002278e-06, "loss": 0.45807453989982605, "step": 4607 }, { "epoch": 2.4201680672268906, "grad_norm": 7.363900299142912, "learning_rate": 1.097656894941892e-06, "loss": 0.6644171476364136, "step": 4608 }, { "epoch": 2.4206932773109244, "grad_norm": 9.935514576410666, "learning_rate": 1.0957470131869102e-06, "loss": 0.5023245215415955, "step": 4609 }, { "epoch": 2.421218487394958, "grad_norm": 11.434976183372724, "learning_rate": 1.0938385899487592e-06, "loss": 0.47515398263931274, "step": 4610 }, { "epoch": 2.4217436974789917, "grad_norm": 8.137973873825795, "learning_rate": 1.0919316259403695e-06, "loss": 0.6053848266601562, "step": 4611 }, { "epoch": 2.422268907563025, "grad_norm": 15.478126748003993, "learning_rate": 1.090026121874131e-06, "loss": 0.8274378776550293, "step": 4612 }, { "epoch": 2.422794117647059, "grad_norm": 8.113753229832033, "learning_rate": 1.088122078461884e-06, "loss": 0.2030077576637268, "step": 4613 }, { "epoch": 2.4233193277310923, "grad_norm": 12.815663910863112, "learning_rate": 1.0862194964149247e-06, "loss": 0.6265841722488403, "step": 4614 }, { "epoch": 2.423844537815126, "grad_norm": 15.273531269907533, "learning_rate": 1.084318376444003e-06, "loss": 0.186864972114563, "step": 4615 }, { "epoch": 2.4243697478991595, "grad_norm": 7.960351905070217, "learning_rate": 1.0824187192593221e-06, "loss": 0.21888574957847595, "step": 4616 }, { "epoch": 2.4248949579831933, "grad_norm": 14.13373694399719, "learning_rate": 1.0805205255705403e-06, "loss": 0.6205488443374634, "step": 4617 }, { "epoch": 2.4254201680672267, "grad_norm": 9.753636436422939, "learning_rate": 1.078623796086768e-06, "loss": 0.3561756908893585, "step": 4618 }, { "epoch": 2.4259453781512605, "grad_norm": 8.614605845692175, "learning_rate": 1.0767285315165675e-06, "loss": 0.7992842793464661, "step": 4619 }, { "epoch": 2.426470588235294, "grad_norm": 5.079532214651388, "learning_rate": 1.0748347325679575e-06, "loss": 0.3378203809261322, "step": 4620 }, { "epoch": 2.4269957983193278, "grad_norm": 12.258978016996133, "learning_rate": 1.0729423999484062e-06, "loss": 0.6549593210220337, "step": 4621 }, { "epoch": 2.427521008403361, "grad_norm": 11.15933276488124, "learning_rate": 1.0710515343648348e-06, "loss": 0.6643284559249878, "step": 4622 }, { "epoch": 2.428046218487395, "grad_norm": 11.934831310505624, "learning_rate": 1.0691621365236154e-06, "loss": 0.758434534072876, "step": 4623 }, { "epoch": 2.4285714285714284, "grad_norm": 11.236140098617149, "learning_rate": 1.0672742071305736e-06, "loss": 0.23000317811965942, "step": 4624 }, { "epoch": 2.429096638655462, "grad_norm": 10.18273884662997, "learning_rate": 1.0653877468909857e-06, "loss": 0.43413665890693665, "step": 4625 }, { "epoch": 2.4296218487394956, "grad_norm": 10.484440270034014, "learning_rate": 1.0635027565095779e-06, "loss": 0.7100927233695984, "step": 4626 }, { "epoch": 2.4301470588235294, "grad_norm": 10.39787756370367, "learning_rate": 1.0616192366905303e-06, "loss": 0.5624114274978638, "step": 4627 }, { "epoch": 2.4306722689075633, "grad_norm": 10.210201466083713, "learning_rate": 1.0597371881374713e-06, "loss": 0.357845664024353, "step": 4628 }, { "epoch": 2.4311974789915967, "grad_norm": 14.365351018327207, "learning_rate": 1.0578566115534794e-06, "loss": 0.5785903930664062, "step": 4629 }, { "epoch": 2.43172268907563, "grad_norm": 10.595979118429634, "learning_rate": 1.055977507641085e-06, "loss": 0.34922948479652405, "step": 4630 }, { "epoch": 2.432247899159664, "grad_norm": 9.011612235377743, "learning_rate": 1.054099877102267e-06, "loss": 0.5151946544647217, "step": 4631 }, { "epoch": 2.4327731092436977, "grad_norm": 6.991350232267147, "learning_rate": 1.052223720638454e-06, "loss": 0.16994990408420563, "step": 4632 }, { "epoch": 2.433298319327731, "grad_norm": 8.942996413150123, "learning_rate": 1.0503490389505244e-06, "loss": 0.6988615989685059, "step": 4633 }, { "epoch": 2.4338235294117645, "grad_norm": 12.219976719707963, "learning_rate": 1.048475832738804e-06, "loss": 0.9352719783782959, "step": 4634 }, { "epoch": 2.4343487394957983, "grad_norm": 8.108446115529599, "learning_rate": 1.0466041027030716e-06, "loss": 0.416052907705307, "step": 4635 }, { "epoch": 2.434873949579832, "grad_norm": 13.376560828922862, "learning_rate": 1.0447338495425506e-06, "loss": 0.7009848356246948, "step": 4636 }, { "epoch": 2.4353991596638656, "grad_norm": 10.397124759629698, "learning_rate": 1.0428650739559138e-06, "loss": 0.3095715045928955, "step": 4637 }, { "epoch": 2.435924369747899, "grad_norm": 12.509361131831966, "learning_rate": 1.0409977766412821e-06, "loss": 1.1042118072509766, "step": 4638 }, { "epoch": 2.4364495798319328, "grad_norm": 11.823671042508225, "learning_rate": 1.0391319582962244e-06, "loss": 0.8671766519546509, "step": 4639 }, { "epoch": 2.4369747899159666, "grad_norm": 16.64653003896913, "learning_rate": 1.0372676196177561e-06, "loss": 0.4441404342651367, "step": 4640 }, { "epoch": 2.4375, "grad_norm": 13.246604756211456, "learning_rate": 1.0354047613023404e-06, "loss": 0.5973472595214844, "step": 4641 }, { "epoch": 2.4380252100840334, "grad_norm": 10.513921651500217, "learning_rate": 1.0335433840458892e-06, "loss": 0.20096644759178162, "step": 4642 }, { "epoch": 2.4385504201680672, "grad_norm": 10.223128915328267, "learning_rate": 1.0316834885437594e-06, "loss": 0.4479983448982239, "step": 4643 }, { "epoch": 2.439075630252101, "grad_norm": 14.272482738733997, "learning_rate": 1.0298250754907534e-06, "loss": 0.44989848136901855, "step": 4644 }, { "epoch": 2.4396008403361344, "grad_norm": 12.102580308462185, "learning_rate": 1.0279681455811219e-06, "loss": 0.4259611964225769, "step": 4645 }, { "epoch": 2.440126050420168, "grad_norm": 10.869361108084085, "learning_rate": 1.0261126995085607e-06, "loss": 0.42129191756248474, "step": 4646 }, { "epoch": 2.4406512605042017, "grad_norm": 15.94137067423729, "learning_rate": 1.02425873796621e-06, "loss": 0.410157710313797, "step": 4647 }, { "epoch": 2.4411764705882355, "grad_norm": 9.645977618738604, "learning_rate": 1.0224062616466562e-06, "loss": 0.36366206407546997, "step": 4648 }, { "epoch": 2.441701680672269, "grad_norm": 9.70861359971365, "learning_rate": 1.0205552712419343e-06, "loss": 0.4668600559234619, "step": 4649 }, { "epoch": 2.4422268907563023, "grad_norm": 9.99325401999145, "learning_rate": 1.018705767443519e-06, "loss": 0.48056501150131226, "step": 4650 }, { "epoch": 2.442752100840336, "grad_norm": 13.475799182568382, "learning_rate": 1.0168577509423322e-06, "loss": 0.30501866340637207, "step": 4651 }, { "epoch": 2.44327731092437, "grad_norm": 11.649082911865747, "learning_rate": 1.0150112224287396e-06, "loss": 0.6185335516929626, "step": 4652 }, { "epoch": 2.4438025210084033, "grad_norm": 16.70253684316815, "learning_rate": 1.013166182592551e-06, "loss": 0.724973201751709, "step": 4653 }, { "epoch": 2.4443277310924367, "grad_norm": 17.50781231837, "learning_rate": 1.0113226321230213e-06, "loss": 0.5617965459823608, "step": 4654 }, { "epoch": 2.4448529411764706, "grad_norm": 15.26581366504748, "learning_rate": 1.0094805717088473e-06, "loss": 0.5633823275566101, "step": 4655 }, { "epoch": 2.4453781512605044, "grad_norm": 10.232663292730718, "learning_rate": 1.0076400020381687e-06, "loss": 0.42383772134780884, "step": 4656 }, { "epoch": 2.445903361344538, "grad_norm": 14.80627293281165, "learning_rate": 1.0058009237985721e-06, "loss": 0.8045518398284912, "step": 4657 }, { "epoch": 2.4464285714285716, "grad_norm": 12.875540222576225, "learning_rate": 1.003963337677083e-06, "loss": 1.4751150608062744, "step": 4658 }, { "epoch": 2.446953781512605, "grad_norm": 9.835555950015056, "learning_rate": 1.002127244360171e-06, "loss": 0.2266070544719696, "step": 4659 }, { "epoch": 2.447478991596639, "grad_norm": 16.775725560965732, "learning_rate": 1.0002926445337474e-06, "loss": 2.585972309112549, "step": 4660 }, { "epoch": 2.4480042016806722, "grad_norm": 17.680236712991608, "learning_rate": 9.98459538883167e-07, "loss": 0.5047086477279663, "step": 4661 }, { "epoch": 2.448529411764706, "grad_norm": 9.336729580011761, "learning_rate": 9.96627928093224e-07, "loss": 0.25490984320640564, "step": 4662 }, { "epoch": 2.4490546218487395, "grad_norm": 16.411493477279496, "learning_rate": 9.947978128481556e-07, "loss": 0.4267747402191162, "step": 4663 }, { "epoch": 2.4495798319327733, "grad_norm": 12.203016330801969, "learning_rate": 9.929691938316422e-07, "loss": 0.36980509757995605, "step": 4664 }, { "epoch": 2.4501050420168067, "grad_norm": 16.308788493374234, "learning_rate": 9.911420717268023e-07, "loss": 0.5081444978713989, "step": 4665 }, { "epoch": 2.4506302521008405, "grad_norm": 12.769936615698294, "learning_rate": 9.893164472161965e-07, "loss": 0.24696582555770874, "step": 4666 }, { "epoch": 2.451155462184874, "grad_norm": 9.790135986927812, "learning_rate": 9.874923209818249e-07, "loss": 0.7831677198410034, "step": 4667 }, { "epoch": 2.4516806722689077, "grad_norm": 11.856238608913674, "learning_rate": 9.856696937051297e-07, "loss": 0.6640298962593079, "step": 4668 }, { "epoch": 2.452205882352941, "grad_norm": 9.527089820470042, "learning_rate": 9.838485660669906e-07, "loss": 0.6059166193008423, "step": 4669 }, { "epoch": 2.452731092436975, "grad_norm": 21.9886168050128, "learning_rate": 9.820289387477304e-07, "loss": 0.6838579177856445, "step": 4670 }, { "epoch": 2.4532563025210083, "grad_norm": 24.78888622836168, "learning_rate": 9.802108124271092e-07, "loss": 3.7656304836273193, "step": 4671 }, { "epoch": 2.453781512605042, "grad_norm": 6.48566338430173, "learning_rate": 9.78394187784325e-07, "loss": 0.2731790244579315, "step": 4672 }, { "epoch": 2.4543067226890756, "grad_norm": 14.024939105084325, "learning_rate": 9.765790654980195e-07, "loss": 0.49420320987701416, "step": 4673 }, { "epoch": 2.4548319327731094, "grad_norm": 10.56214526304676, "learning_rate": 9.74765446246269e-07, "loss": 0.33996111154556274, "step": 4674 }, { "epoch": 2.455357142857143, "grad_norm": 10.87291795043671, "learning_rate": 9.729533307065897e-07, "loss": 0.22930221259593964, "step": 4675 }, { "epoch": 2.4558823529411766, "grad_norm": 5.957944172714362, "learning_rate": 9.711427195559352e-07, "loss": 0.2003369927406311, "step": 4676 }, { "epoch": 2.45640756302521, "grad_norm": 15.022562983405647, "learning_rate": 9.693336134706988e-07, "loss": 0.36549103260040283, "step": 4677 }, { "epoch": 2.456932773109244, "grad_norm": 15.985269360078055, "learning_rate": 9.6752601312671e-07, "loss": 0.6158819794654846, "step": 4678 }, { "epoch": 2.4574579831932772, "grad_norm": 7.715153626258572, "learning_rate": 9.65719919199235e-07, "loss": 0.3991345167160034, "step": 4679 }, { "epoch": 2.457983193277311, "grad_norm": 9.644010906168198, "learning_rate": 9.63915332362982e-07, "loss": 0.6521626710891724, "step": 4680 }, { "epoch": 2.4585084033613445, "grad_norm": 12.418945035394104, "learning_rate": 9.621122532920908e-07, "loss": 0.5674232244491577, "step": 4681 }, { "epoch": 2.4590336134453783, "grad_norm": 5.721560434142794, "learning_rate": 9.6031068266014e-07, "loss": 0.1428273469209671, "step": 4682 }, { "epoch": 2.4595588235294117, "grad_norm": 17.981790555131614, "learning_rate": 9.58510621140145e-07, "loss": 0.3296002447605133, "step": 4683 }, { "epoch": 2.4600840336134455, "grad_norm": 10.881012831291098, "learning_rate": 9.567120694045568e-07, "loss": 0.8027752041816711, "step": 4684 }, { "epoch": 2.460609243697479, "grad_norm": 10.925923426486813, "learning_rate": 9.549150281252633e-07, "loss": 0.36758917570114136, "step": 4685 }, { "epoch": 2.4611344537815127, "grad_norm": 8.777357272041694, "learning_rate": 9.531194979735869e-07, "loss": 0.9515275955200195, "step": 4686 }, { "epoch": 2.461659663865546, "grad_norm": 11.242025681773345, "learning_rate": 9.513254796202848e-07, "loss": 0.6229841113090515, "step": 4687 }, { "epoch": 2.46218487394958, "grad_norm": 8.82602710321931, "learning_rate": 9.495329737355541e-07, "loss": 0.2540172040462494, "step": 4688 }, { "epoch": 2.4627100840336134, "grad_norm": 11.3910438721506, "learning_rate": 9.477419809890215e-07, "loss": 1.8517810106277466, "step": 4689 }, { "epoch": 2.463235294117647, "grad_norm": 14.344553984642637, "learning_rate": 9.459525020497507e-07, "loss": 0.6088428497314453, "step": 4690 }, { "epoch": 2.4637605042016806, "grad_norm": 24.59858337612728, "learning_rate": 9.441645375862397e-07, "loss": 0.7089042067527771, "step": 4691 }, { "epoch": 2.4642857142857144, "grad_norm": 16.790495896313505, "learning_rate": 9.423780882664202e-07, "loss": 0.7045350074768066, "step": 4692 }, { "epoch": 2.464810924369748, "grad_norm": 13.776657877349882, "learning_rate": 9.405931547576591e-07, "loss": 0.41512563824653625, "step": 4693 }, { "epoch": 2.4653361344537816, "grad_norm": 11.554234737040302, "learning_rate": 9.388097377267552e-07, "loss": 0.996214747428894, "step": 4694 }, { "epoch": 2.465861344537815, "grad_norm": 12.658984217175727, "learning_rate": 9.370278378399433e-07, "loss": 0.47079768776893616, "step": 4695 }, { "epoch": 2.466386554621849, "grad_norm": 15.816315389778362, "learning_rate": 9.352474557628899e-07, "loss": 0.4400932788848877, "step": 4696 }, { "epoch": 2.4669117647058822, "grad_norm": 8.718914840431788, "learning_rate": 9.334685921606946e-07, "loss": 0.19140273332595825, "step": 4697 }, { "epoch": 2.467436974789916, "grad_norm": 7.9039013943502505, "learning_rate": 9.31691247697889e-07, "loss": 0.26404517889022827, "step": 4698 }, { "epoch": 2.4679621848739495, "grad_norm": 10.2417256646224, "learning_rate": 9.299154230384383e-07, "loss": 0.6127966642379761, "step": 4699 }, { "epoch": 2.4684873949579833, "grad_norm": 9.496690714163332, "learning_rate": 9.281411188457396e-07, "loss": 2.1094913482666016, "step": 4700 }, { "epoch": 2.4690126050420167, "grad_norm": 13.347013677572749, "learning_rate": 9.26368335782622e-07, "loss": 0.4380970597267151, "step": 4701 }, { "epoch": 2.4695378151260505, "grad_norm": 14.36156356847337, "learning_rate": 9.245970745113453e-07, "loss": 0.6515285968780518, "step": 4702 }, { "epoch": 2.470063025210084, "grad_norm": 11.80035507754474, "learning_rate": 9.228273356936046e-07, "loss": 0.3813164532184601, "step": 4703 }, { "epoch": 2.4705882352941178, "grad_norm": 8.528257081660707, "learning_rate": 9.210591199905217e-07, "loss": 0.6726269721984863, "step": 4704 }, { "epoch": 2.471113445378151, "grad_norm": 10.551066183916396, "learning_rate": 9.192924280626514e-07, "loss": 0.560356855392456, "step": 4705 }, { "epoch": 2.471638655462185, "grad_norm": 11.015979250413581, "learning_rate": 9.175272605699792e-07, "loss": 0.35912248492240906, "step": 4706 }, { "epoch": 2.4721638655462184, "grad_norm": 9.392218324754262, "learning_rate": 9.157636181719204e-07, "loss": 0.1710374355316162, "step": 4707 }, { "epoch": 2.472689075630252, "grad_norm": 13.355515376393848, "learning_rate": 9.140015015273224e-07, "loss": 0.3761917054653168, "step": 4708 }, { "epoch": 2.4732142857142856, "grad_norm": 10.90637067923259, "learning_rate": 9.122409112944591e-07, "loss": 0.31881874799728394, "step": 4709 }, { "epoch": 2.4737394957983194, "grad_norm": 8.854564926807791, "learning_rate": 9.104818481310396e-07, "loss": 0.43804216384887695, "step": 4710 }, { "epoch": 2.474264705882353, "grad_norm": 9.147833087460095, "learning_rate": 9.087243126941975e-07, "loss": 0.6836268305778503, "step": 4711 }, { "epoch": 2.4747899159663866, "grad_norm": 19.132813414443866, "learning_rate": 9.069683056404982e-07, "loss": 0.33152320981025696, "step": 4712 }, { "epoch": 2.47531512605042, "grad_norm": 14.268196793419088, "learning_rate": 9.052138276259348e-07, "loss": 0.33739525079727173, "step": 4713 }, { "epoch": 2.475840336134454, "grad_norm": 14.092363531663707, "learning_rate": 9.034608793059307e-07, "loss": 0.32641756534576416, "step": 4714 }, { "epoch": 2.4763655462184873, "grad_norm": 9.068358384788377, "learning_rate": 9.017094613353366e-07, "loss": 0.27681398391723633, "step": 4715 }, { "epoch": 2.476890756302521, "grad_norm": 10.4349516296856, "learning_rate": 8.999595743684314e-07, "loss": 0.349325567483902, "step": 4716 }, { "epoch": 2.4774159663865545, "grad_norm": 13.80555283378645, "learning_rate": 8.982112190589237e-07, "loss": 0.5981140732765198, "step": 4717 }, { "epoch": 2.4779411764705883, "grad_norm": 13.102668083052532, "learning_rate": 8.96464396059949e-07, "loss": 0.5057382583618164, "step": 4718 }, { "epoch": 2.4784663865546217, "grad_norm": 9.959965509538167, "learning_rate": 8.947191060240701e-07, "loss": 0.26920151710510254, "step": 4719 }, { "epoch": 2.4789915966386555, "grad_norm": 19.063605641074684, "learning_rate": 8.929753496032761e-07, "loss": 0.8154730200767517, "step": 4720 }, { "epoch": 2.479516806722689, "grad_norm": 13.689453077945961, "learning_rate": 8.912331274489855e-07, "loss": 0.3454706072807312, "step": 4721 }, { "epoch": 2.4800420168067228, "grad_norm": 10.441706029517952, "learning_rate": 8.894924402120425e-07, "loss": 0.3592435419559479, "step": 4722 }, { "epoch": 2.480567226890756, "grad_norm": 20.216135523518552, "learning_rate": 8.87753288542717e-07, "loss": 0.3239816427230835, "step": 4723 }, { "epoch": 2.48109243697479, "grad_norm": 15.57789120946543, "learning_rate": 8.860156730907054e-07, "loss": 0.6587352752685547, "step": 4724 }, { "epoch": 2.4816176470588234, "grad_norm": 8.587661716318681, "learning_rate": 8.842795945051335e-07, "loss": 0.16333778202533722, "step": 4725 }, { "epoch": 2.482142857142857, "grad_norm": 12.397698419058147, "learning_rate": 8.825450534345486e-07, "loss": 0.5055996179580688, "step": 4726 }, { "epoch": 2.4826680672268906, "grad_norm": 16.795151339073612, "learning_rate": 8.808120505269269e-07, "loss": 0.31784749031066895, "step": 4727 }, { "epoch": 2.4831932773109244, "grad_norm": 14.69910847418944, "learning_rate": 8.79080586429667e-07, "loss": 0.6859865188598633, "step": 4728 }, { "epoch": 2.483718487394958, "grad_norm": 10.10533952596627, "learning_rate": 8.773506617895944e-07, "loss": 0.3855650722980499, "step": 4729 }, { "epoch": 2.4842436974789917, "grad_norm": 11.80557983473714, "learning_rate": 8.756222772529599e-07, "loss": 0.4115293025970459, "step": 4730 }, { "epoch": 2.484768907563025, "grad_norm": 13.927940063008188, "learning_rate": 8.73895433465437e-07, "loss": 0.5889797210693359, "step": 4731 }, { "epoch": 2.485294117647059, "grad_norm": 9.563821181327375, "learning_rate": 8.72170131072127e-07, "loss": 0.4151526093482971, "step": 4732 }, { "epoch": 2.4858193277310923, "grad_norm": 28.27792492837143, "learning_rate": 8.704463707175526e-07, "loss": 0.5376741886138916, "step": 4733 }, { "epoch": 2.486344537815126, "grad_norm": 10.945468143772102, "learning_rate": 8.687241530456608e-07, "loss": 0.5140660405158997, "step": 4734 }, { "epoch": 2.4868697478991595, "grad_norm": 11.624318257674368, "learning_rate": 8.670034786998232e-07, "loss": 0.8436710834503174, "step": 4735 }, { "epoch": 2.4873949579831933, "grad_norm": 13.02003886638922, "learning_rate": 8.652843483228335e-07, "loss": 0.7538946866989136, "step": 4736 }, { "epoch": 2.4879201680672267, "grad_norm": 17.921355315899405, "learning_rate": 8.6356676255691e-07, "loss": 0.349669873714447, "step": 4737 }, { "epoch": 2.4884453781512605, "grad_norm": 13.878297019785771, "learning_rate": 8.618507220436939e-07, "loss": 0.5361471176147461, "step": 4738 }, { "epoch": 2.488970588235294, "grad_norm": 12.16297921198656, "learning_rate": 8.601362274242465e-07, "loss": 0.4689074754714966, "step": 4739 }, { "epoch": 2.4894957983193278, "grad_norm": 17.62693924380033, "learning_rate": 8.584232793390562e-07, "loss": 0.534205436706543, "step": 4740 }, { "epoch": 2.490021008403361, "grad_norm": 8.706013583682777, "learning_rate": 8.567118784280309e-07, "loss": 0.5740219354629517, "step": 4741 }, { "epoch": 2.490546218487395, "grad_norm": 20.246481701007742, "learning_rate": 8.550020253305003e-07, "loss": 0.7582848072052002, "step": 4742 }, { "epoch": 2.4910714285714284, "grad_norm": 9.255095815609051, "learning_rate": 8.532937206852165e-07, "loss": 0.31694650650024414, "step": 4743 }, { "epoch": 2.491596638655462, "grad_norm": 14.210622193219352, "learning_rate": 8.515869651303533e-07, "loss": 0.409004271030426, "step": 4744 }, { "epoch": 2.4921218487394956, "grad_norm": 10.275172515676715, "learning_rate": 8.498817593035053e-07, "loss": 0.38191547989845276, "step": 4745 }, { "epoch": 2.4926470588235294, "grad_norm": 15.07256195329395, "learning_rate": 8.48178103841687e-07, "loss": 0.47504180669784546, "step": 4746 }, { "epoch": 2.4931722689075633, "grad_norm": 49.964165771216344, "learning_rate": 8.464759993813382e-07, "loss": 1.2656458616256714, "step": 4747 }, { "epoch": 2.4936974789915967, "grad_norm": 12.222531563981194, "learning_rate": 8.447754465583142e-07, "loss": 0.5406888723373413, "step": 4748 }, { "epoch": 2.49422268907563, "grad_norm": 11.470656056399244, "learning_rate": 8.430764460078938e-07, "loss": 0.7408872246742249, "step": 4749 }, { "epoch": 2.494747899159664, "grad_norm": 12.942732280680922, "learning_rate": 8.41378998364774e-07, "loss": 0.8626704216003418, "step": 4750 }, { "epoch": 2.4952731092436977, "grad_norm": 10.67096922730019, "learning_rate": 8.396831042630727e-07, "loss": 0.23665863275527954, "step": 4751 }, { "epoch": 2.495798319327731, "grad_norm": 9.020429608119832, "learning_rate": 8.379887643363277e-07, "loss": 0.33638107776641846, "step": 4752 }, { "epoch": 2.4963235294117645, "grad_norm": 13.825420485703267, "learning_rate": 8.362959792174941e-07, "loss": 0.33803611993789673, "step": 4753 }, { "epoch": 2.4968487394957983, "grad_norm": 10.06604318894792, "learning_rate": 8.346047495389498e-07, "loss": 1.0515186786651611, "step": 4754 }, { "epoch": 2.497373949579832, "grad_norm": 8.66381834651261, "learning_rate": 8.329150759324894e-07, "loss": 0.26907309889793396, "step": 4755 }, { "epoch": 2.4978991596638656, "grad_norm": 8.36770765299475, "learning_rate": 8.312269590293259e-07, "loss": 0.456392765045166, "step": 4756 }, { "epoch": 2.498424369747899, "grad_norm": 10.390199793457995, "learning_rate": 8.295403994600921e-07, "loss": 0.23822353780269623, "step": 4757 }, { "epoch": 2.4989495798319328, "grad_norm": 11.533163558354298, "learning_rate": 8.278553978548365e-07, "loss": 0.3360922336578369, "step": 4758 }, { "epoch": 2.4994747899159666, "grad_norm": 9.65091677840734, "learning_rate": 8.261719548430292e-07, "loss": 0.219955176115036, "step": 4759 }, { "epoch": 2.5, "grad_norm": 18.190299748999404, "learning_rate": 8.244900710535547e-07, "loss": 0.5958255529403687, "step": 4760 }, { "epoch": 2.5005252100840334, "grad_norm": 7.997470071309525, "learning_rate": 8.228097471147167e-07, "loss": 0.3603939414024353, "step": 4761 }, { "epoch": 2.5010504201680672, "grad_norm": 11.77676038041079, "learning_rate": 8.211309836542369e-07, "loss": 0.3790106177330017, "step": 4762 }, { "epoch": 2.501575630252101, "grad_norm": 8.184737025733762, "learning_rate": 8.194537812992531e-07, "loss": 0.18647828698158264, "step": 4763 }, { "epoch": 2.5021008403361344, "grad_norm": 9.537869020221029, "learning_rate": 8.177781406763196e-07, "loss": 0.9135514497756958, "step": 4764 }, { "epoch": 2.502626050420168, "grad_norm": 8.44957865906863, "learning_rate": 8.161040624114075e-07, "loss": 0.5648715496063232, "step": 4765 }, { "epoch": 2.5031512605042017, "grad_norm": 7.901263133771217, "learning_rate": 8.144315471299046e-07, "loss": 0.43349596858024597, "step": 4766 }, { "epoch": 2.5036764705882355, "grad_norm": 9.546382607738664, "learning_rate": 8.127605954566143e-07, "loss": 0.3956523835659027, "step": 4767 }, { "epoch": 2.504201680672269, "grad_norm": 10.824281803598327, "learning_rate": 8.110912080157552e-07, "loss": 0.44914332032203674, "step": 4768 }, { "epoch": 2.5047268907563023, "grad_norm": 9.70877017119002, "learning_rate": 8.094233854309647e-07, "loss": 0.44429028034210205, "step": 4769 }, { "epoch": 2.505252100840336, "grad_norm": 18.192437595582472, "learning_rate": 8.077571283252928e-07, "loss": 0.46342164278030396, "step": 4770 }, { "epoch": 2.50577731092437, "grad_norm": 12.197374142546467, "learning_rate": 8.060924373212042e-07, "loss": 0.7917468547821045, "step": 4771 }, { "epoch": 2.5063025210084033, "grad_norm": 11.550320342691949, "learning_rate": 8.044293130405806e-07, "loss": 0.3075546622276306, "step": 4772 }, { "epoch": 2.5068277310924367, "grad_norm": 9.685331571603992, "learning_rate": 8.027677561047176e-07, "loss": 0.8057847619056702, "step": 4773 }, { "epoch": 2.5073529411764706, "grad_norm": 10.380721309475254, "learning_rate": 8.011077671343248e-07, "loss": 0.7955812811851501, "step": 4774 }, { "epoch": 2.5078781512605044, "grad_norm": 10.59970516497924, "learning_rate": 7.994493467495262e-07, "loss": 0.40353015065193176, "step": 4775 }, { "epoch": 2.508403361344538, "grad_norm": 13.107800609546093, "learning_rate": 7.977924955698591e-07, "loss": 0.4272182285785675, "step": 4776 }, { "epoch": 2.508928571428571, "grad_norm": 10.59584657769079, "learning_rate": 7.961372142142776e-07, "loss": 0.4582866430282593, "step": 4777 }, { "epoch": 2.509453781512605, "grad_norm": 11.188308198926357, "learning_rate": 7.944835033011472e-07, "loss": 0.21376478672027588, "step": 4778 }, { "epoch": 2.509978991596639, "grad_norm": 15.013372347267286, "learning_rate": 7.928313634482454e-07, "loss": 0.488264262676239, "step": 4779 }, { "epoch": 2.5105042016806722, "grad_norm": 6.666361609200026, "learning_rate": 7.911807952727652e-07, "loss": 0.2537972331047058, "step": 4780 }, { "epoch": 2.5110294117647056, "grad_norm": 12.349476818121895, "learning_rate": 7.89531799391311e-07, "loss": 0.5406059622764587, "step": 4781 }, { "epoch": 2.5115546218487395, "grad_norm": 14.989702434119808, "learning_rate": 7.878843764199007e-07, "loss": 0.542559027671814, "step": 4782 }, { "epoch": 2.5120798319327733, "grad_norm": 9.068316772517909, "learning_rate": 7.862385269739625e-07, "loss": 0.18501508235931396, "step": 4783 }, { "epoch": 2.5126050420168067, "grad_norm": 7.601244891676653, "learning_rate": 7.845942516683414e-07, "loss": 0.4172128438949585, "step": 4784 }, { "epoch": 2.51313025210084, "grad_norm": 14.302351113185587, "learning_rate": 7.829515511172897e-07, "loss": 0.5935714244842529, "step": 4785 }, { "epoch": 2.513655462184874, "grad_norm": 8.467727121395807, "learning_rate": 7.813104259344739e-07, "loss": 0.3091030418872833, "step": 4786 }, { "epoch": 2.5141806722689077, "grad_norm": 12.752753375998163, "learning_rate": 7.796708767329708e-07, "loss": 0.4248979687690735, "step": 4787 }, { "epoch": 2.514705882352941, "grad_norm": 11.038479542489673, "learning_rate": 7.780329041252688e-07, "loss": 0.35864678025245667, "step": 4788 }, { "epoch": 2.5152310924369745, "grad_norm": 11.241897403094827, "learning_rate": 7.763965087232678e-07, "loss": 0.34305691719055176, "step": 4789 }, { "epoch": 2.5157563025210083, "grad_norm": 7.78712659878438, "learning_rate": 7.747616911382766e-07, "loss": 0.31905481219291687, "step": 4790 }, { "epoch": 2.516281512605042, "grad_norm": 11.718760122762212, "learning_rate": 7.73128451981019e-07, "loss": 0.27002179622650146, "step": 4791 }, { "epoch": 2.5168067226890756, "grad_norm": 13.498485736525947, "learning_rate": 7.714967918616245e-07, "loss": 0.3575587272644043, "step": 4792 }, { "epoch": 2.5173319327731094, "grad_norm": 9.834120956463467, "learning_rate": 7.698667113896346e-07, "loss": 0.5128017663955688, "step": 4793 }, { "epoch": 2.517857142857143, "grad_norm": 16.52662972061671, "learning_rate": 7.682382111740011e-07, "loss": 0.807076096534729, "step": 4794 }, { "epoch": 2.5183823529411766, "grad_norm": 5.629052823987348, "learning_rate": 7.666112918230839e-07, "loss": 0.22683599591255188, "step": 4795 }, { "epoch": 2.51890756302521, "grad_norm": 10.892963341447294, "learning_rate": 7.649859539446547e-07, "loss": 0.44364529848098755, "step": 4796 }, { "epoch": 2.519432773109244, "grad_norm": 8.351755144819656, "learning_rate": 7.633621981458916e-07, "loss": 0.24138270318508148, "step": 4797 }, { "epoch": 2.5199579831932772, "grad_norm": 7.268189185600107, "learning_rate": 7.617400250333834e-07, "loss": 0.24619129300117493, "step": 4798 }, { "epoch": 2.520483193277311, "grad_norm": 8.801647696715333, "learning_rate": 7.601194352131285e-07, "loss": 0.2071070671081543, "step": 4799 }, { "epoch": 2.5210084033613445, "grad_norm": 14.145085712514055, "learning_rate": 7.585004292905329e-07, "loss": 0.33978474140167236, "step": 4800 }, { "epoch": 2.5215336134453783, "grad_norm": 11.188695002667309, "learning_rate": 7.568830078704092e-07, "loss": 0.7669999003410339, "step": 4801 }, { "epoch": 2.5220588235294117, "grad_norm": 11.274157481480405, "learning_rate": 7.552671715569809e-07, "loss": 0.23734520375728607, "step": 4802 }, { "epoch": 2.5225840336134455, "grad_norm": 10.725553642871082, "learning_rate": 7.536529209538773e-07, "loss": 0.40044504404067993, "step": 4803 }, { "epoch": 2.523109243697479, "grad_norm": 14.974230115543515, "learning_rate": 7.520402566641366e-07, "loss": 0.3545833230018616, "step": 4804 }, { "epoch": 2.5236344537815127, "grad_norm": 11.605260579557688, "learning_rate": 7.504291792902024e-07, "loss": 0.6216102838516235, "step": 4805 }, { "epoch": 2.524159663865546, "grad_norm": 10.105806232277027, "learning_rate": 7.488196894339289e-07, "loss": 0.19713947176933289, "step": 4806 }, { "epoch": 2.52468487394958, "grad_norm": 9.830120449234002, "learning_rate": 7.472117876965751e-07, "loss": 0.6729844212532043, "step": 4807 }, { "epoch": 2.5252100840336134, "grad_norm": 9.67114193153479, "learning_rate": 7.45605474678806e-07, "loss": 0.7624964714050293, "step": 4808 }, { "epoch": 2.525735294117647, "grad_norm": 9.966141114744058, "learning_rate": 7.440007509806946e-07, "loss": 0.37602072954177856, "step": 4809 }, { "epoch": 2.5262605042016806, "grad_norm": 11.326456009743, "learning_rate": 7.423976172017194e-07, "loss": 0.24186234176158905, "step": 4810 }, { "epoch": 2.5267857142857144, "grad_norm": 11.8197693174477, "learning_rate": 7.407960739407649e-07, "loss": 0.6827012300491333, "step": 4811 }, { "epoch": 2.527310924369748, "grad_norm": 13.644422459868707, "learning_rate": 7.391961217961224e-07, "loss": 0.5761939287185669, "step": 4812 }, { "epoch": 2.5278361344537816, "grad_norm": 15.510772845535444, "learning_rate": 7.375977613654861e-07, "loss": 0.46874308586120605, "step": 4813 }, { "epoch": 2.528361344537815, "grad_norm": 9.707397970383074, "learning_rate": 7.360009932459605e-07, "loss": 0.40307551622390747, "step": 4814 }, { "epoch": 2.528886554621849, "grad_norm": 7.586768252041111, "learning_rate": 7.344058180340513e-07, "loss": 0.4715978503227234, "step": 4815 }, { "epoch": 2.5294117647058822, "grad_norm": 10.679268564030183, "learning_rate": 7.328122363256696e-07, "loss": 0.3112912178039551, "step": 4816 }, { "epoch": 2.529936974789916, "grad_norm": 8.615670193104895, "learning_rate": 7.312202487161318e-07, "loss": 0.6268161535263062, "step": 4817 }, { "epoch": 2.5304621848739495, "grad_norm": 11.014823921525975, "learning_rate": 7.296298558001592e-07, "loss": 0.3747507929801941, "step": 4818 }, { "epoch": 2.5309873949579833, "grad_norm": 7.760951885638209, "learning_rate": 7.280410581718761e-07, "loss": 0.5421075224876404, "step": 4819 }, { "epoch": 2.5315126050420167, "grad_norm": 17.31951317624361, "learning_rate": 7.26453856424812e-07, "loss": 0.7568320631980896, "step": 4820 }, { "epoch": 2.5320378151260505, "grad_norm": 13.174117229431252, "learning_rate": 7.248682511519006e-07, "loss": 0.8093594312667847, "step": 4821 }, { "epoch": 2.532563025210084, "grad_norm": 12.17679758920096, "learning_rate": 7.232842429454784e-07, "loss": 0.7586328983306885, "step": 4822 }, { "epoch": 2.5330882352941178, "grad_norm": 13.971461301282277, "learning_rate": 7.217018323972852e-07, "loss": 0.6551073789596558, "step": 4823 }, { "epoch": 2.533613445378151, "grad_norm": 11.482307681061377, "learning_rate": 7.201210200984643e-07, "loss": 0.27159246802330017, "step": 4824 }, { "epoch": 2.534138655462185, "grad_norm": 8.20542700556153, "learning_rate": 7.18541806639561e-07, "loss": 0.2602250277996063, "step": 4825 }, { "epoch": 2.5346638655462184, "grad_norm": 12.269919628277533, "learning_rate": 7.169641926105247e-07, "loss": 0.7117254734039307, "step": 4826 }, { "epoch": 2.535189075630252, "grad_norm": 15.909872331796288, "learning_rate": 7.153881786007056e-07, "loss": 0.7022569179534912, "step": 4827 }, { "epoch": 2.5357142857142856, "grad_norm": 8.44994351786321, "learning_rate": 7.138137651988597e-07, "loss": 0.6579830646514893, "step": 4828 }, { "epoch": 2.5362394957983194, "grad_norm": 10.151957727346831, "learning_rate": 7.122409529931412e-07, "loss": 0.8653113842010498, "step": 4829 }, { "epoch": 2.536764705882353, "grad_norm": 15.90754446592996, "learning_rate": 7.106697425711062e-07, "loss": 0.5251384973526001, "step": 4830 }, { "epoch": 2.5372899159663866, "grad_norm": 12.169849091145256, "learning_rate": 7.091001345197168e-07, "loss": 0.6808485984802246, "step": 4831 }, { "epoch": 2.53781512605042, "grad_norm": 11.718251401864599, "learning_rate": 7.075321294253324e-07, "loss": 0.29910916090011597, "step": 4832 }, { "epoch": 2.538340336134454, "grad_norm": 16.413172899149846, "learning_rate": 7.059657278737136e-07, "loss": 0.6642720103263855, "step": 4833 }, { "epoch": 2.5388655462184873, "grad_norm": 11.06091649328897, "learning_rate": 7.044009304500238e-07, "loss": 0.48181262612342834, "step": 4834 }, { "epoch": 2.539390756302521, "grad_norm": 13.909343184916338, "learning_rate": 7.028377377388262e-07, "loss": 0.48922231793403625, "step": 4835 }, { "epoch": 2.5399159663865545, "grad_norm": 14.748736598087799, "learning_rate": 7.012761503240845e-07, "loss": 0.25614088773727417, "step": 4836 }, { "epoch": 2.5404411764705883, "grad_norm": 9.388733308532037, "learning_rate": 6.997161687891635e-07, "loss": 0.24819070100784302, "step": 4837 }, { "epoch": 2.5409663865546217, "grad_norm": 15.797221194363438, "learning_rate": 6.981577937168277e-07, "loss": 0.5666882991790771, "step": 4838 }, { "epoch": 2.5414915966386555, "grad_norm": 16.159389042946902, "learning_rate": 6.966010256892408e-07, "loss": 0.47369974851608276, "step": 4839 }, { "epoch": 2.542016806722689, "grad_norm": 9.014616031246243, "learning_rate": 6.950458652879671e-07, "loss": 0.48810073733329773, "step": 4840 }, { "epoch": 2.5425420168067228, "grad_norm": 7.811677899440476, "learning_rate": 6.934923130939692e-07, "loss": 0.22124749422073364, "step": 4841 }, { "epoch": 2.543067226890756, "grad_norm": 20.703581400231325, "learning_rate": 6.919403696876098e-07, "loss": 0.6604146957397461, "step": 4842 }, { "epoch": 2.54359243697479, "grad_norm": 10.027770271529535, "learning_rate": 6.903900356486504e-07, "loss": 0.527151346206665, "step": 4843 }, { "epoch": 2.5441176470588234, "grad_norm": 16.503592775774724, "learning_rate": 6.888413115562504e-07, "loss": 0.5747373700141907, "step": 4844 }, { "epoch": 2.544642857142857, "grad_norm": 17.534450929473888, "learning_rate": 6.872941979889708e-07, "loss": 0.8626971244812012, "step": 4845 }, { "epoch": 2.5451680672268906, "grad_norm": 8.60113631480442, "learning_rate": 6.857486955247677e-07, "loss": 0.18687665462493896, "step": 4846 }, { "epoch": 2.5456932773109244, "grad_norm": 12.47739944839357, "learning_rate": 6.842048047409966e-07, "loss": 0.629452645778656, "step": 4847 }, { "epoch": 2.546218487394958, "grad_norm": 15.422739930466173, "learning_rate": 6.826625262144105e-07, "loss": 0.4687202572822571, "step": 4848 }, { "epoch": 2.5467436974789917, "grad_norm": 10.402910406926852, "learning_rate": 6.811218605211606e-07, "loss": 0.28766074776649475, "step": 4849 }, { "epoch": 2.5472689075630255, "grad_norm": 14.811406208320356, "learning_rate": 6.79582808236795e-07, "loss": 0.391085684299469, "step": 4850 }, { "epoch": 2.547794117647059, "grad_norm": 9.893069366097043, "learning_rate": 6.780453699362583e-07, "loss": 0.4918053448200226, "step": 4851 }, { "epoch": 2.5483193277310923, "grad_norm": 10.627153526672627, "learning_rate": 6.765095461938964e-07, "loss": 0.9855205416679382, "step": 4852 }, { "epoch": 2.548844537815126, "grad_norm": 13.978483999394332, "learning_rate": 6.749753375834467e-07, "loss": 0.6464021801948547, "step": 4853 }, { "epoch": 2.54936974789916, "grad_norm": 10.065154229744966, "learning_rate": 6.734427446780467e-07, "loss": 0.32371342182159424, "step": 4854 }, { "epoch": 2.5498949579831933, "grad_norm": 14.54852078626233, "learning_rate": 6.71911768050228e-07, "loss": 1.1138784885406494, "step": 4855 }, { "epoch": 2.5504201680672267, "grad_norm": 14.099701785370709, "learning_rate": 6.703824082719201e-07, "loss": 0.5352606177330017, "step": 4856 }, { "epoch": 2.5509453781512605, "grad_norm": 12.263050013668199, "learning_rate": 6.688546659144479e-07, "loss": 0.36302947998046875, "step": 4857 }, { "epoch": 2.5514705882352944, "grad_norm": 13.406656503641392, "learning_rate": 6.67328541548532e-07, "loss": 0.37487393617630005, "step": 4858 }, { "epoch": 2.5519957983193278, "grad_norm": 10.510114460032613, "learning_rate": 6.658040357442874e-07, "loss": 1.5453276634216309, "step": 4859 }, { "epoch": 2.552521008403361, "grad_norm": 16.962523456060687, "learning_rate": 6.642811490712281e-07, "loss": 0.5763338804244995, "step": 4860 }, { "epoch": 2.553046218487395, "grad_norm": 14.6903233682848, "learning_rate": 6.627598820982595e-07, "loss": 0.4196200966835022, "step": 4861 }, { "epoch": 2.553571428571429, "grad_norm": 11.346449668829871, "learning_rate": 6.612402353936836e-07, "loss": 0.5094459652900696, "step": 4862 }, { "epoch": 2.554096638655462, "grad_norm": 16.336665587478024, "learning_rate": 6.597222095251965e-07, "loss": 0.5358494520187378, "step": 4863 }, { "epoch": 2.5546218487394956, "grad_norm": 11.334003773741667, "learning_rate": 6.582058050598894e-07, "loss": 0.6448275446891785, "step": 4864 }, { "epoch": 2.5551470588235294, "grad_norm": 10.359794671028979, "learning_rate": 6.566910225642475e-07, "loss": 0.33972978591918945, "step": 4865 }, { "epoch": 2.5556722689075633, "grad_norm": 11.868815376042482, "learning_rate": 6.551778626041483e-07, "loss": 0.24651628732681274, "step": 4866 }, { "epoch": 2.5561974789915967, "grad_norm": 12.24077182388493, "learning_rate": 6.536663257448678e-07, "loss": 0.5210819840431213, "step": 4867 }, { "epoch": 2.55672268907563, "grad_norm": 9.652755066256226, "learning_rate": 6.52156412551071e-07, "loss": 0.5718560218811035, "step": 4868 }, { "epoch": 2.557247899159664, "grad_norm": 11.699170284774768, "learning_rate": 6.50648123586819e-07, "loss": 0.4137555956840515, "step": 4869 }, { "epoch": 2.5577731092436977, "grad_norm": 10.862464511081688, "learning_rate": 6.491414594155648e-07, "loss": 0.32669249176979065, "step": 4870 }, { "epoch": 2.558298319327731, "grad_norm": 8.243241190545326, "learning_rate": 6.476364206001545e-07, "loss": 0.15914994478225708, "step": 4871 }, { "epoch": 2.5588235294117645, "grad_norm": 8.97745382811696, "learning_rate": 6.461330077028283e-07, "loss": 0.7450168132781982, "step": 4872 }, { "epoch": 2.5593487394957983, "grad_norm": 10.807797586108821, "learning_rate": 6.446312212852162e-07, "loss": 0.3181978464126587, "step": 4873 }, { "epoch": 2.559873949579832, "grad_norm": 10.772107006863616, "learning_rate": 6.431310619083453e-07, "loss": 0.5689761638641357, "step": 4874 }, { "epoch": 2.5603991596638656, "grad_norm": 21.676249425336703, "learning_rate": 6.416325301326304e-07, "loss": 2.277855157852173, "step": 4875 }, { "epoch": 2.560924369747899, "grad_norm": 7.9307616325273536, "learning_rate": 6.401356265178798e-07, "loss": 0.29816046357154846, "step": 4876 }, { "epoch": 2.5614495798319328, "grad_norm": 10.364708876046468, "learning_rate": 6.386403516232948e-07, "loss": 0.29614073038101196, "step": 4877 }, { "epoch": 2.5619747899159666, "grad_norm": 9.06537771630169, "learning_rate": 6.37146706007466e-07, "loss": 0.7089647054672241, "step": 4878 }, { "epoch": 2.5625, "grad_norm": 12.997617937172018, "learning_rate": 6.356546902283772e-07, "loss": 1.478808879852295, "step": 4879 }, { "epoch": 2.5630252100840334, "grad_norm": 10.727525301484945, "learning_rate": 6.341643048434027e-07, "loss": 0.3855453431606293, "step": 4880 }, { "epoch": 2.5635504201680672, "grad_norm": 16.869347084069474, "learning_rate": 6.326755504093063e-07, "loss": 0.7136096954345703, "step": 4881 }, { "epoch": 2.564075630252101, "grad_norm": 11.201752979927996, "learning_rate": 6.311884274822461e-07, "loss": 0.5269986987113953, "step": 4882 }, { "epoch": 2.5646008403361344, "grad_norm": 8.516308383367065, "learning_rate": 6.29702936617767e-07, "loss": 0.18743552267551422, "step": 4883 }, { "epoch": 2.565126050420168, "grad_norm": 11.388982319970065, "learning_rate": 6.28219078370807e-07, "loss": 0.33473318815231323, "step": 4884 }, { "epoch": 2.5656512605042017, "grad_norm": 9.306414957595914, "learning_rate": 6.267368532956919e-07, "loss": 0.30755195021629333, "step": 4885 }, { "epoch": 2.5661764705882355, "grad_norm": 22.493330060283913, "learning_rate": 6.252562619461389e-07, "loss": 1.0576213598251343, "step": 4886 }, { "epoch": 2.566701680672269, "grad_norm": 10.813401210980702, "learning_rate": 6.237773048752538e-07, "loss": 0.3524538278579712, "step": 4887 }, { "epoch": 2.5672268907563023, "grad_norm": 11.788023301145108, "learning_rate": 6.222999826355325e-07, "loss": 0.26946529746055603, "step": 4888 }, { "epoch": 2.567752100840336, "grad_norm": 8.763911909070023, "learning_rate": 6.208242957788613e-07, "loss": 0.2788327932357788, "step": 4889 }, { "epoch": 2.56827731092437, "grad_norm": 10.845340501061996, "learning_rate": 6.193502448565142e-07, "loss": 0.48048681020736694, "step": 4890 }, { "epoch": 2.5688025210084033, "grad_norm": 8.02975276473133, "learning_rate": 6.178778304191535e-07, "loss": 0.36470428109169006, "step": 4891 }, { "epoch": 2.5693277310924367, "grad_norm": 11.462410820599452, "learning_rate": 6.164070530168315e-07, "loss": 0.3302750885486603, "step": 4892 }, { "epoch": 2.5698529411764706, "grad_norm": 17.55556566514967, "learning_rate": 6.14937913198988e-07, "loss": 0.3937831521034241, "step": 4893 }, { "epoch": 2.5703781512605044, "grad_norm": 9.263585463703778, "learning_rate": 6.134704115144519e-07, "loss": 0.3886506259441376, "step": 4894 }, { "epoch": 2.570903361344538, "grad_norm": 12.206676542502768, "learning_rate": 6.120045485114396e-07, "loss": 0.3952086567878723, "step": 4895 }, { "epoch": 2.571428571428571, "grad_norm": 18.626192015101026, "learning_rate": 6.105403247375541e-07, "loss": 0.3314104378223419, "step": 4896 }, { "epoch": 2.571953781512605, "grad_norm": 10.46441168576784, "learning_rate": 6.090777407397902e-07, "loss": 0.5985084772109985, "step": 4897 }, { "epoch": 2.572478991596639, "grad_norm": 7.2085395213955, "learning_rate": 6.076167970645252e-07, "loss": 0.3474208116531372, "step": 4898 }, { "epoch": 2.5730042016806722, "grad_norm": 18.13439406031624, "learning_rate": 6.061574942575266e-07, "loss": 0.7391008138656616, "step": 4899 }, { "epoch": 2.5735294117647056, "grad_norm": 14.427053044116693, "learning_rate": 6.046998328639475e-07, "loss": 0.4393512010574341, "step": 4900 }, { "epoch": 2.5740546218487395, "grad_norm": 10.916262159945981, "learning_rate": 6.032438134283286e-07, "loss": 0.3656442165374756, "step": 4901 }, { "epoch": 2.5745798319327733, "grad_norm": 12.103255393857316, "learning_rate": 6.017894364945964e-07, "loss": 0.6148119568824768, "step": 4902 }, { "epoch": 2.5751050420168067, "grad_norm": 14.915322403540596, "learning_rate": 6.003367026060647e-07, "loss": 0.29871106147766113, "step": 4903 }, { "epoch": 2.57563025210084, "grad_norm": 16.957751874371827, "learning_rate": 5.988856123054337e-07, "loss": 0.4171563386917114, "step": 4904 }, { "epoch": 2.576155462184874, "grad_norm": 11.272245404964119, "learning_rate": 5.974361661347889e-07, "loss": 0.4126380681991577, "step": 4905 }, { "epoch": 2.5766806722689077, "grad_norm": 10.180383869453076, "learning_rate": 5.959883646356013e-07, "loss": 0.6835031509399414, "step": 4906 }, { "epoch": 2.577205882352941, "grad_norm": 9.60162119919138, "learning_rate": 5.945422083487284e-07, "loss": 0.3895748257637024, "step": 4907 }, { "epoch": 2.5777310924369745, "grad_norm": 9.730603159694775, "learning_rate": 5.930976978144132e-07, "loss": 0.2707878053188324, "step": 4908 }, { "epoch": 2.5782563025210083, "grad_norm": 11.542625343752965, "learning_rate": 5.916548335722822e-07, "loss": 0.5062350034713745, "step": 4909 }, { "epoch": 2.578781512605042, "grad_norm": 7.81585480822707, "learning_rate": 5.90213616161347e-07, "loss": 0.632489800453186, "step": 4910 }, { "epoch": 2.5793067226890756, "grad_norm": 7.853902566417403, "learning_rate": 5.887740461200081e-07, "loss": 0.8873011469841003, "step": 4911 }, { "epoch": 2.5798319327731094, "grad_norm": 15.662605069794354, "learning_rate": 5.873361239860464e-07, "loss": 0.48568254709243774, "step": 4912 }, { "epoch": 2.580357142857143, "grad_norm": 18.02139388229487, "learning_rate": 5.858998502966273e-07, "loss": 0.7427787780761719, "step": 4913 }, { "epoch": 2.5808823529411766, "grad_norm": 18.37329751558733, "learning_rate": 5.844652255883026e-07, "loss": 0.5184937715530396, "step": 4914 }, { "epoch": 2.58140756302521, "grad_norm": 12.736909638492465, "learning_rate": 5.83032250397006e-07, "loss": 0.7362766861915588, "step": 4915 }, { "epoch": 2.581932773109244, "grad_norm": 9.187846248254253, "learning_rate": 5.816009252580568e-07, "loss": 0.2318895310163498, "step": 4916 }, { "epoch": 2.5824579831932772, "grad_norm": 13.861756008045637, "learning_rate": 5.801712507061563e-07, "loss": 0.36642661690711975, "step": 4917 }, { "epoch": 2.582983193277311, "grad_norm": 7.653698361429107, "learning_rate": 5.787432272753885e-07, "loss": 0.38152921199798584, "step": 4918 }, { "epoch": 2.5835084033613445, "grad_norm": 11.877417012201363, "learning_rate": 5.773168554992248e-07, "loss": 0.42070138454437256, "step": 4919 }, { "epoch": 2.5840336134453783, "grad_norm": 11.08707915277657, "learning_rate": 5.758921359105158e-07, "loss": 0.22507277131080627, "step": 4920 }, { "epoch": 2.5845588235294117, "grad_norm": 14.454471628961176, "learning_rate": 5.74469069041495e-07, "loss": 0.23637282848358154, "step": 4921 }, { "epoch": 2.5850840336134455, "grad_norm": 9.845238903312321, "learning_rate": 5.730476554237801e-07, "loss": 0.3867771327495575, "step": 4922 }, { "epoch": 2.585609243697479, "grad_norm": 8.433582715329326, "learning_rate": 5.716278955883703e-07, "loss": 0.35116636753082275, "step": 4923 }, { "epoch": 2.5861344537815127, "grad_norm": 10.06509714731401, "learning_rate": 5.702097900656466e-07, "loss": 0.2620098888874054, "step": 4924 }, { "epoch": 2.586659663865546, "grad_norm": 12.524717422952994, "learning_rate": 5.687933393853718e-07, "loss": 0.5872154235839844, "step": 4925 }, { "epoch": 2.58718487394958, "grad_norm": 10.160159474398997, "learning_rate": 5.673785440766938e-07, "loss": 0.38177043199539185, "step": 4926 }, { "epoch": 2.5877100840336134, "grad_norm": 7.802478214541245, "learning_rate": 5.659654046681373e-07, "loss": 0.39889341592788696, "step": 4927 }, { "epoch": 2.588235294117647, "grad_norm": 15.869754176021893, "learning_rate": 5.645539216876117e-07, "loss": 0.8242252469062805, "step": 4928 }, { "epoch": 2.5887605042016806, "grad_norm": 9.934567067619742, "learning_rate": 5.631440956624057e-07, "loss": 0.2728886902332306, "step": 4929 }, { "epoch": 2.5892857142857144, "grad_norm": 10.096954744165794, "learning_rate": 5.617359271191908e-07, "loss": 0.7107230424880981, "step": 4930 }, { "epoch": 2.589810924369748, "grad_norm": 7.848418440603656, "learning_rate": 5.603294165840173e-07, "loss": 0.21378864347934723, "step": 4931 }, { "epoch": 2.5903361344537816, "grad_norm": 15.743830660589166, "learning_rate": 5.589245645823177e-07, "loss": 0.7895803451538086, "step": 4932 }, { "epoch": 2.590861344537815, "grad_norm": 9.572214660478107, "learning_rate": 5.575213716389039e-07, "loss": 0.3168739080429077, "step": 4933 }, { "epoch": 2.591386554621849, "grad_norm": 14.73096843305296, "learning_rate": 5.561198382779692e-07, "loss": 1.4471051692962646, "step": 4934 }, { "epoch": 2.5919117647058822, "grad_norm": 13.376052837695921, "learning_rate": 5.547199650230862e-07, "loss": 1.2652502059936523, "step": 4935 }, { "epoch": 2.592436974789916, "grad_norm": 11.975741587074959, "learning_rate": 5.533217523972073e-07, "loss": 0.26494717597961426, "step": 4936 }, { "epoch": 2.5929621848739495, "grad_norm": 9.466706069351542, "learning_rate": 5.519252009226639e-07, "loss": 0.532478928565979, "step": 4937 }, { "epoch": 2.5934873949579833, "grad_norm": 10.098536916532042, "learning_rate": 5.505303111211685e-07, "loss": 0.9167251586914062, "step": 4938 }, { "epoch": 2.5940126050420167, "grad_norm": 13.980146929135243, "learning_rate": 5.491370835138116e-07, "loss": 0.7907119989395142, "step": 4939 }, { "epoch": 2.5945378151260505, "grad_norm": 8.081646284203565, "learning_rate": 5.477455186210612e-07, "loss": 0.5667487978935242, "step": 4940 }, { "epoch": 2.595063025210084, "grad_norm": 9.20277774123809, "learning_rate": 5.463556169627687e-07, "loss": 0.4445914924144745, "step": 4941 }, { "epoch": 2.5955882352941178, "grad_norm": 9.9060628243416, "learning_rate": 5.449673790581611e-07, "loss": 0.3172542452812195, "step": 4942 }, { "epoch": 2.596113445378151, "grad_norm": 10.903004010929578, "learning_rate": 5.435808054258429e-07, "loss": 0.49419260025024414, "step": 4943 }, { "epoch": 2.596638655462185, "grad_norm": 9.1126139120858, "learning_rate": 5.421958965837993e-07, "loss": 0.26421844959259033, "step": 4944 }, { "epoch": 2.5971638655462184, "grad_norm": 22.62074914624199, "learning_rate": 5.408126530493918e-07, "loss": 0.923978328704834, "step": 4945 }, { "epoch": 2.597689075630252, "grad_norm": 12.447667010920588, "learning_rate": 5.394310753393606e-07, "loss": 0.25522667169570923, "step": 4946 }, { "epoch": 2.5982142857142856, "grad_norm": 8.768505787866767, "learning_rate": 5.380511639698227e-07, "loss": 0.2672596573829651, "step": 4947 }, { "epoch": 2.5987394957983194, "grad_norm": 16.844998395265588, "learning_rate": 5.366729194562747e-07, "loss": 0.4241945743560791, "step": 4948 }, { "epoch": 2.599264705882353, "grad_norm": 9.95635249340883, "learning_rate": 5.352963423135893e-07, "loss": 0.4814796447753906, "step": 4949 }, { "epoch": 2.5997899159663866, "grad_norm": 8.542234566665938, "learning_rate": 5.339214330560155e-07, "loss": 0.3713931441307068, "step": 4950 }, { "epoch": 2.60031512605042, "grad_norm": 14.735836273599324, "learning_rate": 5.325481921971804e-07, "loss": 0.28314444422721863, "step": 4951 }, { "epoch": 2.600840336134454, "grad_norm": 11.93953246422329, "learning_rate": 5.311766202500868e-07, "loss": 0.5708507895469666, "step": 4952 }, { "epoch": 2.6013655462184873, "grad_norm": 14.320011508280166, "learning_rate": 5.298067177271144e-07, "loss": 0.5337470769882202, "step": 4953 }, { "epoch": 2.601890756302521, "grad_norm": 8.12294923810274, "learning_rate": 5.284384851400204e-07, "loss": 0.2840821146965027, "step": 4954 }, { "epoch": 2.6024159663865545, "grad_norm": 14.776064194771555, "learning_rate": 5.270719229999355e-07, "loss": 0.3523826599121094, "step": 4955 }, { "epoch": 2.6029411764705883, "grad_norm": 15.188553645729352, "learning_rate": 5.257070318173702e-07, "loss": 0.6922599077224731, "step": 4956 }, { "epoch": 2.6034663865546217, "grad_norm": 8.866611699424793, "learning_rate": 5.243438121022077e-07, "loss": 0.352006733417511, "step": 4957 }, { "epoch": 2.6039915966386555, "grad_norm": 15.282931728080383, "learning_rate": 5.229822643637078e-07, "loss": 0.6487103700637817, "step": 4958 }, { "epoch": 2.604516806722689, "grad_norm": 14.542059450480222, "learning_rate": 5.216223891105054e-07, "loss": 1.1369600296020508, "step": 4959 }, { "epoch": 2.6050420168067228, "grad_norm": 9.159200705510656, "learning_rate": 5.202641868506115e-07, "loss": 0.44024384021759033, "step": 4960 }, { "epoch": 2.605567226890756, "grad_norm": 9.223462725611745, "learning_rate": 5.18907658091411e-07, "loss": 0.5041219592094421, "step": 4961 }, { "epoch": 2.60609243697479, "grad_norm": 17.68379411030876, "learning_rate": 5.175528033396632e-07, "loss": 1.0955138206481934, "step": 4962 }, { "epoch": 2.6066176470588234, "grad_norm": 7.785254889745292, "learning_rate": 5.161996231015049e-07, "loss": 0.40684613585472107, "step": 4963 }, { "epoch": 2.607142857142857, "grad_norm": 11.738092258208198, "learning_rate": 5.148481178824438e-07, "loss": 0.7205455303192139, "step": 4964 }, { "epoch": 2.6076680672268906, "grad_norm": 10.22256791991857, "learning_rate": 5.134982881873646e-07, "loss": 0.44471150636672974, "step": 4965 }, { "epoch": 2.6081932773109244, "grad_norm": 10.969187643944052, "learning_rate": 5.121501345205243e-07, "loss": 0.6944993734359741, "step": 4966 }, { "epoch": 2.608718487394958, "grad_norm": 11.13196585678083, "learning_rate": 5.10803657385554e-07, "loss": 0.459159791469574, "step": 4967 }, { "epoch": 2.6092436974789917, "grad_norm": 11.153613586703113, "learning_rate": 5.094588572854603e-07, "loss": 0.3382960855960846, "step": 4968 }, { "epoch": 2.6097689075630255, "grad_norm": 15.162025155832893, "learning_rate": 5.081157347226201e-07, "loss": 0.7505803108215332, "step": 4969 }, { "epoch": 2.610294117647059, "grad_norm": 11.696195669171312, "learning_rate": 5.067742901987855e-07, "loss": 0.3236784338951111, "step": 4970 }, { "epoch": 2.6108193277310923, "grad_norm": 12.108939877118239, "learning_rate": 5.054345242150837e-07, "loss": 0.6487278938293457, "step": 4971 }, { "epoch": 2.611344537815126, "grad_norm": 15.672319025763857, "learning_rate": 5.040964372720114e-07, "loss": 0.7085149884223938, "step": 4972 }, { "epoch": 2.61186974789916, "grad_norm": 9.170665570666213, "learning_rate": 5.027600298694397e-07, "loss": 0.4233211576938629, "step": 4973 }, { "epoch": 2.6123949579831933, "grad_norm": 16.20458097285681, "learning_rate": 5.014253025066124e-07, "loss": 0.39938005805015564, "step": 4974 }, { "epoch": 2.6129201680672267, "grad_norm": 10.635478798920339, "learning_rate": 5.000922556821442e-07, "loss": 0.4904247224330902, "step": 4975 }, { "epoch": 2.6134453781512605, "grad_norm": 10.93500267126394, "learning_rate": 4.987608898940244e-07, "loss": 0.49353671073913574, "step": 4976 }, { "epoch": 2.6139705882352944, "grad_norm": 11.129413067710383, "learning_rate": 4.974312056396113e-07, "loss": 0.3361518979072571, "step": 4977 }, { "epoch": 2.6144957983193278, "grad_norm": 12.31059324856716, "learning_rate": 4.961032034156388e-07, "loss": 0.2596784830093384, "step": 4978 }, { "epoch": 2.615021008403361, "grad_norm": 15.409159300351133, "learning_rate": 4.947768837182093e-07, "loss": 0.41441357135772705, "step": 4979 }, { "epoch": 2.615546218487395, "grad_norm": 20.174977892561795, "learning_rate": 4.934522470427983e-07, "loss": 0.9614930748939514, "step": 4980 }, { "epoch": 2.616071428571429, "grad_norm": 6.601700605135325, "learning_rate": 4.92129293884252e-07, "loss": 0.26034635305404663, "step": 4981 }, { "epoch": 2.616596638655462, "grad_norm": 15.349848786614412, "learning_rate": 4.908080247367869e-07, "loss": 0.560057520866394, "step": 4982 }, { "epoch": 2.6171218487394956, "grad_norm": 9.326176327713412, "learning_rate": 4.89488440093992e-07, "loss": 0.6545529961585999, "step": 4983 }, { "epoch": 2.6176470588235294, "grad_norm": 9.633776927291555, "learning_rate": 4.881705404488252e-07, "loss": 0.5484632253646851, "step": 4984 }, { "epoch": 2.6181722689075633, "grad_norm": 17.935161540688796, "learning_rate": 4.868543262936176e-07, "loss": 1.4751113653182983, "step": 4985 }, { "epoch": 2.6186974789915967, "grad_norm": 12.789476810153891, "learning_rate": 4.855397981200682e-07, "loss": 2.3696200847625732, "step": 4986 }, { "epoch": 2.61922268907563, "grad_norm": 10.784548811215974, "learning_rate": 4.842269564192475e-07, "loss": 0.29669246077537537, "step": 4987 }, { "epoch": 2.619747899159664, "grad_norm": 11.192374082774652, "learning_rate": 4.829158016815944e-07, "loss": 0.5222846269607544, "step": 4988 }, { "epoch": 2.6202731092436977, "grad_norm": 14.040186671829293, "learning_rate": 4.816063343969196e-07, "loss": 0.7106570601463318, "step": 4989 }, { "epoch": 2.620798319327731, "grad_norm": 8.559378630416449, "learning_rate": 4.802985550544015e-07, "loss": 0.3928842842578888, "step": 4990 }, { "epoch": 2.6213235294117645, "grad_norm": 10.384040243522856, "learning_rate": 4.789924641425892e-07, "loss": 0.2502846121788025, "step": 4991 }, { "epoch": 2.6218487394957983, "grad_norm": 12.61645409509892, "learning_rate": 4.776880621494018e-07, "loss": 0.4573371410369873, "step": 4992 }, { "epoch": 2.622373949579832, "grad_norm": 9.581021902806178, "learning_rate": 4.763853495621251e-07, "loss": 0.3357299566268921, "step": 4993 }, { "epoch": 2.6228991596638656, "grad_norm": 13.669627820315572, "learning_rate": 4.7508432686741654e-07, "loss": 0.19969376921653748, "step": 4994 }, { "epoch": 2.623424369747899, "grad_norm": 10.384947809777664, "learning_rate": 4.7378499455129955e-07, "loss": 0.45344555377960205, "step": 4995 }, { "epoch": 2.6239495798319328, "grad_norm": 9.542233395549253, "learning_rate": 4.724873530991686e-07, "loss": 0.46982109546661377, "step": 4996 }, { "epoch": 2.6244747899159666, "grad_norm": 6.954420666419455, "learning_rate": 4.7119140299578424e-07, "loss": 0.4305351972579956, "step": 4997 }, { "epoch": 2.625, "grad_norm": 8.80165104637619, "learning_rate": 4.6989714472527705e-07, "loss": 0.3331296443939209, "step": 4998 }, { "epoch": 2.6255252100840334, "grad_norm": 329.8798479078397, "learning_rate": 4.686045787711435e-07, "loss": 2.238851308822632, "step": 4999 }, { "epoch": 2.6260504201680672, "grad_norm": 11.880667119518066, "learning_rate": 4.673137056162508e-07, "loss": 0.5143446922302246, "step": 5000 }, { "epoch": 2.626575630252101, "grad_norm": 15.530134939738671, "learning_rate": 4.660245257428297e-07, "loss": 0.47613704204559326, "step": 5001 }, { "epoch": 2.6271008403361344, "grad_norm": 10.199669714538224, "learning_rate": 4.647370396324835e-07, "loss": 0.5171878933906555, "step": 5002 }, { "epoch": 2.627626050420168, "grad_norm": 10.302973272122806, "learning_rate": 4.6345124776617847e-07, "loss": 0.39039042592048645, "step": 5003 }, { "epoch": 2.6281512605042017, "grad_norm": 7.315377061463184, "learning_rate": 4.621671506242503e-07, "loss": 0.5917413830757141, "step": 5004 }, { "epoch": 2.6286764705882355, "grad_norm": 11.848402962179168, "learning_rate": 4.6088474868640045e-07, "loss": 0.26132234930992126, "step": 5005 }, { "epoch": 2.629201680672269, "grad_norm": 14.072034274867068, "learning_rate": 4.596040424316972e-07, "loss": 0.8253778219223022, "step": 5006 }, { "epoch": 2.6297268907563023, "grad_norm": 16.722932872957276, "learning_rate": 4.583250323385763e-07, "loss": 0.28480014204978943, "step": 5007 }, { "epoch": 2.630252100840336, "grad_norm": 8.687887171112392, "learning_rate": 4.570477188848377e-07, "loss": 0.21405500173568726, "step": 5008 }, { "epoch": 2.63077731092437, "grad_norm": 9.063060080742835, "learning_rate": 4.557721025476508e-07, "loss": 0.8213982582092285, "step": 5009 }, { "epoch": 2.6313025210084033, "grad_norm": 10.684155451899711, "learning_rate": 4.5449818380354895e-07, "loss": 0.5271738767623901, "step": 5010 }, { "epoch": 2.6318277310924367, "grad_norm": 13.21988084784808, "learning_rate": 4.532259631284308e-07, "loss": 0.7251818180084229, "step": 5011 }, { "epoch": 2.6323529411764706, "grad_norm": 9.213945777737939, "learning_rate": 4.519554409975624e-07, "loss": 0.7404474020004272, "step": 5012 }, { "epoch": 2.6328781512605044, "grad_norm": 10.19667787197892, "learning_rate": 4.5068661788557345e-07, "loss": 0.26499176025390625, "step": 5013 }, { "epoch": 2.633403361344538, "grad_norm": 12.277780908040612, "learning_rate": 4.4941949426646034e-07, "loss": 0.5239801406860352, "step": 5014 }, { "epoch": 2.633928571428571, "grad_norm": 17.096414490131995, "learning_rate": 4.481540706135845e-07, "loss": 0.36629125475883484, "step": 5015 }, { "epoch": 2.634453781512605, "grad_norm": 12.241158966036348, "learning_rate": 4.4689034739966994e-07, "loss": 0.8801144361495972, "step": 5016 }, { "epoch": 2.634978991596639, "grad_norm": 15.69368639398231, "learning_rate": 4.4562832509680963e-07, "loss": 0.5607030987739563, "step": 5017 }, { "epoch": 2.6355042016806722, "grad_norm": 15.44521256084083, "learning_rate": 4.4436800417645863e-07, "loss": 0.48291152715682983, "step": 5018 }, { "epoch": 2.6360294117647056, "grad_norm": 13.63791560133695, "learning_rate": 4.4310938510943533e-07, "loss": 0.5484304428100586, "step": 5019 }, { "epoch": 2.6365546218487395, "grad_norm": 6.534181883845186, "learning_rate": 4.4185246836592475e-07, "loss": 0.2272014170885086, "step": 5020 }, { "epoch": 2.6370798319327733, "grad_norm": 8.219637050497115, "learning_rate": 4.4059725441547464e-07, "loss": 0.4130197763442993, "step": 5021 }, { "epoch": 2.6376050420168067, "grad_norm": 9.126100017102663, "learning_rate": 4.3934374372699704e-07, "loss": 0.44093483686447144, "step": 5022 }, { "epoch": 2.63813025210084, "grad_norm": 6.227653400264672, "learning_rate": 4.3809193676876584e-07, "loss": 0.2749183773994446, "step": 5023 }, { "epoch": 2.638655462184874, "grad_norm": 15.142643835997328, "learning_rate": 4.3684183400842297e-07, "loss": 0.4120340943336487, "step": 5024 }, { "epoch": 2.6391806722689077, "grad_norm": 14.07794606682438, "learning_rate": 4.355934359129699e-07, "loss": 0.6837051510810852, "step": 5025 }, { "epoch": 2.639705882352941, "grad_norm": 10.618887524116545, "learning_rate": 4.343467429487719e-07, "loss": 0.9854604601860046, "step": 5026 }, { "epoch": 2.6402310924369745, "grad_norm": 9.556604938209075, "learning_rate": 4.331017555815575e-07, "loss": 0.3573746383190155, "step": 5027 }, { "epoch": 2.6407563025210083, "grad_norm": 10.14366608903893, "learning_rate": 4.318584742764187e-07, "loss": 0.39185842871665955, "step": 5028 }, { "epoch": 2.641281512605042, "grad_norm": 9.47396003753919, "learning_rate": 4.3061689949780995e-07, "loss": 0.4061623513698578, "step": 5029 }, { "epoch": 2.6418067226890756, "grad_norm": 9.172234088657502, "learning_rate": 4.2937703170954635e-07, "loss": 0.715480625629425, "step": 5030 }, { "epoch": 2.6423319327731094, "grad_norm": 10.649151903912319, "learning_rate": 4.281388713748091e-07, "loss": 0.7693527340888977, "step": 5031 }, { "epoch": 2.642857142857143, "grad_norm": 10.74590409758232, "learning_rate": 4.269024189561383e-07, "loss": 0.6536192893981934, "step": 5032 }, { "epoch": 2.6433823529411766, "grad_norm": 8.0265756243527, "learning_rate": 4.2566767491543706e-07, "loss": 0.1733337789773941, "step": 5033 }, { "epoch": 2.64390756302521, "grad_norm": 13.74760894584026, "learning_rate": 4.2443463971397094e-07, "loss": 0.46782687306404114, "step": 5034 }, { "epoch": 2.644432773109244, "grad_norm": 9.248094269141976, "learning_rate": 4.2320331381236535e-07, "loss": 0.5997669696807861, "step": 5035 }, { "epoch": 2.6449579831932772, "grad_norm": 12.099957296138106, "learning_rate": 4.2197369767060904e-07, "loss": 0.5305054187774658, "step": 5036 }, { "epoch": 2.645483193277311, "grad_norm": 7.944928665825679, "learning_rate": 4.2074579174805173e-07, "loss": 0.4841510057449341, "step": 5037 }, { "epoch": 2.6460084033613445, "grad_norm": 9.634128082702697, "learning_rate": 4.195195965034016e-07, "loss": 0.44690483808517456, "step": 5038 }, { "epoch": 2.6465336134453783, "grad_norm": 10.374367688327673, "learning_rate": 4.182951123947332e-07, "loss": 0.6667125821113586, "step": 5039 }, { "epoch": 2.6470588235294117, "grad_norm": 10.663043440372022, "learning_rate": 4.1707233987947683e-07, "loss": 0.7713562846183777, "step": 5040 }, { "epoch": 2.6475840336134455, "grad_norm": 5.947725818998819, "learning_rate": 4.1585127941442536e-07, "loss": 0.14723870158195496, "step": 5041 }, { "epoch": 2.648109243697479, "grad_norm": 9.584843587310504, "learning_rate": 4.1463193145573175e-07, "loss": 0.292923241853714, "step": 5042 }, { "epoch": 2.6486344537815127, "grad_norm": 8.914960563211729, "learning_rate": 4.134142964589105e-07, "loss": 0.5311110019683838, "step": 5043 }, { "epoch": 2.649159663865546, "grad_norm": 16.191387434150904, "learning_rate": 4.121983748788333e-07, "loss": 0.6045875549316406, "step": 5044 }, { "epoch": 2.64968487394958, "grad_norm": 10.7545985171071, "learning_rate": 4.1098416716973457e-07, "loss": 0.7185678482055664, "step": 5045 }, { "epoch": 2.6502100840336134, "grad_norm": 10.785013009842046, "learning_rate": 4.0977167378520757e-07, "loss": 0.3030956983566284, "step": 5046 }, { "epoch": 2.650735294117647, "grad_norm": 9.166300126067835, "learning_rate": 4.0856089517820495e-07, "loss": 0.2926194667816162, "step": 5047 }, { "epoch": 2.6512605042016806, "grad_norm": 7.437105049032209, "learning_rate": 4.073518318010389e-07, "loss": 0.5140249729156494, "step": 5048 }, { "epoch": 2.6517857142857144, "grad_norm": 10.374503355191733, "learning_rate": 4.0614448410538077e-07, "loss": 0.43929004669189453, "step": 5049 }, { "epoch": 2.652310924369748, "grad_norm": 11.917484956714038, "learning_rate": 4.0493885254226037e-07, "loss": 0.2109602987766266, "step": 5050 }, { "epoch": 2.6528361344537816, "grad_norm": 11.482493297152423, "learning_rate": 4.037349375620675e-07, "loss": 0.7243170738220215, "step": 5051 }, { "epoch": 2.653361344537815, "grad_norm": 14.272292392510508, "learning_rate": 4.0253273961455065e-07, "loss": 0.3412622809410095, "step": 5052 }, { "epoch": 2.653886554621849, "grad_norm": 9.552330371113477, "learning_rate": 4.01332259148815e-07, "loss": 0.6411492228507996, "step": 5053 }, { "epoch": 2.6544117647058822, "grad_norm": 8.022015111837447, "learning_rate": 4.0013349661332745e-07, "loss": 0.3606320321559906, "step": 5054 }, { "epoch": 2.654936974789916, "grad_norm": 19.069147101970064, "learning_rate": 3.989364524559108e-07, "loss": 0.7307056188583374, "step": 5055 }, { "epoch": 2.6554621848739495, "grad_norm": 21.058146930449862, "learning_rate": 3.977411271237458e-07, "loss": 0.5460872054100037, "step": 5056 }, { "epoch": 2.6559873949579833, "grad_norm": 8.522836469199902, "learning_rate": 3.965475210633718e-07, "loss": 0.38311946392059326, "step": 5057 }, { "epoch": 2.6565126050420167, "grad_norm": 9.746552559779182, "learning_rate": 3.953556347206861e-07, "loss": 0.49410921335220337, "step": 5058 }, { "epoch": 2.6570378151260505, "grad_norm": 14.03211872512491, "learning_rate": 3.9416546854094294e-07, "loss": 0.6534441113471985, "step": 5059 }, { "epoch": 2.657563025210084, "grad_norm": 11.261552326662507, "learning_rate": 3.9297702296875297e-07, "loss": 0.8130030035972595, "step": 5060 }, { "epoch": 2.6580882352941178, "grad_norm": 9.925699429537678, "learning_rate": 3.917902984480881e-07, "loss": 0.3044094443321228, "step": 5061 }, { "epoch": 2.658613445378151, "grad_norm": 8.940964821056612, "learning_rate": 3.9060529542227277e-07, "loss": 0.18114949762821198, "step": 5062 }, { "epoch": 2.659138655462185, "grad_norm": 7.752457253335019, "learning_rate": 3.894220143339905e-07, "loss": 0.1693650186061859, "step": 5063 }, { "epoch": 2.6596638655462184, "grad_norm": 17.460865938865027, "learning_rate": 3.882404556252811e-07, "loss": 0.5181553959846497, "step": 5064 }, { "epoch": 2.660189075630252, "grad_norm": 10.440624579430413, "learning_rate": 3.870606197375415e-07, "loss": 0.3412543833255768, "step": 5065 }, { "epoch": 2.6607142857142856, "grad_norm": 13.194396878437251, "learning_rate": 3.8588250711152295e-07, "loss": 0.4590100049972534, "step": 5066 }, { "epoch": 2.6612394957983194, "grad_norm": 14.42595042453583, "learning_rate": 3.8470611818733516e-07, "loss": 0.36411648988723755, "step": 5067 }, { "epoch": 2.661764705882353, "grad_norm": 12.726359330704001, "learning_rate": 3.8353145340444486e-07, "loss": 0.4519106149673462, "step": 5068 }, { "epoch": 2.6622899159663866, "grad_norm": 13.62414776448119, "learning_rate": 3.823585132016711e-07, "loss": 0.39295411109924316, "step": 5069 }, { "epoch": 2.66281512605042, "grad_norm": 11.306907392656061, "learning_rate": 3.8118729801719157e-07, "loss": 0.6925134658813477, "step": 5070 }, { "epoch": 2.663340336134454, "grad_norm": 9.726202812301112, "learning_rate": 3.800178082885386e-07, "loss": 0.4672687351703644, "step": 5071 }, { "epoch": 2.6638655462184873, "grad_norm": 10.549496219967628, "learning_rate": 3.7885004445259997e-07, "loss": 0.7954195141792297, "step": 5072 }, { "epoch": 2.664390756302521, "grad_norm": 18.812156421013242, "learning_rate": 3.776840069456189e-07, "loss": 0.791772723197937, "step": 5073 }, { "epoch": 2.6649159663865545, "grad_norm": 15.442799520498514, "learning_rate": 3.765196962031925e-07, "loss": 1.1211323738098145, "step": 5074 }, { "epoch": 2.6654411764705883, "grad_norm": 9.302269880984653, "learning_rate": 3.753571126602745e-07, "loss": 0.29546308517456055, "step": 5075 }, { "epoch": 2.6659663865546217, "grad_norm": 13.886958122168721, "learning_rate": 3.741962567511731e-07, "loss": 0.4888610541820526, "step": 5076 }, { "epoch": 2.6664915966386555, "grad_norm": 20.780045152002426, "learning_rate": 3.730371289095508e-07, "loss": 0.910915732383728, "step": 5077 }, { "epoch": 2.667016806722689, "grad_norm": 5.838502590757335, "learning_rate": 3.71879729568424e-07, "loss": 0.3355761170387268, "step": 5078 }, { "epoch": 2.6675420168067228, "grad_norm": 13.460261740815776, "learning_rate": 3.7072405916016353e-07, "loss": 0.3116706311702728, "step": 5079 }, { "epoch": 2.668067226890756, "grad_norm": 9.178354197184836, "learning_rate": 3.6957011811649567e-07, "loss": 0.5316522121429443, "step": 5080 }, { "epoch": 2.66859243697479, "grad_norm": 17.862679280425013, "learning_rate": 3.6841790686849897e-07, "loss": 1.001525640487671, "step": 5081 }, { "epoch": 2.6691176470588234, "grad_norm": 10.73360002274136, "learning_rate": 3.6726742584660637e-07, "loss": 0.7820755243301392, "step": 5082 }, { "epoch": 2.669642857142857, "grad_norm": 19.69197224149106, "learning_rate": 3.661186754806051e-07, "loss": 0.9408999681472778, "step": 5083 }, { "epoch": 2.6701680672268906, "grad_norm": 8.305112385944424, "learning_rate": 3.649716561996353e-07, "loss": 0.5280729532241821, "step": 5084 }, { "epoch": 2.6706932773109244, "grad_norm": 7.363834394608336, "learning_rate": 3.6382636843218967e-07, "loss": 0.25487959384918213, "step": 5085 }, { "epoch": 2.671218487394958, "grad_norm": 15.765167085439563, "learning_rate": 3.62682812606116e-07, "loss": 0.8331020474433899, "step": 5086 }, { "epoch": 2.6717436974789917, "grad_norm": 14.727366211498815, "learning_rate": 3.615409891486127e-07, "loss": 0.9125362634658813, "step": 5087 }, { "epoch": 2.6722689075630255, "grad_norm": 15.26226066798175, "learning_rate": 3.604008984862334e-07, "loss": 0.7915086150169373, "step": 5088 }, { "epoch": 2.672794117647059, "grad_norm": 6.484660751647232, "learning_rate": 3.592625410448813e-07, "loss": 0.11719128489494324, "step": 5089 }, { "epoch": 2.6733193277310923, "grad_norm": 8.572139401460749, "learning_rate": 3.581259172498169e-07, "loss": 0.5589736104011536, "step": 5090 }, { "epoch": 2.673844537815126, "grad_norm": 12.452725733062692, "learning_rate": 3.569910275256483e-07, "loss": 0.29968681931495667, "step": 5091 }, { "epoch": 2.67436974789916, "grad_norm": 12.172960299583249, "learning_rate": 3.5585787229633794e-07, "loss": 0.48796796798706055, "step": 5092 }, { "epoch": 2.6748949579831933, "grad_norm": 14.3603765992435, "learning_rate": 3.5472645198520064e-07, "loss": 0.9962418675422668, "step": 5093 }, { "epoch": 2.6754201680672267, "grad_norm": 13.686422078868164, "learning_rate": 3.535967670149021e-07, "loss": 0.49266868829727173, "step": 5094 }, { "epoch": 2.6759453781512605, "grad_norm": 9.403051743595773, "learning_rate": 3.5246881780746034e-07, "loss": 0.3322441577911377, "step": 5095 }, { "epoch": 2.6764705882352944, "grad_norm": 9.016010865890543, "learning_rate": 3.51342604784245e-07, "loss": 1.0054486989974976, "step": 5096 }, { "epoch": 2.6769957983193278, "grad_norm": 7.950977095117265, "learning_rate": 3.502181283659756e-07, "loss": 0.5073027610778809, "step": 5097 }, { "epoch": 2.677521008403361, "grad_norm": 12.763806412288341, "learning_rate": 3.490953889727261e-07, "loss": 0.40890246629714966, "step": 5098 }, { "epoch": 2.678046218487395, "grad_norm": 9.699736906986432, "learning_rate": 3.479743870239188e-07, "loss": 0.490925133228302, "step": 5099 }, { "epoch": 2.678571428571429, "grad_norm": 12.970574000287224, "learning_rate": 3.4685512293832823e-07, "loss": 0.38826829195022583, "step": 5100 }, { "epoch": 2.679096638655462, "grad_norm": 11.456556383710724, "learning_rate": 3.4573759713407927e-07, "loss": 0.34248778223991394, "step": 5101 }, { "epoch": 2.6796218487394956, "grad_norm": 12.123584688114516, "learning_rate": 3.4462181002864694e-07, "loss": 0.751177966594696, "step": 5102 }, { "epoch": 2.6801470588235294, "grad_norm": 7.9298450949287265, "learning_rate": 3.435077620388577e-07, "loss": 0.2769845724105835, "step": 5103 }, { "epoch": 2.6806722689075633, "grad_norm": 12.652891528358486, "learning_rate": 3.423954535808871e-07, "loss": 0.32310184836387634, "step": 5104 }, { "epoch": 2.6811974789915967, "grad_norm": 7.673829371355647, "learning_rate": 3.4128488507026327e-07, "loss": 0.36670172214508057, "step": 5105 }, { "epoch": 2.68172268907563, "grad_norm": 9.227254053382925, "learning_rate": 3.4017605692186207e-07, "loss": 0.5683304667472839, "step": 5106 }, { "epoch": 2.682247899159664, "grad_norm": 10.441834545797024, "learning_rate": 3.390689695499089e-07, "loss": 0.46113595366477966, "step": 5107 }, { "epoch": 2.6827731092436977, "grad_norm": 6.066434769965064, "learning_rate": 3.379636233679812e-07, "loss": 0.10238748788833618, "step": 5108 }, { "epoch": 2.683298319327731, "grad_norm": 11.27713465418464, "learning_rate": 3.3686001878900365e-07, "loss": 0.33836841583251953, "step": 5109 }, { "epoch": 2.6838235294117645, "grad_norm": 11.489752386889222, "learning_rate": 3.3575815622525096e-07, "loss": 0.7936794757843018, "step": 5110 }, { "epoch": 2.6843487394957983, "grad_norm": 7.366511831349445, "learning_rate": 3.3465803608834837e-07, "loss": 0.120862677693367, "step": 5111 }, { "epoch": 2.684873949579832, "grad_norm": 9.496802476563838, "learning_rate": 3.335596587892681e-07, "loss": 0.5731973648071289, "step": 5112 }, { "epoch": 2.6853991596638656, "grad_norm": 12.501951837295532, "learning_rate": 3.324630247383337e-07, "loss": 0.7040807008743286, "step": 5113 }, { "epoch": 2.685924369747899, "grad_norm": 11.644252178637693, "learning_rate": 3.313681343452152e-07, "loss": 1.183510661125183, "step": 5114 }, { "epoch": 2.6864495798319328, "grad_norm": 13.946909485798127, "learning_rate": 3.3027498801893266e-07, "loss": 0.7500033974647522, "step": 5115 }, { "epoch": 2.6869747899159666, "grad_norm": 9.296163618879163, "learning_rate": 3.2918358616785384e-07, "loss": 0.3383568525314331, "step": 5116 }, { "epoch": 2.6875, "grad_norm": 10.415550548151717, "learning_rate": 3.2809392919969483e-07, "loss": 0.7802726030349731, "step": 5117 }, { "epoch": 2.6880252100840334, "grad_norm": 14.71984912515302, "learning_rate": 3.2700601752152117e-07, "loss": 0.5044372081756592, "step": 5118 }, { "epoch": 2.6885504201680672, "grad_norm": 9.397877479800641, "learning_rate": 3.2591985153974383e-07, "loss": 0.789323091506958, "step": 5119 }, { "epoch": 2.689075630252101, "grad_norm": 12.374559013291245, "learning_rate": 3.248354316601254e-07, "loss": 0.3527710735797882, "step": 5120 }, { "epoch": 2.6896008403361344, "grad_norm": 11.935226966945294, "learning_rate": 3.2375275828777253e-07, "loss": 0.5447399020195007, "step": 5121 }, { "epoch": 2.690126050420168, "grad_norm": 15.418321811650998, "learning_rate": 3.226718318271416e-07, "loss": 0.5229968428611755, "step": 5122 }, { "epoch": 2.6906512605042017, "grad_norm": 11.611984987052244, "learning_rate": 3.215926526820351e-07, "loss": 0.27938351035118103, "step": 5123 }, { "epoch": 2.6911764705882355, "grad_norm": 10.234290346976508, "learning_rate": 3.20515221255604e-07, "loss": 0.45139533281326294, "step": 5124 }, { "epoch": 2.691701680672269, "grad_norm": 8.946855070567134, "learning_rate": 3.194395379503451e-07, "loss": 0.18062157928943634, "step": 5125 }, { "epoch": 2.6922268907563023, "grad_norm": 8.171326689808147, "learning_rate": 3.1836560316810263e-07, "loss": 0.4740408658981323, "step": 5126 }, { "epoch": 2.692752100840336, "grad_norm": 11.085258568472408, "learning_rate": 3.172934173100689e-07, "loss": 0.4420720934867859, "step": 5127 }, { "epoch": 2.69327731092437, "grad_norm": 11.835224430789292, "learning_rate": 3.162229807767808e-07, "loss": 0.45101088285446167, "step": 5128 }, { "epoch": 2.6938025210084033, "grad_norm": 11.639413644216262, "learning_rate": 3.151542939681235e-07, "loss": 0.17761468887329102, "step": 5129 }, { "epoch": 2.6943277310924367, "grad_norm": 17.31427173957996, "learning_rate": 3.140873572833275e-07, "loss": 0.713176965713501, "step": 5130 }, { "epoch": 2.6948529411764706, "grad_norm": 11.838748608092535, "learning_rate": 3.1302217112096855e-07, "loss": 0.6659767031669617, "step": 5131 }, { "epoch": 2.6953781512605044, "grad_norm": 14.292981371162789, "learning_rate": 3.119587358789711e-07, "loss": 0.6102906465530396, "step": 5132 }, { "epoch": 2.695903361344538, "grad_norm": 7.170690939536522, "learning_rate": 3.108970519546034e-07, "loss": 0.4029874801635742, "step": 5133 }, { "epoch": 2.696428571428571, "grad_norm": 11.454931920897556, "learning_rate": 3.098371197444794e-07, "loss": 0.3053671717643738, "step": 5134 }, { "epoch": 2.696953781512605, "grad_norm": 11.947430182963497, "learning_rate": 3.0877893964456116e-07, "loss": 0.3585653603076935, "step": 5135 }, { "epoch": 2.697478991596639, "grad_norm": 7.936443136705915, "learning_rate": 3.0772251205015315e-07, "loss": 0.4208843410015106, "step": 5136 }, { "epoch": 2.6980042016806722, "grad_norm": 7.205033921601751, "learning_rate": 3.066678373559062e-07, "loss": 0.32238075137138367, "step": 5137 }, { "epoch": 2.6985294117647056, "grad_norm": 13.779233308588191, "learning_rate": 3.0561491595581695e-07, "loss": 0.7137563228607178, "step": 5138 }, { "epoch": 2.6990546218487395, "grad_norm": 12.03154113537296, "learning_rate": 3.0456374824322674e-07, "loss": 0.41850051283836365, "step": 5139 }, { "epoch": 2.6995798319327733, "grad_norm": 16.939026850849142, "learning_rate": 3.03514334610821e-07, "loss": 0.6328059434890747, "step": 5140 }, { "epoch": 2.7001050420168067, "grad_norm": 14.51801961228705, "learning_rate": 3.0246667545063057e-07, "loss": 0.2627220153808594, "step": 5141 }, { "epoch": 2.70063025210084, "grad_norm": 11.531479972898897, "learning_rate": 3.014207711540318e-07, "loss": 0.8954662680625916, "step": 5142 }, { "epoch": 2.701155462184874, "grad_norm": 20.985896647812933, "learning_rate": 3.0037662211174437e-07, "loss": 0.5788215398788452, "step": 5143 }, { "epoch": 2.7016806722689077, "grad_norm": 7.5724057917125585, "learning_rate": 2.993342287138312e-07, "loss": 0.6538835167884827, "step": 5144 }, { "epoch": 2.702205882352941, "grad_norm": 8.327473652430706, "learning_rate": 2.9829359134970206e-07, "loss": 0.49965575337409973, "step": 5145 }, { "epoch": 2.7027310924369745, "grad_norm": 7.523605616397045, "learning_rate": 2.972547104081081e-07, "loss": 0.33146965503692627, "step": 5146 }, { "epoch": 2.7032563025210083, "grad_norm": 9.267993831346974, "learning_rate": 2.962175862771455e-07, "loss": 0.4421359896659851, "step": 5147 }, { "epoch": 2.703781512605042, "grad_norm": 8.956967198864293, "learning_rate": 2.951822193442544e-07, "loss": 0.29288220405578613, "step": 5148 }, { "epoch": 2.7043067226890756, "grad_norm": 14.8026688753217, "learning_rate": 2.9414860999621764e-07, "loss": 0.9873912930488586, "step": 5149 }, { "epoch": 2.7048319327731094, "grad_norm": 11.676850508780802, "learning_rate": 2.9311675861916246e-07, "loss": 0.43603038787841797, "step": 5150 }, { "epoch": 2.705357142857143, "grad_norm": 6.7432563989320355, "learning_rate": 2.920866655985588e-07, "loss": 0.12715210020542145, "step": 5151 }, { "epoch": 2.7058823529411766, "grad_norm": 9.044957894735301, "learning_rate": 2.9105833131922056e-07, "loss": 0.2916662096977234, "step": 5152 }, { "epoch": 2.70640756302521, "grad_norm": 14.286187786290421, "learning_rate": 2.9003175616530264e-07, "loss": 0.8403812646865845, "step": 5153 }, { "epoch": 2.706932773109244, "grad_norm": 13.314369909470404, "learning_rate": 2.8900694052030553e-07, "loss": 0.362918883562088, "step": 5154 }, { "epoch": 2.7074579831932772, "grad_norm": 10.088509037070159, "learning_rate": 2.879838847670696e-07, "loss": 0.2886349856853485, "step": 5155 }, { "epoch": 2.707983193277311, "grad_norm": 12.567570112336634, "learning_rate": 2.8696258928777975e-07, "loss": 0.446466863155365, "step": 5156 }, { "epoch": 2.7085084033613445, "grad_norm": 6.717264869350844, "learning_rate": 2.8594305446396245e-07, "loss": 0.21716740727424622, "step": 5157 }, { "epoch": 2.7090336134453783, "grad_norm": 9.794897398497712, "learning_rate": 2.8492528067648575e-07, "loss": 0.8401632308959961, "step": 5158 }, { "epoch": 2.7095588235294117, "grad_norm": 9.37508491060464, "learning_rate": 2.839092683055622e-07, "loss": 0.6506957411766052, "step": 5159 }, { "epoch": 2.7100840336134455, "grad_norm": 16.041922721308104, "learning_rate": 2.828950177307443e-07, "loss": 0.6913888454437256, "step": 5160 }, { "epoch": 2.710609243697479, "grad_norm": 8.810022606786823, "learning_rate": 2.818825293309274e-07, "loss": 0.34516096115112305, "step": 5161 }, { "epoch": 2.7111344537815127, "grad_norm": 10.626186409807241, "learning_rate": 2.808718034843472e-07, "loss": 0.4685274362564087, "step": 5162 }, { "epoch": 2.711659663865546, "grad_norm": 17.02547421741878, "learning_rate": 2.798628405685827e-07, "loss": 0.3441670536994934, "step": 5163 }, { "epoch": 2.71218487394958, "grad_norm": 8.73713214975575, "learning_rate": 2.7885564096055305e-07, "loss": 0.2538045048713684, "step": 5164 }, { "epoch": 2.7127100840336134, "grad_norm": 15.018737948467326, "learning_rate": 2.7785020503651783e-07, "loss": 0.5275735855102539, "step": 5165 }, { "epoch": 2.713235294117647, "grad_norm": 7.8716962330650615, "learning_rate": 2.7684653317208154e-07, "loss": 0.5633281469345093, "step": 5166 }, { "epoch": 2.7137605042016806, "grad_norm": 12.214703795854728, "learning_rate": 2.7584462574218595e-07, "loss": 0.6210681200027466, "step": 5167 }, { "epoch": 2.7142857142857144, "grad_norm": 11.077590971720717, "learning_rate": 2.74844483121115e-07, "loss": 0.45682603120803833, "step": 5168 }, { "epoch": 2.714810924369748, "grad_norm": 7.188246699120523, "learning_rate": 2.7384610568249313e-07, "loss": 0.24761930108070374, "step": 5169 }, { "epoch": 2.7153361344537816, "grad_norm": 13.962531163836607, "learning_rate": 2.7284949379928535e-07, "loss": 0.4918213486671448, "step": 5170 }, { "epoch": 2.715861344537815, "grad_norm": 9.93389399552238, "learning_rate": 2.7185464784379777e-07, "loss": 0.680639922618866, "step": 5171 }, { "epoch": 2.716386554621849, "grad_norm": 15.257188440144379, "learning_rate": 2.708615681876747e-07, "loss": 0.588678777217865, "step": 5172 }, { "epoch": 2.7169117647058822, "grad_norm": 9.996652705020658, "learning_rate": 2.698702552019045e-07, "loss": 0.16154924035072327, "step": 5173 }, { "epoch": 2.717436974789916, "grad_norm": 15.809024926042696, "learning_rate": 2.6888070925681244e-07, "loss": 0.5694648027420044, "step": 5174 }, { "epoch": 2.7179621848739495, "grad_norm": 12.544449454952185, "learning_rate": 2.67892930722064e-07, "loss": 0.6989417672157288, "step": 5175 }, { "epoch": 2.7184873949579833, "grad_norm": 9.30951304822431, "learning_rate": 2.669069199666652e-07, "loss": 0.5404279828071594, "step": 5176 }, { "epoch": 2.7190126050420167, "grad_norm": 11.562908665845434, "learning_rate": 2.659226773589607e-07, "loss": 0.9708887338638306, "step": 5177 }, { "epoch": 2.7195378151260505, "grad_norm": 11.028694871376349, "learning_rate": 2.649402032666365e-07, "loss": 0.5706590414047241, "step": 5178 }, { "epoch": 2.720063025210084, "grad_norm": 16.18872235961291, "learning_rate": 2.639594980567162e-07, "loss": 0.9086883664131165, "step": 5179 }, { "epoch": 2.7205882352941178, "grad_norm": 10.588572533047328, "learning_rate": 2.6298056209556164e-07, "loss": 1.261301040649414, "step": 5180 }, { "epoch": 2.721113445378151, "grad_norm": 8.784072350037729, "learning_rate": 2.620033957488777e-07, "loss": 0.3578791618347168, "step": 5181 }, { "epoch": 2.721638655462185, "grad_norm": 9.747894425934968, "learning_rate": 2.6102799938170444e-07, "loss": 0.265135258436203, "step": 5182 }, { "epoch": 2.7221638655462184, "grad_norm": 8.892857907929, "learning_rate": 2.6005437335842155e-07, "loss": 0.40978115797042847, "step": 5183 }, { "epoch": 2.722689075630252, "grad_norm": 12.999338000428267, "learning_rate": 2.5908251804274863e-07, "loss": 0.6683812737464905, "step": 5184 }, { "epoch": 2.7232142857142856, "grad_norm": 11.186695108480528, "learning_rate": 2.581124337977425e-07, "loss": 0.31849461793899536, "step": 5185 }, { "epoch": 2.7237394957983194, "grad_norm": 12.15900271550104, "learning_rate": 2.5714412098579886e-07, "loss": 0.43230393528938293, "step": 5186 }, { "epoch": 2.724264705882353, "grad_norm": 11.42415083456005, "learning_rate": 2.5617757996865053e-07, "loss": 0.7816082239151001, "step": 5187 }, { "epoch": 2.7247899159663866, "grad_norm": 11.839165575978413, "learning_rate": 2.552128111073715e-07, "loss": 0.458351731300354, "step": 5188 }, { "epoch": 2.72531512605042, "grad_norm": 17.6454826719477, "learning_rate": 2.542498147623701e-07, "loss": 0.9202781319618225, "step": 5189 }, { "epoch": 2.725840336134454, "grad_norm": 14.666203901231317, "learning_rate": 2.532885912933952e-07, "loss": 0.5745277404785156, "step": 5190 }, { "epoch": 2.7263655462184873, "grad_norm": 8.854490892871947, "learning_rate": 2.523291410595319e-07, "loss": 0.5427813529968262, "step": 5191 }, { "epoch": 2.726890756302521, "grad_norm": 12.861767210044425, "learning_rate": 2.5137146441920277e-07, "loss": 0.30206120014190674, "step": 5192 }, { "epoch": 2.7274159663865545, "grad_norm": 9.46209596488583, "learning_rate": 2.50415561730169e-07, "loss": 0.5628501176834106, "step": 5193 }, { "epoch": 2.7279411764705883, "grad_norm": 7.937418909307046, "learning_rate": 2.4946143334952764e-07, "loss": 0.3499003052711487, "step": 5194 }, { "epoch": 2.7284663865546217, "grad_norm": 11.452914711023155, "learning_rate": 2.485090796337142e-07, "loss": 0.5341336727142334, "step": 5195 }, { "epoch": 2.7289915966386555, "grad_norm": 11.04327253903026, "learning_rate": 2.4755850093850063e-07, "loss": 0.5487924218177795, "step": 5196 }, { "epoch": 2.729516806722689, "grad_norm": 8.410998913839864, "learning_rate": 2.4660969761899576e-07, "loss": 0.46378302574157715, "step": 5197 }, { "epoch": 2.7300420168067228, "grad_norm": 13.564689895319297, "learning_rate": 2.456626700296455e-07, "loss": 0.39550554752349854, "step": 5198 }, { "epoch": 2.730567226890756, "grad_norm": 11.341228251483367, "learning_rate": 2.447174185242324e-07, "loss": 0.2185949683189392, "step": 5199 }, { "epoch": 2.73109243697479, "grad_norm": 13.165934898963018, "learning_rate": 2.437739434558745e-07, "loss": 0.5988172292709351, "step": 5200 }, { "epoch": 2.7316176470588234, "grad_norm": 11.223174592460525, "learning_rate": 2.428322451770276e-07, "loss": 0.35855352878570557, "step": 5201 }, { "epoch": 2.732142857142857, "grad_norm": 14.238648883274028, "learning_rate": 2.418923240394816e-07, "loss": 0.3877594470977783, "step": 5202 }, { "epoch": 2.7326680672268906, "grad_norm": 9.531355162077892, "learning_rate": 2.409541803943666e-07, "loss": 0.29127657413482666, "step": 5203 }, { "epoch": 2.7331932773109244, "grad_norm": 8.905301611343187, "learning_rate": 2.40017814592145e-07, "loss": 0.22414088249206543, "step": 5204 }, { "epoch": 2.733718487394958, "grad_norm": 9.08993426406396, "learning_rate": 2.3908322698261597e-07, "loss": 0.34578025341033936, "step": 5205 }, { "epoch": 2.7342436974789917, "grad_norm": 9.60290329750277, "learning_rate": 2.3815041791491467e-07, "loss": 0.9344683885574341, "step": 5206 }, { "epoch": 2.7347689075630255, "grad_norm": 8.536931437238191, "learning_rate": 2.3721938773751175e-07, "loss": 0.7497357130050659, "step": 5207 }, { "epoch": 2.735294117647059, "grad_norm": 12.09791752641407, "learning_rate": 2.3629013679821343e-07, "loss": 0.244570791721344, "step": 5208 }, { "epoch": 2.7358193277310923, "grad_norm": 11.471601434314076, "learning_rate": 2.3536266544416043e-07, "loss": 0.31481319665908813, "step": 5209 }, { "epoch": 2.736344537815126, "grad_norm": 14.472248151194497, "learning_rate": 2.3443697402183107e-07, "loss": 0.3894531726837158, "step": 5210 }, { "epoch": 2.73686974789916, "grad_norm": 7.711367696201472, "learning_rate": 2.3351306287703657e-07, "loss": 0.3622080981731415, "step": 5211 }, { "epoch": 2.7373949579831933, "grad_norm": 13.570556777124034, "learning_rate": 2.3259093235492246e-07, "loss": 0.6158708333969116, "step": 5212 }, { "epoch": 2.7379201680672267, "grad_norm": 11.418961913377924, "learning_rate": 2.3167058279997156e-07, "loss": 0.2521127462387085, "step": 5213 }, { "epoch": 2.7384453781512605, "grad_norm": 9.380688231523322, "learning_rate": 2.3075201455599939e-07, "loss": 0.1848069131374359, "step": 5214 }, { "epoch": 2.7389705882352944, "grad_norm": 15.619700240407587, "learning_rate": 2.2983522796615698e-07, "loss": 1.0577616691589355, "step": 5215 }, { "epoch": 2.7394957983193278, "grad_norm": 15.023539675588399, "learning_rate": 2.2892022337292929e-07, "loss": 0.4063257575035095, "step": 5216 }, { "epoch": 2.740021008403361, "grad_norm": 9.742470715527096, "learning_rate": 2.2800700111813456e-07, "loss": 0.38715842366218567, "step": 5217 }, { "epoch": 2.740546218487395, "grad_norm": 8.154275100350905, "learning_rate": 2.2709556154292878e-07, "loss": 0.29554781317710876, "step": 5218 }, { "epoch": 2.741071428571429, "grad_norm": 15.388981485681432, "learning_rate": 2.2618590498779847e-07, "loss": 0.3714393973350525, "step": 5219 }, { "epoch": 2.741596638655462, "grad_norm": 16.877988790395055, "learning_rate": 2.2527803179256512e-07, "loss": 0.4998897314071655, "step": 5220 }, { "epoch": 2.7421218487394956, "grad_norm": 7.74639740984657, "learning_rate": 2.2437194229638415e-07, "loss": 0.40450119972229004, "step": 5221 }, { "epoch": 2.7426470588235294, "grad_norm": 10.529193464334151, "learning_rate": 2.234676368377442e-07, "loss": 0.9691871404647827, "step": 5222 }, { "epoch": 2.7431722689075633, "grad_norm": 7.858874563657268, "learning_rate": 2.2256511575446837e-07, "loss": 0.456901490688324, "step": 5223 }, { "epoch": 2.7436974789915967, "grad_norm": 12.315463276930483, "learning_rate": 2.21664379383712e-07, "loss": 1.2641358375549316, "step": 5224 }, { "epoch": 2.74422268907563, "grad_norm": 14.952039409073935, "learning_rate": 2.2076542806196588e-07, "loss": 0.8916432857513428, "step": 5225 }, { "epoch": 2.744747899159664, "grad_norm": 6.647874374561462, "learning_rate": 2.1986826212505085e-07, "loss": 0.29105767607688904, "step": 5226 }, { "epoch": 2.7452731092436977, "grad_norm": 9.494486770843666, "learning_rate": 2.1897288190812271e-07, "loss": 0.5085248947143555, "step": 5227 }, { "epoch": 2.745798319327731, "grad_norm": 12.758002239363936, "learning_rate": 2.1807928774567e-07, "loss": 0.5324745178222656, "step": 5228 }, { "epoch": 2.7463235294117645, "grad_norm": 12.831924534069625, "learning_rate": 2.17187479971514e-07, "loss": 0.411658376455307, "step": 5229 }, { "epoch": 2.7468487394957983, "grad_norm": 10.316201456070786, "learning_rate": 2.1629745891880826e-07, "loss": 0.4209370017051697, "step": 5230 }, { "epoch": 2.747373949579832, "grad_norm": 8.153955420619475, "learning_rate": 2.1540922492003902e-07, "loss": 0.5144951939582825, "step": 5231 }, { "epoch": 2.7478991596638656, "grad_norm": 9.69868090658562, "learning_rate": 2.1452277830702473e-07, "loss": 0.3466729521751404, "step": 5232 }, { "epoch": 2.748424369747899, "grad_norm": 13.490544809205158, "learning_rate": 2.136381194109166e-07, "loss": 0.26459285616874695, "step": 5233 }, { "epoch": 2.7489495798319328, "grad_norm": 12.72382832217057, "learning_rate": 2.1275524856219864e-07, "loss": 1.7140297889709473, "step": 5234 }, { "epoch": 2.7494747899159666, "grad_norm": 20.1977683566574, "learning_rate": 2.1187416609068533e-07, "loss": 0.7161862254142761, "step": 5235 }, { "epoch": 2.75, "grad_norm": 14.163750003239532, "learning_rate": 2.1099487232552395e-07, "loss": 0.5943667888641357, "step": 5236 }, { "epoch": 2.7505252100840334, "grad_norm": 10.008631260743623, "learning_rate": 2.1011736759519286e-07, "loss": 1.0886059999465942, "step": 5237 }, { "epoch": 2.7510504201680672, "grad_norm": 13.046513895398908, "learning_rate": 2.0924165222750315e-07, "loss": 0.8634684681892395, "step": 5238 }, { "epoch": 2.751575630252101, "grad_norm": 12.602009747509273, "learning_rate": 2.0836772654959647e-07, "loss": 0.5296191573143005, "step": 5239 }, { "epoch": 2.7521008403361344, "grad_norm": 11.488068286472648, "learning_rate": 2.0749559088794725e-07, "loss": 2.070067882537842, "step": 5240 }, { "epoch": 2.752626050420168, "grad_norm": 20.517043931977664, "learning_rate": 2.0662524556835982e-07, "loss": 1.506117820739746, "step": 5241 }, { "epoch": 2.7531512605042017, "grad_norm": 14.568706344718933, "learning_rate": 2.0575669091597028e-07, "loss": 0.39455515146255493, "step": 5242 }, { "epoch": 2.7536764705882355, "grad_norm": 10.191662703901843, "learning_rate": 2.048899272552457e-07, "loss": 0.36395686864852905, "step": 5243 }, { "epoch": 2.754201680672269, "grad_norm": 12.527550419845172, "learning_rate": 2.0402495490998374e-07, "loss": 0.5779542922973633, "step": 5244 }, { "epoch": 2.7547268907563023, "grad_norm": 9.700534359568579, "learning_rate": 2.0316177420331375e-07, "loss": 0.7974497675895691, "step": 5245 }, { "epoch": 2.755252100840336, "grad_norm": 13.081380083043447, "learning_rate": 2.0230038545769436e-07, "loss": 0.44117051362991333, "step": 5246 }, { "epoch": 2.75577731092437, "grad_norm": 12.716563259448858, "learning_rate": 2.0144078899491715e-07, "loss": 0.33425411581993103, "step": 5247 }, { "epoch": 2.7563025210084033, "grad_norm": 12.11437755750207, "learning_rate": 2.0058298513610185e-07, "loss": 0.271504670381546, "step": 5248 }, { "epoch": 2.7568277310924367, "grad_norm": 9.949676273396376, "learning_rate": 1.997269742016994e-07, "loss": 0.4813274145126343, "step": 5249 }, { "epoch": 2.7573529411764706, "grad_norm": 7.966684951065601, "learning_rate": 1.9887275651149064e-07, "loss": 0.5327956676483154, "step": 5250 }, { "epoch": 2.7578781512605044, "grad_norm": 7.366833698245474, "learning_rate": 1.980203323845875e-07, "loss": 0.28279638290405273, "step": 5251 }, { "epoch": 2.758403361344538, "grad_norm": 10.194026938818547, "learning_rate": 1.9716970213943032e-07, "loss": 0.3261258006095886, "step": 5252 }, { "epoch": 2.758928571428571, "grad_norm": 7.540614511619889, "learning_rate": 1.9632086609379041e-07, "loss": 0.25825923681259155, "step": 5253 }, { "epoch": 2.759453781512605, "grad_norm": 10.079179278488615, "learning_rate": 1.954738245647686e-07, "loss": 0.3405923843383789, "step": 5254 }, { "epoch": 2.759978991596639, "grad_norm": 7.474917382793523, "learning_rate": 1.9462857786879562e-07, "loss": 0.5673338770866394, "step": 5255 }, { "epoch": 2.7605042016806722, "grad_norm": 12.08479845850555, "learning_rate": 1.9378512632163116e-07, "loss": 0.33351564407348633, "step": 5256 }, { "epoch": 2.7610294117647056, "grad_norm": 9.985484581599241, "learning_rate": 1.929434702383648e-07, "loss": 0.5576674342155457, "step": 5257 }, { "epoch": 2.7615546218487395, "grad_norm": 13.262001763752773, "learning_rate": 1.9210360993341447e-07, "loss": 0.3428288698196411, "step": 5258 }, { "epoch": 2.7620798319327733, "grad_norm": 8.538119716704557, "learning_rate": 1.9126554572052813e-07, "loss": 0.3532198667526245, "step": 5259 }, { "epoch": 2.7626050420168067, "grad_norm": 10.353891713475264, "learning_rate": 1.9042927791278255e-07, "loss": 1.021399736404419, "step": 5260 }, { "epoch": 2.76313025210084, "grad_norm": 9.682995832088631, "learning_rate": 1.895948068225828e-07, "loss": 0.21227969229221344, "step": 5261 }, { "epoch": 2.763655462184874, "grad_norm": 14.381793813334657, "learning_rate": 1.88762132761664e-07, "loss": 0.46639224886894226, "step": 5262 }, { "epoch": 2.7641806722689077, "grad_norm": 7.763797401316596, "learning_rate": 1.879312560410901e-07, "loss": 0.4608529806137085, "step": 5263 }, { "epoch": 2.764705882352941, "grad_norm": 14.303230361485726, "learning_rate": 1.871021769712511e-07, "loss": 0.9698854684829712, "step": 5264 }, { "epoch": 2.7652310924369745, "grad_norm": 14.76113395344127, "learning_rate": 1.862748958618682e-07, "loss": 0.31873244047164917, "step": 5265 }, { "epoch": 2.7657563025210083, "grad_norm": 15.832060938925425, "learning_rate": 1.8544941302198916e-07, "loss": 0.2968553304672241, "step": 5266 }, { "epoch": 2.766281512605042, "grad_norm": 11.696503201110188, "learning_rate": 1.8462572875999117e-07, "loss": 0.4104006886482239, "step": 5267 }, { "epoch": 2.7668067226890756, "grad_norm": 14.507134113258228, "learning_rate": 1.8380384338357925e-07, "loss": 0.39367198944091797, "step": 5268 }, { "epoch": 2.7673319327731094, "grad_norm": 6.073440345244819, "learning_rate": 1.8298375719978501e-07, "loss": 0.15749509632587433, "step": 5269 }, { "epoch": 2.767857142857143, "grad_norm": 10.618812305534561, "learning_rate": 1.8216547051497057e-07, "loss": 0.7008739709854126, "step": 5270 }, { "epoch": 2.7683823529411766, "grad_norm": 20.67596785777784, "learning_rate": 1.8134898363482367e-07, "loss": 0.7393758296966553, "step": 5271 }, { "epoch": 2.76890756302521, "grad_norm": 12.111080953413962, "learning_rate": 1.805342968643603e-07, "loss": 0.5815110802650452, "step": 5272 }, { "epoch": 2.769432773109244, "grad_norm": 8.515310981885063, "learning_rate": 1.797214105079248e-07, "loss": 0.23644420504570007, "step": 5273 }, { "epoch": 2.7699579831932772, "grad_norm": 10.33491556272188, "learning_rate": 1.7891032486918703e-07, "loss": 0.50987708568573, "step": 5274 }, { "epoch": 2.770483193277311, "grad_norm": 9.630963979161228, "learning_rate": 1.7810104025114572e-07, "loss": 0.23692744970321655, "step": 5275 }, { "epoch": 2.7710084033613445, "grad_norm": 8.538582729561833, "learning_rate": 1.772935569561257e-07, "loss": 0.8385730981826782, "step": 5276 }, { "epoch": 2.7715336134453783, "grad_norm": 12.083804730648628, "learning_rate": 1.7648787528578127e-07, "loss": 0.5103262662887573, "step": 5277 }, { "epoch": 2.7720588235294117, "grad_norm": 21.93822699151687, "learning_rate": 1.7568399554109106e-07, "loss": 0.6717256903648376, "step": 5278 }, { "epoch": 2.7725840336134455, "grad_norm": 8.93335327779304, "learning_rate": 1.74881918022361e-07, "loss": 0.5249470472335815, "step": 5279 }, { "epoch": 2.773109243697479, "grad_norm": 7.6321937027675055, "learning_rate": 1.7408164302922471e-07, "loss": 0.2761799693107605, "step": 5280 }, { "epoch": 2.7736344537815127, "grad_norm": 5.1413373017231345, "learning_rate": 1.732831708606425e-07, "loss": 0.13875234127044678, "step": 5281 }, { "epoch": 2.774159663865546, "grad_norm": 8.847203043976213, "learning_rate": 1.7248650181489913e-07, "loss": 0.3915010392665863, "step": 5282 }, { "epoch": 2.77468487394958, "grad_norm": 11.03708182958953, "learning_rate": 1.7169163618960817e-07, "loss": 0.362044095993042, "step": 5283 }, { "epoch": 2.7752100840336134, "grad_norm": 13.893155821128941, "learning_rate": 1.708985742817093e-07, "loss": 0.7443951368331909, "step": 5284 }, { "epoch": 2.775735294117647, "grad_norm": 13.37558564170199, "learning_rate": 1.7010731638746668e-07, "loss": 0.5511894226074219, "step": 5285 }, { "epoch": 2.7762605042016806, "grad_norm": 10.838040601191585, "learning_rate": 1.6931786280247164e-07, "loss": 0.5490012168884277, "step": 5286 }, { "epoch": 2.7767857142857144, "grad_norm": 11.709256979236866, "learning_rate": 1.6853021382164215e-07, "loss": 0.7820290327072144, "step": 5287 }, { "epoch": 2.777310924369748, "grad_norm": 14.16090941397965, "learning_rate": 1.677443697392206e-07, "loss": 0.37508317828178406, "step": 5288 }, { "epoch": 2.7778361344537816, "grad_norm": 13.287352811773326, "learning_rate": 1.669603308487755e-07, "loss": 0.3444094657897949, "step": 5289 }, { "epoch": 2.778361344537815, "grad_norm": 10.214412289717455, "learning_rate": 1.6617809744320202e-07, "loss": 0.529754638671875, "step": 5290 }, { "epoch": 2.778886554621849, "grad_norm": 10.049130527937852, "learning_rate": 1.6539766981471861e-07, "loss": 0.351434588432312, "step": 5291 }, { "epoch": 2.7794117647058822, "grad_norm": 8.273984362625566, "learning_rate": 1.6461904825487263e-07, "loss": 0.2401900738477707, "step": 5292 }, { "epoch": 2.779936974789916, "grad_norm": 13.618541858300375, "learning_rate": 1.6384223305453417e-07, "loss": 0.3075028657913208, "step": 5293 }, { "epoch": 2.7804621848739495, "grad_norm": 13.146301169739644, "learning_rate": 1.6306722450389834e-07, "loss": 0.38644635677337646, "step": 5294 }, { "epoch": 2.7809873949579833, "grad_norm": 54.93450039098761, "learning_rate": 1.6229402289248686e-07, "loss": 0.41556641459465027, "step": 5295 }, { "epoch": 2.7815126050420167, "grad_norm": 11.898619469462943, "learning_rate": 1.615226285091448e-07, "loss": 0.48220962285995483, "step": 5296 }, { "epoch": 2.7820378151260505, "grad_norm": 8.805826705739822, "learning_rate": 1.6075304164204385e-07, "loss": 0.38020676374435425, "step": 5297 }, { "epoch": 2.782563025210084, "grad_norm": 16.819100127416643, "learning_rate": 1.5998526257867852e-07, "loss": 0.4429679214954376, "step": 5298 }, { "epoch": 2.7830882352941178, "grad_norm": 9.930819894253196, "learning_rate": 1.5921929160587045e-07, "loss": 0.24621260166168213, "step": 5299 }, { "epoch": 2.783613445378151, "grad_norm": 8.793978379463555, "learning_rate": 1.5845512900976355e-07, "loss": 0.21653716266155243, "step": 5300 }, { "epoch": 2.784138655462185, "grad_norm": 9.093765452154368, "learning_rate": 1.5769277507582725e-07, "loss": 0.2279520481824875, "step": 5301 }, { "epoch": 2.7846638655462184, "grad_norm": 8.319356182635838, "learning_rate": 1.5693223008885482e-07, "loss": 0.3648935854434967, "step": 5302 }, { "epoch": 2.785189075630252, "grad_norm": 12.361521188510322, "learning_rate": 1.5617349433296402e-07, "loss": 0.456037312746048, "step": 5303 }, { "epoch": 2.7857142857142856, "grad_norm": 8.854668678436887, "learning_rate": 1.55416568091597e-07, "loss": 0.6153507232666016, "step": 5304 }, { "epoch": 2.7862394957983194, "grad_norm": 13.793754105887354, "learning_rate": 1.5466145164751977e-07, "loss": 0.6217098832130432, "step": 5305 }, { "epoch": 2.786764705882353, "grad_norm": 8.201286776594328, "learning_rate": 1.539081452828217e-07, "loss": 0.24277333915233612, "step": 5306 }, { "epoch": 2.7872899159663866, "grad_norm": 9.010046431737942, "learning_rate": 1.531566492789166e-07, "loss": 0.28896012902259827, "step": 5307 }, { "epoch": 2.78781512605042, "grad_norm": 10.78762507105897, "learning_rate": 1.5240696391654208e-07, "loss": 0.3023233711719513, "step": 5308 }, { "epoch": 2.788340336134454, "grad_norm": 17.127626300524348, "learning_rate": 1.5165908947575914e-07, "loss": 1.1710079908370972, "step": 5309 }, { "epoch": 2.7888655462184873, "grad_norm": 14.491777086749252, "learning_rate": 1.5091302623595205e-07, "loss": 0.7649059295654297, "step": 5310 }, { "epoch": 2.789390756302521, "grad_norm": 11.073341365094276, "learning_rate": 1.501687744758279e-07, "loss": 0.574504017829895, "step": 5311 }, { "epoch": 2.7899159663865545, "grad_norm": 10.805373645764925, "learning_rate": 1.4942633447341815e-07, "loss": 0.738406777381897, "step": 5312 }, { "epoch": 2.7904411764705883, "grad_norm": 10.021427179838017, "learning_rate": 1.4868570650607816e-07, "loss": 0.396742582321167, "step": 5313 }, { "epoch": 2.7909663865546217, "grad_norm": 16.291250676264095, "learning_rate": 1.4794689085048386e-07, "loss": 0.6696195602416992, "step": 5314 }, { "epoch": 2.7914915966386555, "grad_norm": 13.070604790935267, "learning_rate": 1.4720988778263612e-07, "loss": 0.3883357644081116, "step": 5315 }, { "epoch": 2.792016806722689, "grad_norm": 10.139241497110453, "learning_rate": 1.464746975778586e-07, "loss": 0.4829482436180115, "step": 5316 }, { "epoch": 2.7925420168067228, "grad_norm": 14.787881451448238, "learning_rate": 1.4574132051079658e-07, "loss": 0.3221268653869629, "step": 5317 }, { "epoch": 2.793067226890756, "grad_norm": 10.175809100063471, "learning_rate": 1.450097568554193e-07, "loss": 0.3616814911365509, "step": 5318 }, { "epoch": 2.79359243697479, "grad_norm": 8.98723246899509, "learning_rate": 1.44280006885017e-07, "loss": 0.7913529872894287, "step": 5319 }, { "epoch": 2.7941176470588234, "grad_norm": 12.738193711962554, "learning_rate": 1.4355207087220436e-07, "loss": 0.6317701935768127, "step": 5320 }, { "epoch": 2.794642857142857, "grad_norm": 10.677582673573733, "learning_rate": 1.4282594908891666e-07, "loss": 0.3486226797103882, "step": 5321 }, { "epoch": 2.7951680672268906, "grad_norm": 16.37361640788061, "learning_rate": 1.4210164180641195e-07, "loss": 0.5092406272888184, "step": 5322 }, { "epoch": 2.7956932773109244, "grad_norm": 13.25979261324815, "learning_rate": 1.4137914929527097e-07, "loss": 0.46420764923095703, "step": 5323 }, { "epoch": 2.796218487394958, "grad_norm": 9.485223593080686, "learning_rate": 1.406584718253967e-07, "loss": 1.2684170007705688, "step": 5324 }, { "epoch": 2.7967436974789917, "grad_norm": 11.07027522648686, "learning_rate": 1.3993960966601328e-07, "loss": 0.805744469165802, "step": 5325 }, { "epoch": 2.7972689075630255, "grad_norm": 6.362335976876811, "learning_rate": 1.3922256308566696e-07, "loss": 0.23201191425323486, "step": 5326 }, { "epoch": 2.797794117647059, "grad_norm": 8.94539050724915, "learning_rate": 1.3850733235222512e-07, "loss": 0.2961287498474121, "step": 5327 }, { "epoch": 2.7983193277310923, "grad_norm": 13.688707017855698, "learning_rate": 1.3779391773287854e-07, "loss": 0.66350257396698, "step": 5328 }, { "epoch": 2.798844537815126, "grad_norm": 9.26589871090719, "learning_rate": 1.3708231949413676e-07, "loss": 0.6241029500961304, "step": 5329 }, { "epoch": 2.79936974789916, "grad_norm": 9.52911476160773, "learning_rate": 1.3637253790183435e-07, "loss": 0.2913224697113037, "step": 5330 }, { "epoch": 2.7998949579831933, "grad_norm": 11.36635584842067, "learning_rate": 1.3566457322112425e-07, "loss": 0.47725537419319153, "step": 5331 }, { "epoch": 2.8004201680672267, "grad_norm": 13.66061270502456, "learning_rate": 1.349584257164821e-07, "loss": 0.4649675190448761, "step": 5332 }, { "epoch": 2.8009453781512605, "grad_norm": 8.293346986204147, "learning_rate": 1.342540956517041e-07, "loss": 0.4227481782436371, "step": 5333 }, { "epoch": 2.8014705882352944, "grad_norm": 9.106751554917912, "learning_rate": 1.3355158328990814e-07, "loss": 0.49842900037765503, "step": 5334 }, { "epoch": 2.8019957983193278, "grad_norm": 11.161237893089197, "learning_rate": 1.3285088889353203e-07, "loss": 1.0039618015289307, "step": 5335 }, { "epoch": 2.802521008403361, "grad_norm": 9.60805238161859, "learning_rate": 1.3215201272433585e-07, "loss": 0.5710328221321106, "step": 5336 }, { "epoch": 2.803046218487395, "grad_norm": 11.718173968294836, "learning_rate": 1.3145495504339856e-07, "loss": 0.7412339448928833, "step": 5337 }, { "epoch": 2.803571428571429, "grad_norm": 8.385559110735308, "learning_rate": 1.3075971611112237e-07, "loss": 0.37511223554611206, "step": 5338 }, { "epoch": 2.804096638655462, "grad_norm": 6.657065325141617, "learning_rate": 1.3006629618722733e-07, "loss": 0.29596245288848877, "step": 5339 }, { "epoch": 2.8046218487394956, "grad_norm": 12.859196836036965, "learning_rate": 1.293746955307562e-07, "loss": 0.44414791464805603, "step": 5340 }, { "epoch": 2.8051470588235294, "grad_norm": 7.28302530916716, "learning_rate": 1.2868491440007015e-07, "loss": 0.791850209236145, "step": 5341 }, { "epoch": 2.8056722689075633, "grad_norm": 10.843610602596723, "learning_rate": 1.2799695305285241e-07, "loss": 1.0128309726715088, "step": 5342 }, { "epoch": 2.8061974789915967, "grad_norm": 9.704900045551488, "learning_rate": 1.2731081174610526e-07, "loss": 0.8688849210739136, "step": 5343 }, { "epoch": 2.80672268907563, "grad_norm": 11.951938602000247, "learning_rate": 1.2662649073615084e-07, "loss": 0.7026639580726624, "step": 5344 }, { "epoch": 2.807247899159664, "grad_norm": 7.8211097217957555, "learning_rate": 1.2594399027863302e-07, "loss": 0.543343186378479, "step": 5345 }, { "epoch": 2.8077731092436977, "grad_norm": 11.22422748506458, "learning_rate": 1.2526331062851395e-07, "loss": 0.5593937039375305, "step": 5346 }, { "epoch": 2.808298319327731, "grad_norm": 24.53637925057311, "learning_rate": 1.245844520400752e-07, "loss": 0.5738602876663208, "step": 5347 }, { "epoch": 2.8088235294117645, "grad_norm": 16.051264213764703, "learning_rate": 1.2390741476692003e-07, "loss": 0.3799140751361847, "step": 5348 }, { "epoch": 2.8093487394957983, "grad_norm": 8.783649743514527, "learning_rate": 1.232321990619695e-07, "loss": 0.4208924174308777, "step": 5349 }, { "epoch": 2.809873949579832, "grad_norm": 15.934727226597506, "learning_rate": 1.2255880517746453e-07, "loss": 0.3477574586868286, "step": 5350 }, { "epoch": 2.8103991596638656, "grad_norm": 12.036751450871467, "learning_rate": 1.2188723336496623e-07, "loss": 0.49650102853775024, "step": 5351 }, { "epoch": 2.810924369747899, "grad_norm": 10.078649824134489, "learning_rate": 1.2121748387535437e-07, "loss": 0.6503196954727173, "step": 5352 }, { "epoch": 2.8114495798319328, "grad_norm": 17.584087336453408, "learning_rate": 1.205495569588283e-07, "loss": 0.8900395631790161, "step": 5353 }, { "epoch": 2.8119747899159666, "grad_norm": 10.319350930512377, "learning_rate": 1.198834528649062e-07, "loss": 0.46624845266342163, "step": 5354 }, { "epoch": 2.8125, "grad_norm": 11.944373383781771, "learning_rate": 1.192191718424257e-07, "loss": 0.3012745678424835, "step": 5355 }, { "epoch": 2.8130252100840334, "grad_norm": 10.114065064506235, "learning_rate": 1.1855671413954272e-07, "loss": 0.4149806499481201, "step": 5356 }, { "epoch": 2.8135504201680672, "grad_norm": 12.746936015257683, "learning_rate": 1.1789608000373209e-07, "loss": 0.6580238938331604, "step": 5357 }, { "epoch": 2.814075630252101, "grad_norm": 10.278430011703913, "learning_rate": 1.1723726968178917e-07, "loss": 0.303714394569397, "step": 5358 }, { "epoch": 2.8146008403361344, "grad_norm": 45.32337131709893, "learning_rate": 1.1658028341982486e-07, "loss": 1.570613980293274, "step": 5359 }, { "epoch": 2.815126050420168, "grad_norm": 9.59833546372829, "learning_rate": 1.1592512146327117e-07, "loss": 0.468766987323761, "step": 5360 }, { "epoch": 2.8156512605042017, "grad_norm": 12.130684925064983, "learning_rate": 1.1527178405687845e-07, "loss": 0.3492281138896942, "step": 5361 }, { "epoch": 2.8161764705882355, "grad_norm": 10.839829582761553, "learning_rate": 1.1462027144471367e-07, "loss": 0.3299821615219116, "step": 5362 }, { "epoch": 2.816701680672269, "grad_norm": 10.197376718305389, "learning_rate": 1.139705838701638e-07, "loss": 0.7447125315666199, "step": 5363 }, { "epoch": 2.8172268907563023, "grad_norm": 10.416405125397606, "learning_rate": 1.133227215759336e-07, "loss": 0.7418153285980225, "step": 5364 }, { "epoch": 2.817752100840336, "grad_norm": 10.707547744824177, "learning_rate": 1.1267668480404559e-07, "loss": 0.40967997908592224, "step": 5365 }, { "epoch": 2.81827731092437, "grad_norm": 13.873791986477118, "learning_rate": 1.1203247379584004e-07, "loss": 0.4700956344604492, "step": 5366 }, { "epoch": 2.8188025210084033, "grad_norm": 9.250387949417668, "learning_rate": 1.1139008879197722e-07, "loss": 0.6769203543663025, "step": 5367 }, { "epoch": 2.8193277310924367, "grad_norm": 13.165084999423746, "learning_rate": 1.1074953003243183e-07, "loss": 0.329289972782135, "step": 5368 }, { "epoch": 2.8198529411764706, "grad_norm": 10.888653879775097, "learning_rate": 1.1011079775649969e-07, "loss": 0.7494542598724365, "step": 5369 }, { "epoch": 2.8203781512605044, "grad_norm": 7.969576908463429, "learning_rate": 1.0947389220279214e-07, "loss": 0.863798201084137, "step": 5370 }, { "epoch": 2.820903361344538, "grad_norm": 16.69518130169497, "learning_rate": 1.0883881360923943e-07, "loss": 0.966042160987854, "step": 5371 }, { "epoch": 2.821428571428571, "grad_norm": 8.572555688495012, "learning_rate": 1.082055622130873e-07, "loss": 0.27385351061820984, "step": 5372 }, { "epoch": 2.821953781512605, "grad_norm": 10.54310458802142, "learning_rate": 1.0757413825090212e-07, "loss": 0.5568721294403076, "step": 5373 }, { "epoch": 2.822478991596639, "grad_norm": 9.10784506239515, "learning_rate": 1.0694454195856408e-07, "loss": 0.5777037739753723, "step": 5374 }, { "epoch": 2.8230042016806722, "grad_norm": 7.909087522378086, "learning_rate": 1.0631677357127335e-07, "loss": 0.501948893070221, "step": 5375 }, { "epoch": 2.8235294117647056, "grad_norm": 6.986424072356834, "learning_rate": 1.0569083332354568e-07, "loss": 0.36117836833000183, "step": 5376 }, { "epoch": 2.8240546218487395, "grad_norm": 14.008208299228922, "learning_rate": 1.0506672144921515e-07, "loss": 0.3777201175689697, "step": 5377 }, { "epoch": 2.8245798319327733, "grad_norm": 5.040241788861432, "learning_rate": 1.0444443818143135e-07, "loss": 0.1081673800945282, "step": 5378 }, { "epoch": 2.8251050420168067, "grad_norm": 19.53682313756833, "learning_rate": 1.0382398375266111e-07, "loss": 0.7970004081726074, "step": 5379 }, { "epoch": 2.82563025210084, "grad_norm": 8.732013370059734, "learning_rate": 1.0320535839468904e-07, "loss": 0.21069815754890442, "step": 5380 }, { "epoch": 2.826155462184874, "grad_norm": 11.66756751108921, "learning_rate": 1.0258856233861524e-07, "loss": 0.39165201783180237, "step": 5381 }, { "epoch": 2.8266806722689077, "grad_norm": 10.838267364878453, "learning_rate": 1.0197359581485821e-07, "loss": 0.4563502073287964, "step": 5382 }, { "epoch": 2.827205882352941, "grad_norm": 9.686001954281087, "learning_rate": 1.0136045905315028e-07, "loss": 0.49988648295402527, "step": 5383 }, { "epoch": 2.8277310924369745, "grad_norm": 11.290492863035048, "learning_rate": 1.0074915228254267e-07, "loss": 0.38395699858665466, "step": 5384 }, { "epoch": 2.8282563025210083, "grad_norm": 9.097544675573802, "learning_rate": 1.0013967573140216e-07, "loss": 0.172023743391037, "step": 5385 }, { "epoch": 2.828781512605042, "grad_norm": 6.88368428192777, "learning_rate": 9.953202962741105e-08, "loss": 0.5613949298858643, "step": 5386 }, { "epoch": 2.8293067226890756, "grad_norm": 7.980538562005332, "learning_rate": 9.892621419756888e-08, "loss": 0.42911338806152344, "step": 5387 }, { "epoch": 2.8298319327731094, "grad_norm": 9.464845386903423, "learning_rate": 9.832222966819015e-08, "loss": 0.41949114203453064, "step": 5388 }, { "epoch": 2.830357142857143, "grad_norm": 10.902470001210592, "learning_rate": 9.77200762649072e-08, "loss": 0.6436776518821716, "step": 5389 }, { "epoch": 2.8308823529411766, "grad_norm": 12.109360939073499, "learning_rate": 9.711975421266673e-08, "loss": 0.6845769882202148, "step": 5390 }, { "epoch": 2.83140756302521, "grad_norm": 13.991567999585888, "learning_rate": 9.652126373573211e-08, "loss": 0.743374228477478, "step": 5391 }, { "epoch": 2.831932773109244, "grad_norm": 10.004105747890282, "learning_rate": 9.592460505768176e-08, "loss": 0.50460284948349, "step": 5392 }, { "epoch": 2.8324579831932772, "grad_norm": 12.033271998192342, "learning_rate": 9.532977840141123e-08, "loss": 0.5155885815620422, "step": 5393 }, { "epoch": 2.832983193277311, "grad_norm": 11.297583392445413, "learning_rate": 9.47367839891289e-08, "loss": 0.5681381821632385, "step": 5394 }, { "epoch": 2.8335084033613445, "grad_norm": 9.224849387242672, "learning_rate": 9.414562204236199e-08, "loss": 0.3853095769882202, "step": 5395 }, { "epoch": 2.8340336134453783, "grad_norm": 8.496614906852798, "learning_rate": 9.355629278195111e-08, "loss": 0.7218843102455139, "step": 5396 }, { "epoch": 2.8345588235294117, "grad_norm": 12.94308171771513, "learning_rate": 9.29687964280529e-08, "loss": 0.7495203614234924, "step": 5397 }, { "epoch": 2.8350840336134455, "grad_norm": 17.18270599819209, "learning_rate": 9.238313320013903e-08, "loss": 1.6747816801071167, "step": 5398 }, { "epoch": 2.835609243697479, "grad_norm": 14.64959043691776, "learning_rate": 9.179930331699615e-08, "loss": 0.4574100971221924, "step": 5399 }, { "epoch": 2.8361344537815127, "grad_norm": 12.818606677492864, "learning_rate": 9.121730699672704e-08, "loss": 0.5144633650779724, "step": 5400 }, { "epoch": 2.836659663865546, "grad_norm": 15.213620111633261, "learning_rate": 9.063714445674776e-08, "loss": 0.37657615542411804, "step": 5401 }, { "epoch": 2.83718487394958, "grad_norm": 12.468471476033974, "learning_rate": 9.005881591379161e-08, "loss": 1.0478260517120361, "step": 5402 }, { "epoch": 2.8377100840336134, "grad_norm": 20.589081134747186, "learning_rate": 8.948232158390468e-08, "loss": 0.8107958436012268, "step": 5403 }, { "epoch": 2.838235294117647, "grad_norm": 7.59970019977053, "learning_rate": 8.890766168244913e-08, "loss": 0.3058919608592987, "step": 5404 }, { "epoch": 2.8387605042016806, "grad_norm": 8.897228334750066, "learning_rate": 8.833483642410101e-08, "loss": 0.29552382230758667, "step": 5405 }, { "epoch": 2.8392857142857144, "grad_norm": 11.976288543042422, "learning_rate": 8.776384602285193e-08, "loss": 0.36509275436401367, "step": 5406 }, { "epoch": 2.839810924369748, "grad_norm": 12.87114970838843, "learning_rate": 8.719469069200737e-08, "loss": 0.9010012149810791, "step": 5407 }, { "epoch": 2.8403361344537816, "grad_norm": 12.447854522581904, "learning_rate": 8.662737064418725e-08, "loss": 0.514291524887085, "step": 5408 }, { "epoch": 2.840861344537815, "grad_norm": 11.513357376603228, "learning_rate": 8.606188609132593e-08, "loss": 0.38690242171287537, "step": 5409 }, { "epoch": 2.841386554621849, "grad_norm": 12.003519814690758, "learning_rate": 8.54982372446722e-08, "loss": 0.8508661985397339, "step": 5410 }, { "epoch": 2.8419117647058822, "grad_norm": 7.7949565701068755, "learning_rate": 8.493642431478877e-08, "loss": 0.28931260108947754, "step": 5411 }, { "epoch": 2.842436974789916, "grad_norm": 12.18276386266004, "learning_rate": 8.437644751155383e-08, "loss": 0.5548318028450012, "step": 5412 }, { "epoch": 2.8429621848739495, "grad_norm": 13.121820859416536, "learning_rate": 8.381830704415839e-08, "loss": 0.43945053219795227, "step": 5413 }, { "epoch": 2.8434873949579833, "grad_norm": 13.670072708652555, "learning_rate": 8.326200312110732e-08, "loss": 0.46232089400291443, "step": 5414 }, { "epoch": 2.8440126050420167, "grad_norm": 11.601618264853066, "learning_rate": 8.270753595021941e-08, "loss": 0.2802753448486328, "step": 5415 }, { "epoch": 2.8445378151260505, "grad_norm": 14.069716355246122, "learning_rate": 8.215490573862838e-08, "loss": 0.681830644607544, "step": 5416 }, { "epoch": 2.845063025210084, "grad_norm": 9.981005871385653, "learning_rate": 8.160411269278079e-08, "loss": 0.30614691972732544, "step": 5417 }, { "epoch": 2.8455882352941178, "grad_norm": 17.493708242652872, "learning_rate": 8.105515701843703e-08, "loss": 0.3944821357727051, "step": 5418 }, { "epoch": 2.846113445378151, "grad_norm": 22.31584618961918, "learning_rate": 8.050803892067139e-08, "loss": 0.5468156337738037, "step": 5419 }, { "epoch": 2.846638655462185, "grad_norm": 6.983503197041113, "learning_rate": 7.996275860387149e-08, "loss": 0.2810583710670471, "step": 5420 }, { "epoch": 2.8471638655462184, "grad_norm": 8.057975557851535, "learning_rate": 7.941931627173827e-08, "loss": 0.6226363182067871, "step": 5421 }, { "epoch": 2.847689075630252, "grad_norm": 8.462203546292969, "learning_rate": 7.887771212728601e-08, "loss": 0.3900071680545807, "step": 5422 }, { "epoch": 2.8482142857142856, "grad_norm": 13.5572522209641, "learning_rate": 7.833794637284232e-08, "loss": 0.3240922689437866, "step": 5423 }, { "epoch": 2.8487394957983194, "grad_norm": 11.616897923443245, "learning_rate": 7.780001921004864e-08, "loss": 0.34255099296569824, "step": 5424 }, { "epoch": 2.849264705882353, "grad_norm": 9.4279151169318, "learning_rate": 7.726393083985929e-08, "loss": 0.9534785747528076, "step": 5425 }, { "epoch": 2.8497899159663866, "grad_norm": 9.868687622826036, "learning_rate": 7.672968146254068e-08, "loss": 0.27143174409866333, "step": 5426 }, { "epoch": 2.85031512605042, "grad_norm": 11.477046303047926, "learning_rate": 7.61972712776743e-08, "loss": 0.3544968366622925, "step": 5427 }, { "epoch": 2.850840336134454, "grad_norm": 8.630011008442724, "learning_rate": 7.566670048415214e-08, "loss": 0.2991662621498108, "step": 5428 }, { "epoch": 2.8513655462184873, "grad_norm": 9.050442791986411, "learning_rate": 7.513796928018069e-08, "loss": 0.6704224348068237, "step": 5429 }, { "epoch": 2.851890756302521, "grad_norm": 8.472622961682655, "learning_rate": 7.46110778632786e-08, "loss": 0.2958106994628906, "step": 5430 }, { "epoch": 2.8524159663865545, "grad_norm": 10.885641210710723, "learning_rate": 7.408602643027729e-08, "loss": 0.18826164305210114, "step": 5431 }, { "epoch": 2.8529411764705883, "grad_norm": 8.202291202121717, "learning_rate": 7.356281517732156e-08, "loss": 0.5394679307937622, "step": 5432 }, { "epoch": 2.8534663865546217, "grad_norm": 9.099191152457923, "learning_rate": 7.30414442998667e-08, "loss": 0.22680304944515228, "step": 5433 }, { "epoch": 2.8539915966386555, "grad_norm": 9.571015785775163, "learning_rate": 7.25219139926836e-08, "loss": 0.5800913572311401, "step": 5434 }, { "epoch": 2.854516806722689, "grad_norm": 8.220725070899238, "learning_rate": 7.200422444985312e-08, "loss": 0.22415028512477875, "step": 5435 }, { "epoch": 2.8550420168067228, "grad_norm": 15.670997270041683, "learning_rate": 7.148837586476887e-08, "loss": 0.29121801257133484, "step": 5436 }, { "epoch": 2.855567226890756, "grad_norm": 13.481412102366525, "learning_rate": 7.097436843013783e-08, "loss": 0.40365713834762573, "step": 5437 }, { "epoch": 2.85609243697479, "grad_norm": 12.765602714375742, "learning_rate": 7.046220233797752e-08, "loss": 0.2992285490036011, "step": 5438 }, { "epoch": 2.8566176470588234, "grad_norm": 14.3976847124799, "learning_rate": 6.995187777961931e-08, "loss": 0.7297098636627197, "step": 5439 }, { "epoch": 2.857142857142857, "grad_norm": 9.65972866568365, "learning_rate": 6.944339494570517e-08, "loss": 0.7718173861503601, "step": 5440 }, { "epoch": 2.8576680672268906, "grad_norm": 14.846550129795787, "learning_rate": 6.893675402618982e-08, "loss": 1.429620385169983, "step": 5441 }, { "epoch": 2.8581932773109244, "grad_norm": 9.904965079651344, "learning_rate": 6.843195521034018e-08, "loss": 0.3568647801876068, "step": 5442 }, { "epoch": 2.858718487394958, "grad_norm": 8.705997120983524, "learning_rate": 6.792899868673487e-08, "loss": 0.4867497384548187, "step": 5443 }, { "epoch": 2.8592436974789917, "grad_norm": 8.929695338280712, "learning_rate": 6.742788464326245e-08, "loss": 0.5628509521484375, "step": 5444 }, { "epoch": 2.8597689075630255, "grad_norm": 12.220493455838309, "learning_rate": 6.692861326712652e-08, "loss": 0.9987137913703918, "step": 5445 }, { "epoch": 2.860294117647059, "grad_norm": 12.507261218890108, "learning_rate": 6.643118474483956e-08, "loss": 0.42365092039108276, "step": 5446 }, { "epoch": 2.8608193277310923, "grad_norm": 9.46656640852272, "learning_rate": 6.593559926222682e-08, "loss": 0.518358588218689, "step": 5447 }, { "epoch": 2.861344537815126, "grad_norm": 12.063900217699144, "learning_rate": 6.544185700442407e-08, "loss": 0.2418670952320099, "step": 5448 }, { "epoch": 2.86186974789916, "grad_norm": 14.992423646784777, "learning_rate": 6.494995815588101e-08, "loss": 0.7602907419204712, "step": 5449 }, { "epoch": 2.8623949579831933, "grad_norm": 12.546609917885268, "learning_rate": 6.445990290035509e-08, "loss": 0.5298495888710022, "step": 5450 }, { "epoch": 2.8629201680672267, "grad_norm": 12.68031433329933, "learning_rate": 6.397169142091819e-08, "loss": 0.23322808742523193, "step": 5451 }, { "epoch": 2.8634453781512605, "grad_norm": 13.685008586017366, "learning_rate": 6.34853238999511e-08, "loss": 0.4290042519569397, "step": 5452 }, { "epoch": 2.8639705882352944, "grad_norm": 9.16347533489729, "learning_rate": 6.300080051914792e-08, "loss": 0.8632311820983887, "step": 5453 }, { "epoch": 2.8644957983193278, "grad_norm": 7.432043220990809, "learning_rate": 6.251812145951163e-08, "loss": 0.35551148653030396, "step": 5454 }, { "epoch": 2.865021008403361, "grad_norm": 16.28827793340928, "learning_rate": 6.203728690135691e-08, "loss": 0.7173649072647095, "step": 5455 }, { "epoch": 2.865546218487395, "grad_norm": 8.500449978960063, "learning_rate": 6.15582970243117e-08, "loss": 0.46399128437042236, "step": 5456 }, { "epoch": 2.866071428571429, "grad_norm": 8.112111415134255, "learning_rate": 6.108115200731069e-08, "loss": 0.2686334252357483, "step": 5457 }, { "epoch": 2.866596638655462, "grad_norm": 11.915682938336953, "learning_rate": 6.060585202860291e-08, "loss": 0.24891838431358337, "step": 5458 }, { "epoch": 2.8671218487394956, "grad_norm": 7.550971866134155, "learning_rate": 6.013239726574694e-08, "loss": 0.5653460621833801, "step": 5459 }, { "epoch": 2.8676470588235294, "grad_norm": 14.66350281455618, "learning_rate": 5.96607878956107e-08, "loss": 0.3963346779346466, "step": 5460 }, { "epoch": 2.8681722689075633, "grad_norm": 19.335434005393587, "learning_rate": 5.9191024094374384e-08, "loss": 0.42505916953086853, "step": 5461 }, { "epoch": 2.8686974789915967, "grad_norm": 8.58848108397314, "learning_rate": 5.872310603752873e-08, "loss": 0.22156614065170288, "step": 5462 }, { "epoch": 2.86922268907563, "grad_norm": 10.815123405405565, "learning_rate": 5.825703389987392e-08, "loss": 0.31902599334716797, "step": 5463 }, { "epoch": 2.869747899159664, "grad_norm": 10.538887002681403, "learning_rate": 5.7792807855521774e-08, "loss": 0.6373153924942017, "step": 5464 }, { "epoch": 2.8702731092436977, "grad_norm": 7.739173010504628, "learning_rate": 5.7330428077893575e-08, "loss": 0.3954012989997864, "step": 5465 }, { "epoch": 2.870798319327731, "grad_norm": 15.16119100932093, "learning_rate": 5.68698947397206e-08, "loss": 0.7021001577377319, "step": 5466 }, { "epoch": 2.8713235294117645, "grad_norm": 10.034576471833478, "learning_rate": 5.641120801304523e-08, "loss": 0.5146601796150208, "step": 5467 }, { "epoch": 2.8718487394957983, "grad_norm": 14.995480459574054, "learning_rate": 5.5954368069219834e-08, "loss": 0.5189433097839355, "step": 5468 }, { "epoch": 2.872373949579832, "grad_norm": 11.433428653053515, "learning_rate": 5.5499375078906793e-08, "loss": 0.27618587017059326, "step": 5469 }, { "epoch": 2.8728991596638656, "grad_norm": 6.885839621084402, "learning_rate": 5.504622921207736e-08, "loss": 0.28780463337898254, "step": 5470 }, { "epoch": 2.873424369747899, "grad_norm": 7.0398569741963115, "learning_rate": 5.4594930638015574e-08, "loss": 0.3494113087654114, "step": 5471 }, { "epoch": 2.8739495798319328, "grad_norm": 10.392317991672112, "learning_rate": 5.414547952531213e-08, "loss": 0.6539039015769958, "step": 5472 }, { "epoch": 2.8744747899159666, "grad_norm": 10.463577463377476, "learning_rate": 5.369787604186993e-08, "loss": 0.5085176229476929, "step": 5473 }, { "epoch": 2.875, "grad_norm": 13.109319236077438, "learning_rate": 5.325212035490024e-08, "loss": 0.44301432371139526, "step": 5474 }, { "epoch": 2.8755252100840334, "grad_norm": 11.177983266236517, "learning_rate": 5.2808212630925395e-08, "loss": 0.2431657910346985, "step": 5475 }, { "epoch": 2.8760504201680672, "grad_norm": 9.547790291300467, "learning_rate": 5.236615303577552e-08, "loss": 0.19286304712295532, "step": 5476 }, { "epoch": 2.876575630252101, "grad_norm": 9.332135530406768, "learning_rate": 5.192594173459242e-08, "loss": 0.3314986228942871, "step": 5477 }, { "epoch": 2.8771008403361344, "grad_norm": 12.756189824241634, "learning_rate": 5.148757889182565e-08, "loss": 0.3398740887641907, "step": 5478 }, { "epoch": 2.877626050420168, "grad_norm": 10.322278129800047, "learning_rate": 5.105106467123477e-08, "loss": 0.3892437517642975, "step": 5479 }, { "epoch": 2.8781512605042017, "grad_norm": 10.429227622362067, "learning_rate": 5.06163992358899e-08, "loss": 0.23828959465026855, "step": 5480 }, { "epoch": 2.8786764705882355, "grad_norm": 12.802806398235582, "learning_rate": 5.018358274816892e-08, "loss": 0.5334790349006653, "step": 5481 }, { "epoch": 2.879201680672269, "grad_norm": 12.025231146183835, "learning_rate": 4.975261536975973e-08, "loss": 0.22797216475009918, "step": 5482 }, { "epoch": 2.8797268907563023, "grad_norm": 15.340707137384607, "learning_rate": 4.9323497261659635e-08, "loss": 0.4636194109916687, "step": 5483 }, { "epoch": 2.880252100840336, "grad_norm": 9.916330054183163, "learning_rate": 4.889622858417431e-08, "loss": 0.31082162261009216, "step": 5484 }, { "epoch": 2.88077731092437, "grad_norm": 12.900591211866876, "learning_rate": 4.847080949691996e-08, "loss": 0.4545387625694275, "step": 5485 }, { "epoch": 2.8813025210084033, "grad_norm": 9.614124620505395, "learning_rate": 4.8047240158819456e-08, "loss": 0.31297287344932556, "step": 5486 }, { "epoch": 2.8818277310924367, "grad_norm": 8.079027376898408, "learning_rate": 4.7625520728107885e-08, "loss": 0.44994843006134033, "step": 5487 }, { "epoch": 2.8823529411764706, "grad_norm": 20.39232014648757, "learning_rate": 4.7205651362326467e-08, "loss": 0.5275421142578125, "step": 5488 }, { "epoch": 2.8828781512605044, "grad_norm": 6.461821347193511, "learning_rate": 4.6787632218326385e-08, "loss": 0.12535065412521362, "step": 5489 }, { "epoch": 2.883403361344538, "grad_norm": 14.803654230875447, "learning_rate": 4.637146345226828e-08, "loss": 0.2589206099510193, "step": 5490 }, { "epoch": 2.883928571428571, "grad_norm": 9.213012065927948, "learning_rate": 4.595714521962003e-08, "loss": 0.674689531326294, "step": 5491 }, { "epoch": 2.884453781512605, "grad_norm": 9.93503370366437, "learning_rate": 4.554467767515947e-08, "loss": 0.45697227120399475, "step": 5492 }, { "epoch": 2.884978991596639, "grad_norm": 10.199226438660155, "learning_rate": 4.513406097297224e-08, "loss": 1.5334389209747314, "step": 5493 }, { "epoch": 2.8855042016806722, "grad_norm": 12.688864967982823, "learning_rate": 4.4725295266453414e-08, "loss": 0.6125204563140869, "step": 5494 }, { "epoch": 2.8860294117647056, "grad_norm": 10.180992691466882, "learning_rate": 4.4318380708305854e-08, "loss": 0.32915446162223816, "step": 5495 }, { "epoch": 2.8865546218487395, "grad_norm": 7.707363991047359, "learning_rate": 4.391331745054128e-08, "loss": 0.2560163736343384, "step": 5496 }, { "epoch": 2.8870798319327733, "grad_norm": 9.08705619682907, "learning_rate": 4.351010564447977e-08, "loss": 0.3261929750442505, "step": 5497 }, { "epoch": 2.8876050420168067, "grad_norm": 16.527528011698962, "learning_rate": 4.3108745440749723e-08, "loss": 0.6061902642250061, "step": 5498 }, { "epoch": 2.88813025210084, "grad_norm": 11.518720452811419, "learning_rate": 4.2709236989287305e-08, "loss": 1.0793002843856812, "step": 5499 }, { "epoch": 2.888655462184874, "grad_norm": 10.518716502044702, "learning_rate": 4.231158043933814e-08, "loss": 0.44680339097976685, "step": 5500 }, { "epoch": 2.8891806722689077, "grad_norm": 13.78884730075112, "learning_rate": 4.1915775939454506e-08, "loss": 0.5705138444900513, "step": 5501 }, { "epoch": 2.889705882352941, "grad_norm": 11.969615813507975, "learning_rate": 4.1521823637498125e-08, "loss": 0.2875642776489258, "step": 5502 }, { "epoch": 2.8902310924369745, "grad_norm": 9.214314533689762, "learning_rate": 4.112972368063794e-08, "loss": 0.5676702857017517, "step": 5503 }, { "epoch": 2.8907563025210083, "grad_norm": 26.369423471159987, "learning_rate": 4.073947621535179e-08, "loss": 2.802802324295044, "step": 5504 }, { "epoch": 2.891281512605042, "grad_norm": 20.98682174304777, "learning_rate": 4.035108138742416e-08, "loss": 0.8653183579444885, "step": 5505 }, { "epoch": 2.8918067226890756, "grad_norm": 15.171345850810335, "learning_rate": 3.996453934194899e-08, "loss": 0.5532779097557068, "step": 5506 }, { "epoch": 2.8923319327731094, "grad_norm": 18.425444751794625, "learning_rate": 3.9579850223326887e-08, "loss": 1.120945692062378, "step": 5507 }, { "epoch": 2.892857142857143, "grad_norm": 13.827765534186234, "learning_rate": 3.9197014175266226e-08, "loss": 0.5387973189353943, "step": 5508 }, { "epoch": 2.8933823529411766, "grad_norm": 16.027681711092658, "learning_rate": 3.881603134078482e-08, "loss": 0.5398526191711426, "step": 5509 }, { "epoch": 2.89390756302521, "grad_norm": 15.81006173365238, "learning_rate": 3.84369018622055e-08, "loss": 0.5760936737060547, "step": 5510 }, { "epoch": 2.894432773109244, "grad_norm": 7.814529768780656, "learning_rate": 3.805962588116108e-08, "loss": 0.7528111934661865, "step": 5511 }, { "epoch": 2.8949579831932772, "grad_norm": 12.499997463553747, "learning_rate": 3.768420353859048e-08, "loss": 0.3807353079319, "step": 5512 }, { "epoch": 2.895483193277311, "grad_norm": 9.174869443834577, "learning_rate": 3.731063497474152e-08, "loss": 0.6745905876159668, "step": 5513 }, { "epoch": 2.8960084033613445, "grad_norm": 9.461731345377686, "learning_rate": 3.693892032916757e-08, "loss": 0.42122939229011536, "step": 5514 }, { "epoch": 2.8965336134453783, "grad_norm": 10.522417401812165, "learning_rate": 3.656905974073144e-08, "loss": 0.8252875804901123, "step": 5515 }, { "epoch": 2.8970588235294117, "grad_norm": 14.425462061084643, "learning_rate": 3.620105334760205e-08, "loss": 0.34680360555648804, "step": 5516 }, { "epoch": 2.8975840336134455, "grad_norm": 9.202109117314102, "learning_rate": 3.583490128725553e-08, "loss": 0.6464153528213501, "step": 5517 }, { "epoch": 2.898109243697479, "grad_norm": 12.648835037111676, "learning_rate": 3.547060369647693e-08, "loss": 0.4509446620941162, "step": 5518 }, { "epoch": 2.8986344537815127, "grad_norm": 17.193040915204442, "learning_rate": 3.510816071135681e-08, "loss": 0.6343562602996826, "step": 5519 }, { "epoch": 2.899159663865546, "grad_norm": 12.106320234862146, "learning_rate": 3.474757246729354e-08, "loss": 0.7017316818237305, "step": 5520 }, { "epoch": 2.89968487394958, "grad_norm": 7.536342382664705, "learning_rate": 3.4388839098992154e-08, "loss": 0.6642085909843445, "step": 5521 }, { "epoch": 2.9002100840336134, "grad_norm": 13.947203182987222, "learning_rate": 3.403196074046544e-08, "loss": 0.2990296185016632, "step": 5522 }, { "epoch": 2.900735294117647, "grad_norm": 8.862421317089296, "learning_rate": 3.3676937525032314e-08, "loss": 0.4765544533729553, "step": 5523 }, { "epoch": 2.9012605042016806, "grad_norm": 14.64265029931189, "learning_rate": 3.3323769585320575e-08, "loss": 0.5519246459007263, "step": 5524 }, { "epoch": 2.9017857142857144, "grad_norm": 10.032561602461207, "learning_rate": 3.2972457053262466e-08, "loss": 0.4705575108528137, "step": 5525 }, { "epoch": 2.902310924369748, "grad_norm": 11.31221576743726, "learning_rate": 3.2623000060099106e-08, "loss": 0.8108884692192078, "step": 5526 }, { "epoch": 2.9028361344537816, "grad_norm": 8.313214471537519, "learning_rate": 3.227539873637664e-08, "loss": 0.36999955773353577, "step": 5527 }, { "epoch": 2.903361344537815, "grad_norm": 16.498289568601937, "learning_rate": 3.192965321195007e-08, "loss": 0.9184820652008057, "step": 5528 }, { "epoch": 2.903886554621849, "grad_norm": 8.01495543923635, "learning_rate": 3.158576361597887e-08, "loss": 0.2793254256248474, "step": 5529 }, { "epoch": 2.9044117647058822, "grad_norm": 7.314269170981081, "learning_rate": 3.124373007693082e-08, "loss": 0.5376075506210327, "step": 5530 }, { "epoch": 2.904936974789916, "grad_norm": 11.086366691726877, "learning_rate": 3.090355272257983e-08, "loss": 0.2915005683898926, "step": 5531 }, { "epoch": 2.9054621848739495, "grad_norm": 7.423482865502868, "learning_rate": 3.0565231680007024e-08, "loss": 0.26723939180374146, "step": 5532 }, { "epoch": 2.9059873949579833, "grad_norm": 11.329995064799036, "learning_rate": 3.022876707559796e-08, "loss": 0.3294193148612976, "step": 5533 }, { "epoch": 2.9065126050420167, "grad_norm": 8.381833455253949, "learning_rate": 2.9894159035047666e-08, "loss": 0.5151461958885193, "step": 5534 }, { "epoch": 2.9070378151260505, "grad_norm": 13.08703037335673, "learning_rate": 2.9561407683355027e-08, "loss": 0.9340593218803406, "step": 5535 }, { "epoch": 2.907563025210084, "grad_norm": 7.57724074373758, "learning_rate": 2.9230513144827277e-08, "loss": 0.7225733995437622, "step": 5536 }, { "epoch": 2.9080882352941178, "grad_norm": 15.104470484094323, "learning_rate": 2.890147554307665e-08, "loss": 0.7668702006340027, "step": 5537 }, { "epoch": 2.908613445378151, "grad_norm": 12.72051977274319, "learning_rate": 2.8574295001021492e-08, "loss": 0.3595678508281708, "step": 5538 }, { "epoch": 2.909138655462185, "grad_norm": 9.29151576715842, "learning_rate": 2.8248971640887913e-08, "loss": 0.4152783155441284, "step": 5539 }, { "epoch": 2.9096638655462184, "grad_norm": 11.06768050742571, "learning_rate": 2.792550558420759e-08, "loss": 0.7512202262878418, "step": 5540 }, { "epoch": 2.910189075630252, "grad_norm": 10.855614816848565, "learning_rate": 2.7603896951817755e-08, "loss": 0.6991416811943054, "step": 5541 }, { "epoch": 2.9107142857142856, "grad_norm": 9.021255865800349, "learning_rate": 2.7284145863861743e-08, "loss": 0.797561764717102, "step": 5542 }, { "epoch": 2.9112394957983194, "grad_norm": 13.837501672475726, "learning_rate": 2.696625243979012e-08, "loss": 1.0461021661758423, "step": 5543 }, { "epoch": 2.911764705882353, "grad_norm": 7.89782215518247, "learning_rate": 2.665021679835844e-08, "loss": 0.43711721897125244, "step": 5544 }, { "epoch": 2.9122899159663866, "grad_norm": 11.324726518508411, "learning_rate": 2.633603905762838e-08, "loss": 0.4912574887275696, "step": 5545 }, { "epoch": 2.91281512605042, "grad_norm": 20.470491479941607, "learning_rate": 2.6023719334967724e-08, "loss": 0.48075029253959656, "step": 5546 }, { "epoch": 2.913340336134454, "grad_norm": 11.612039587280671, "learning_rate": 2.571325774705036e-08, "loss": 0.2778392434120178, "step": 5547 }, { "epoch": 2.9138655462184873, "grad_norm": 9.99319271774061, "learning_rate": 2.5404654409856288e-08, "loss": 0.5540282726287842, "step": 5548 }, { "epoch": 2.914390756302521, "grad_norm": 10.038184913341135, "learning_rate": 2.5097909438669964e-08, "loss": 0.6935964226722717, "step": 5549 }, { "epoch": 2.9149159663865545, "grad_norm": 23.56634252320344, "learning_rate": 2.47930229480825e-08, "loss": 0.5073999166488647, "step": 5550 }, { "epoch": 2.9154411764705883, "grad_norm": 9.275570355949101, "learning_rate": 2.4489995051991678e-08, "loss": 0.4122755229473114, "step": 5551 }, { "epoch": 2.9159663865546217, "grad_norm": 10.166117457516405, "learning_rate": 2.4188825863599164e-08, "loss": 1.0233631134033203, "step": 5552 }, { "epoch": 2.9164915966386555, "grad_norm": 10.621888518892153, "learning_rate": 2.3889515495413297e-08, "loss": 0.3412795960903168, "step": 5553 }, { "epoch": 2.917016806722689, "grad_norm": 17.83502737509378, "learning_rate": 2.3592064059247967e-08, "loss": 0.771084189414978, "step": 5554 }, { "epoch": 2.9175420168067228, "grad_norm": 9.974548839878679, "learning_rate": 2.329647166622262e-08, "loss": 0.46054673194885254, "step": 5555 }, { "epoch": 2.918067226890756, "grad_norm": 9.237729916948592, "learning_rate": 2.300273842676226e-08, "loss": 0.5346497297286987, "step": 5556 }, { "epoch": 2.91859243697479, "grad_norm": 9.901566661048781, "learning_rate": 2.2710864450596336e-08, "loss": 0.9071778059005737, "step": 5557 }, { "epoch": 2.9191176470588234, "grad_norm": 15.997355385243313, "learning_rate": 2.2420849846761517e-08, "loss": 0.5223240256309509, "step": 5558 }, { "epoch": 2.919642857142857, "grad_norm": 9.803395559821737, "learning_rate": 2.213269472359836e-08, "loss": 0.43600529432296753, "step": 5559 }, { "epoch": 2.9201680672268906, "grad_norm": 13.010491901735929, "learning_rate": 2.1846399188752975e-08, "loss": 0.6825572848320007, "step": 5560 }, { "epoch": 2.9206932773109244, "grad_norm": 8.188936087268589, "learning_rate": 2.1561963349178704e-08, "loss": 0.4675530195236206, "step": 5561 }, { "epoch": 2.921218487394958, "grad_norm": 10.357185874315604, "learning_rate": 2.1279387311131106e-08, "loss": 0.3140292763710022, "step": 5562 }, { "epoch": 2.9217436974789917, "grad_norm": 20.561503814788562, "learning_rate": 2.0998671180172957e-08, "loss": 1.3401788473129272, "step": 5563 }, { "epoch": 2.9222689075630255, "grad_norm": 10.61845234333747, "learning_rate": 2.0719815061172045e-08, "loss": 0.2538623809814453, "step": 5564 }, { "epoch": 2.922794117647059, "grad_norm": 12.461548877181114, "learning_rate": 2.0442819058300588e-08, "loss": 0.3584285080432892, "step": 5565 }, { "epoch": 2.9233193277310923, "grad_norm": 16.45580997541605, "learning_rate": 2.0167683275036376e-08, "loss": 0.4364638924598694, "step": 5566 }, { "epoch": 2.923844537815126, "grad_norm": 16.462900973950404, "learning_rate": 1.9894407814162186e-08, "loss": 0.2806379795074463, "step": 5567 }, { "epoch": 2.92436974789916, "grad_norm": 9.563375260727465, "learning_rate": 1.962299277776636e-08, "loss": 0.12565010786056519, "step": 5568 }, { "epoch": 2.9248949579831933, "grad_norm": 8.227634681315745, "learning_rate": 1.935343826724112e-08, "loss": 0.17257870733737946, "step": 5569 }, { "epoch": 2.9254201680672267, "grad_norm": 8.187935823789262, "learning_rate": 1.908574438328481e-08, "loss": 0.37759828567504883, "step": 5570 }, { "epoch": 2.9259453781512605, "grad_norm": 7.633462545317691, "learning_rate": 1.881991122590021e-08, "loss": 0.2629449963569641, "step": 5571 }, { "epoch": 2.9264705882352944, "grad_norm": 10.54009413604542, "learning_rate": 1.8555938894394554e-08, "loss": 0.23956085741519928, "step": 5572 }, { "epoch": 2.9269957983193278, "grad_norm": 11.484444966656907, "learning_rate": 1.8293827487380623e-08, "loss": 0.35607922077178955, "step": 5573 }, { "epoch": 2.927521008403361, "grad_norm": 11.831120728173534, "learning_rate": 1.8033577102775645e-08, "loss": 0.24531540274620056, "step": 5574 }, { "epoch": 2.928046218487395, "grad_norm": 8.260889344628955, "learning_rate": 1.77751878378013e-08, "loss": 0.3835732340812683, "step": 5575 }, { "epoch": 2.928571428571429, "grad_norm": 16.696906888139786, "learning_rate": 1.7518659788984817e-08, "loss": 0.7035274505615234, "step": 5576 }, { "epoch": 2.929096638655462, "grad_norm": 18.3334617633976, "learning_rate": 1.726399305215787e-08, "loss": 0.562627911567688, "step": 5577 }, { "epoch": 2.9296218487394956, "grad_norm": 6.776640164261589, "learning_rate": 1.701118772245658e-08, "loss": 0.1429840475320816, "step": 5578 }, { "epoch": 2.9301470588235294, "grad_norm": 6.560455773753327, "learning_rate": 1.6760243894321513e-08, "loss": 0.44401729106903076, "step": 5579 }, { "epoch": 2.9306722689075633, "grad_norm": 13.492556925985618, "learning_rate": 1.651116166149769e-08, "loss": 1.0103554725646973, "step": 5580 }, { "epoch": 2.9311974789915967, "grad_norm": 9.644731701714283, "learning_rate": 1.626394111703622e-08, "loss": 0.858881950378418, "step": 5581 }, { "epoch": 2.93172268907563, "grad_norm": 11.560863470788007, "learning_rate": 1.6018582353290456e-08, "loss": 0.2774242162704468, "step": 5582 }, { "epoch": 2.932247899159664, "grad_norm": 16.204354110885234, "learning_rate": 1.577508546191986e-08, "loss": 0.41454803943634033, "step": 5583 }, { "epoch": 2.9327731092436977, "grad_norm": 10.980428655057459, "learning_rate": 1.5533450533888326e-08, "loss": 0.714098334312439, "step": 5584 }, { "epoch": 2.933298319327731, "grad_norm": 12.8824573940971, "learning_rate": 1.5293677659463104e-08, "loss": 0.2153351604938507, "step": 5585 }, { "epoch": 2.9338235294117645, "grad_norm": 13.104349080337803, "learning_rate": 1.505576692821642e-08, "loss": 0.5149781703948975, "step": 5586 }, { "epoch": 2.9343487394957983, "grad_norm": 11.003157779077517, "learning_rate": 1.4819718429024965e-08, "loss": 0.25556814670562744, "step": 5587 }, { "epoch": 2.934873949579832, "grad_norm": 12.355349444171148, "learning_rate": 1.4585532250070423e-08, "loss": 0.7575448751449585, "step": 5588 }, { "epoch": 2.9353991596638656, "grad_norm": 12.950030139239844, "learning_rate": 1.4353208478837256e-08, "loss": 0.6556761264801025, "step": 5589 }, { "epoch": 2.935924369747899, "grad_norm": 15.112237567499033, "learning_rate": 1.4122747202114928e-08, "loss": 0.4966975450515747, "step": 5590 }, { "epoch": 2.9364495798319328, "grad_norm": 9.247774954551618, "learning_rate": 1.3894148505997352e-08, "loss": 0.27970755100250244, "step": 5591 }, { "epoch": 2.9369747899159666, "grad_norm": 10.28911719953282, "learning_rate": 1.3667412475882325e-08, "loss": 0.6997286677360535, "step": 5592 }, { "epoch": 2.9375, "grad_norm": 9.208707155593626, "learning_rate": 1.3442539196472647e-08, "loss": 0.590284526348114, "step": 5593 }, { "epoch": 2.9380252100840334, "grad_norm": 10.131472094571073, "learning_rate": 1.3219528751773348e-08, "loss": 0.5730470418930054, "step": 5594 }, { "epoch": 2.9385504201680672, "grad_norm": 10.558149205353557, "learning_rate": 1.2998381225095557e-08, "loss": 0.658598005771637, "step": 5595 }, { "epoch": 2.939075630252101, "grad_norm": 9.65076945448155, "learning_rate": 1.2779096699053195e-08, "loss": 0.531460702419281, "step": 5596 }, { "epoch": 2.9396008403361344, "grad_norm": 11.906003911325623, "learning_rate": 1.2561675255564621e-08, "loss": 0.3952830135822296, "step": 5597 }, { "epoch": 2.940126050420168, "grad_norm": 16.26547790120726, "learning_rate": 1.2346116975853194e-08, "loss": 0.5298194885253906, "step": 5598 }, { "epoch": 2.9406512605042017, "grad_norm": 13.037334965116791, "learning_rate": 1.213242194044395e-08, "loss": 0.3888705372810364, "step": 5599 }, { "epoch": 2.9411764705882355, "grad_norm": 8.155687647693032, "learning_rate": 1.1920590229168028e-08, "loss": 0.2725866734981537, "step": 5600 }, { "epoch": 2.941701680672269, "grad_norm": 7.470703469400965, "learning_rate": 1.1710621921159904e-08, "loss": 0.4168459177017212, "step": 5601 }, { "epoch": 2.9422268907563023, "grad_norm": 14.4781653785187, "learning_rate": 1.1502517094856836e-08, "loss": 1.1077405214309692, "step": 5602 }, { "epoch": 2.942752100840336, "grad_norm": 7.398408819836944, "learning_rate": 1.1296275828001635e-08, "loss": 0.5583397150039673, "step": 5603 }, { "epoch": 2.94327731092437, "grad_norm": 9.566556489167208, "learning_rate": 1.1091898197639339e-08, "loss": 1.3542029857635498, "step": 5604 }, { "epoch": 2.9438025210084033, "grad_norm": 10.694305734078142, "learning_rate": 1.0889384280119985e-08, "loss": 0.9673854112625122, "step": 5605 }, { "epoch": 2.9443277310924367, "grad_norm": 9.651329406855588, "learning_rate": 1.0688734151096947e-08, "loss": 0.24388881027698517, "step": 5606 }, { "epoch": 2.9448529411764706, "grad_norm": 17.481026746232192, "learning_rate": 1.048994788552804e-08, "loss": 0.45848214626312256, "step": 5607 }, { "epoch": 2.9453781512605044, "grad_norm": 14.050724281727012, "learning_rate": 1.0293025557672753e-08, "loss": 0.38031795620918274, "step": 5608 }, { "epoch": 2.945903361344538, "grad_norm": 7.6029368596583655, "learning_rate": 1.009796724109613e-08, "loss": 0.6128751039505005, "step": 5609 }, { "epoch": 2.946428571428571, "grad_norm": 10.19092600123891, "learning_rate": 9.904773008667101e-09, "loss": 0.2897670865058899, "step": 5610 }, { "epoch": 2.946953781512605, "grad_norm": 12.9249108178978, "learning_rate": 9.713442932556828e-09, "loss": 0.38133394718170166, "step": 5611 }, { "epoch": 2.947478991596639, "grad_norm": 8.0924426893087, "learning_rate": 9.523977084240354e-09, "loss": 0.5820958614349365, "step": 5612 }, { "epoch": 2.9480042016806722, "grad_norm": 11.493191714545501, "learning_rate": 9.336375534497732e-09, "loss": 0.5863905549049377, "step": 5613 }, { "epoch": 2.9485294117647056, "grad_norm": 14.327094366456585, "learning_rate": 9.150638353410123e-09, "loss": 0.7361984252929688, "step": 5614 }, { "epoch": 2.9490546218487395, "grad_norm": 11.366103174396544, "learning_rate": 8.966765610365357e-09, "loss": 0.228278249502182, "step": 5615 }, { "epoch": 2.9495798319327733, "grad_norm": 10.527788204520025, "learning_rate": 8.784757374051267e-09, "loss": 0.6901432871818542, "step": 5616 }, { "epoch": 2.9501050420168067, "grad_norm": 11.224789400796311, "learning_rate": 8.60461371246235e-09, "loss": 0.6597157716751099, "step": 5617 }, { "epoch": 2.95063025210084, "grad_norm": 11.013315168173806, "learning_rate": 8.426334692893668e-09, "loss": 0.4447717070579529, "step": 5618 }, { "epoch": 2.951155462184874, "grad_norm": 13.531968632115458, "learning_rate": 8.249920381946387e-09, "loss": 0.6904777884483337, "step": 5619 }, { "epoch": 2.9516806722689077, "grad_norm": 9.454423351371974, "learning_rate": 8.075370845523344e-09, "loss": 0.5729291439056396, "step": 5620 }, { "epoch": 2.952205882352941, "grad_norm": 9.531897908238461, "learning_rate": 7.902686148831273e-09, "loss": 0.5849839448928833, "step": 5621 }, { "epoch": 2.9527310924369745, "grad_norm": 10.234014033685307, "learning_rate": 7.731866356380235e-09, "loss": 0.4954346716403961, "step": 5622 }, { "epoch": 2.9532563025210083, "grad_norm": 11.00023380637929, "learning_rate": 7.56291153198363e-09, "loss": 0.36600130796432495, "step": 5623 }, { "epoch": 2.953781512605042, "grad_norm": 10.833577104485304, "learning_rate": 7.395821738758191e-09, "loss": 0.45721763372421265, "step": 5624 }, { "epoch": 2.9543067226890756, "grad_norm": 11.445569870353463, "learning_rate": 7.230597039123433e-09, "loss": 0.48243996500968933, "step": 5625 }, { "epoch": 2.9548319327731094, "grad_norm": 9.671976783819268, "learning_rate": 7.067237494802759e-09, "loss": 0.6383078694343567, "step": 5626 }, { "epoch": 2.955357142857143, "grad_norm": 10.303071456198102, "learning_rate": 6.905743166822909e-09, "loss": 0.48624855279922485, "step": 5627 }, { "epoch": 2.9558823529411766, "grad_norm": 9.856130065633476, "learning_rate": 6.746114115513402e-09, "loss": 0.5085325241088867, "step": 5628 }, { "epoch": 2.95640756302521, "grad_norm": 11.854247092639392, "learning_rate": 6.588350400507093e-09, "loss": 0.6341557502746582, "step": 5629 }, { "epoch": 2.956932773109244, "grad_norm": 16.474906065428893, "learning_rate": 6.432452080739615e-09, "loss": 0.5361814498901367, "step": 5630 }, { "epoch": 2.9574579831932772, "grad_norm": 7.734640267131578, "learning_rate": 6.2784192144504926e-09, "loss": 0.17374935746192932, "step": 5631 }, { "epoch": 2.957983193277311, "grad_norm": 17.135966116435647, "learning_rate": 6.1262518591820305e-09, "loss": 1.087705373764038, "step": 5632 }, { "epoch": 2.9585084033613445, "grad_norm": 16.91123275408573, "learning_rate": 5.975950071779313e-09, "loss": 0.7166892290115356, "step": 5633 }, { "epoch": 2.9590336134453783, "grad_norm": 10.70550524126326, "learning_rate": 5.827513908390759e-09, "loss": 0.3969137668609619, "step": 5634 }, { "epoch": 2.9595588235294117, "grad_norm": 12.765748763205398, "learning_rate": 5.6809434244681215e-09, "loss": 0.27330633997917175, "step": 5635 }, { "epoch": 2.9600840336134455, "grad_norm": 7.536149360956921, "learning_rate": 5.536238674765937e-09, "loss": 0.2314409613609314, "step": 5636 }, { "epoch": 2.960609243697479, "grad_norm": 12.99447399307754, "learning_rate": 5.393399713341518e-09, "loss": 0.4921409785747528, "step": 5637 }, { "epoch": 2.9611344537815127, "grad_norm": 13.114680784542122, "learning_rate": 5.252426593555515e-09, "loss": 0.32266703248023987, "step": 5638 }, { "epoch": 2.961659663865546, "grad_norm": 10.048323761103624, "learning_rate": 5.113319368070801e-09, "loss": 0.29325148463249207, "step": 5639 }, { "epoch": 2.96218487394958, "grad_norm": 15.986007663448719, "learning_rate": 4.976078088855252e-09, "loss": 0.45709648728370667, "step": 5640 }, { "epoch": 2.9627100840336134, "grad_norm": 11.789635949689533, "learning_rate": 4.8407028071773e-09, "loss": 0.5876717567443848, "step": 5641 }, { "epoch": 2.963235294117647, "grad_norm": 10.165642894319092, "learning_rate": 4.70719357360927e-09, "loss": 0.46749863028526306, "step": 5642 }, { "epoch": 2.9637605042016806, "grad_norm": 8.247655211470287, "learning_rate": 4.575550438026266e-09, "loss": 0.6239333152770996, "step": 5643 }, { "epoch": 2.9642857142857144, "grad_norm": 14.606346455869812, "learning_rate": 4.445773449606727e-09, "loss": 1.1496306657791138, "step": 5644 }, { "epoch": 2.964810924369748, "grad_norm": 21.37063961876002, "learning_rate": 4.317862656831873e-09, "loss": 0.6101829409599304, "step": 5645 }, { "epoch": 2.9653361344537816, "grad_norm": 14.804959858310237, "learning_rate": 4.191818107485146e-09, "loss": 0.6226564645767212, "step": 5646 }, { "epoch": 2.965861344537815, "grad_norm": 11.998054483815745, "learning_rate": 4.0676398486527715e-09, "loss": 0.385552316904068, "step": 5647 }, { "epoch": 2.966386554621849, "grad_norm": 12.225844594671564, "learning_rate": 3.9453279267248625e-09, "loss": 0.643718957901001, "step": 5648 }, { "epoch": 2.9669117647058822, "grad_norm": 12.383298611969805, "learning_rate": 3.8248823873932026e-09, "loss": 0.4943666160106659, "step": 5649 }, { "epoch": 2.967436974789916, "grad_norm": 8.856930762340792, "learning_rate": 3.7063032756534666e-09, "loss": 0.3606548607349396, "step": 5650 }, { "epoch": 2.9679621848739495, "grad_norm": 12.398796097493587, "learning_rate": 3.5895906358024424e-09, "loss": 0.24608448147773743, "step": 5651 }, { "epoch": 2.9684873949579833, "grad_norm": 13.768671634267214, "learning_rate": 3.4747445114413634e-09, "loss": 0.4157842993736267, "step": 5652 }, { "epoch": 2.9690126050420167, "grad_norm": 9.603321706103921, "learning_rate": 3.361764945473134e-09, "loss": 0.47171053290367126, "step": 5653 }, { "epoch": 2.9695378151260505, "grad_norm": 10.562426921855787, "learning_rate": 3.2506519801034363e-09, "loss": 0.4859582185745239, "step": 5654 }, { "epoch": 2.970063025210084, "grad_norm": 10.185667359707393, "learning_rate": 3.14140565684129e-09, "loss": 0.7889777421951294, "step": 5655 }, { "epoch": 2.9705882352941178, "grad_norm": 13.56718656916088, "learning_rate": 3.0340260164979375e-09, "loss": 0.34324878454208374, "step": 5656 }, { "epoch": 2.971113445378151, "grad_norm": 16.696034099932895, "learning_rate": 2.928513099187402e-09, "loss": 0.45598477125167847, "step": 5657 }, { "epoch": 2.971638655462185, "grad_norm": 6.703788131113769, "learning_rate": 2.8248669443253775e-09, "loss": 0.4168074131011963, "step": 5658 }, { "epoch": 2.9721638655462184, "grad_norm": 9.494975141445275, "learning_rate": 2.723087590632556e-09, "loss": 0.8817811608314514, "step": 5659 }, { "epoch": 2.972689075630252, "grad_norm": 11.273881748031549, "learning_rate": 2.623175076130191e-09, "loss": 0.4042503833770752, "step": 5660 }, { "epoch": 2.9732142857142856, "grad_norm": 9.327964147462112, "learning_rate": 2.52512943814176e-09, "loss": 0.34631478786468506, "step": 5661 }, { "epoch": 2.9737394957983194, "grad_norm": 11.240881291707485, "learning_rate": 2.428950713295741e-09, "loss": 0.8159435391426086, "step": 5662 }, { "epoch": 2.974264705882353, "grad_norm": 16.717701558220515, "learning_rate": 2.334638937521172e-09, "loss": 0.4300820827484131, "step": 5663 }, { "epoch": 2.9747899159663866, "grad_norm": 12.488644629851397, "learning_rate": 2.242194146050425e-09, "loss": 0.380912184715271, "step": 5664 }, { "epoch": 2.97531512605042, "grad_norm": 9.16332819734836, "learning_rate": 2.151616373417542e-09, "loss": 0.386635422706604, "step": 5665 }, { "epoch": 2.975840336134454, "grad_norm": 9.503764539594915, "learning_rate": 2.0629056534599014e-09, "loss": 0.2878572642803192, "step": 5666 }, { "epoch": 2.9763655462184873, "grad_norm": 8.849958471707785, "learning_rate": 1.9760620193182144e-09, "loss": 0.2314329296350479, "step": 5667 }, { "epoch": 2.976890756302521, "grad_norm": 10.523764182521704, "learning_rate": 1.891085503433754e-09, "loss": 0.6014482378959656, "step": 5668 }, { "epoch": 2.9774159663865545, "grad_norm": 12.027695286433763, "learning_rate": 1.8079761375522365e-09, "loss": 0.4198509156703949, "step": 5669 }, { "epoch": 2.9779411764705883, "grad_norm": 12.510137893586682, "learning_rate": 1.726733952719939e-09, "loss": 0.26352745294570923, "step": 5670 }, { "epoch": 2.9784663865546217, "grad_norm": 16.31323441077259, "learning_rate": 1.6473589792875832e-09, "loss": 0.308188259601593, "step": 5671 }, { "epoch": 2.9789915966386555, "grad_norm": 12.730371356484046, "learning_rate": 1.569851246906451e-09, "loss": 0.33039066195487976, "step": 5672 }, { "epoch": 2.979516806722689, "grad_norm": 9.72219997453046, "learning_rate": 1.4942107845317132e-09, "loss": 0.30831047892570496, "step": 5673 }, { "epoch": 2.9800420168067228, "grad_norm": 7.164965305532418, "learning_rate": 1.420437620420212e-09, "loss": 0.307595819234848, "step": 5674 }, { "epoch": 2.980567226890756, "grad_norm": 11.265195686345777, "learning_rate": 1.3485317821321231e-09, "loss": 0.25833243131637573, "step": 5675 }, { "epoch": 2.98109243697479, "grad_norm": 9.455864162725236, "learning_rate": 1.2784932965287378e-09, "loss": 0.415718138217926, "step": 5676 }, { "epoch": 2.9816176470588234, "grad_norm": 16.196923012472972, "learning_rate": 1.210322189774682e-09, "loss": 0.7805691957473755, "step": 5677 }, { "epoch": 2.982142857142857, "grad_norm": 9.290485662290374, "learning_rate": 1.1440184873362514e-09, "loss": 0.45192593336105347, "step": 5678 }, { "epoch": 2.9826680672268906, "grad_norm": 9.314863646095231, "learning_rate": 1.079582213983632e-09, "loss": 0.7352830767631531, "step": 5679 }, { "epoch": 2.9831932773109244, "grad_norm": 10.222129047944414, "learning_rate": 1.0170133937875692e-09, "loss": 0.43010446429252625, "step": 5680 }, { "epoch": 2.983718487394958, "grad_norm": 14.478667435454975, "learning_rate": 9.563120501221434e-10, "loss": 0.5366514921188354, "step": 5681 }, { "epoch": 2.9842436974789917, "grad_norm": 10.527100068397836, "learning_rate": 8.9747820566366e-10, "loss": 0.7565234303474426, "step": 5682 }, { "epoch": 2.9847689075630255, "grad_norm": 14.890558663670724, "learning_rate": 8.405118823906489e-10, "loss": 0.46974968910217285, "step": 5683 }, { "epoch": 2.985294117647059, "grad_norm": 9.425281217033564, "learning_rate": 7.854131015844201e-10, "loss": 0.3968534469604492, "step": 5684 }, { "epoch": 2.9858193277310923, "grad_norm": 14.620206726148263, "learning_rate": 7.321818838279537e-10, "loss": 0.5368586182594299, "step": 5685 }, { "epoch": 2.986344537815126, "grad_norm": 6.6917640984500775, "learning_rate": 6.808182490070092e-10, "loss": 0.5833422541618347, "step": 5686 }, { "epoch": 2.98686974789916, "grad_norm": 13.430856231064407, "learning_rate": 6.313222163095711e-10, "loss": 0.45203086733818054, "step": 5687 }, { "epoch": 2.9873949579831933, "grad_norm": 9.388426030845077, "learning_rate": 5.836938042258489e-10, "loss": 0.6880434155464172, "step": 5688 }, { "epoch": 2.9879201680672267, "grad_norm": 9.866153797945675, "learning_rate": 5.379330305488317e-10, "loss": 0.3018028736114502, "step": 5689 }, { "epoch": 2.9884453781512605, "grad_norm": 10.567725023760596, "learning_rate": 4.940399123731787e-10, "loss": 0.20725136995315552, "step": 5690 }, { "epoch": 2.9889705882352944, "grad_norm": 14.340152025454126, "learning_rate": 4.520144660957737e-10, "loss": 0.34831321239471436, "step": 5691 }, { "epoch": 2.9894957983193278, "grad_norm": 10.140995778667797, "learning_rate": 4.118567074168356e-10, "loss": 0.3369051218032837, "step": 5692 }, { "epoch": 2.990021008403361, "grad_norm": 11.613086347026366, "learning_rate": 3.735666513371428e-10, "loss": 0.4291839599609375, "step": 5693 }, { "epoch": 2.990546218487395, "grad_norm": 8.262672556370198, "learning_rate": 3.371443121619189e-10, "loss": 0.6771973371505737, "step": 5694 }, { "epoch": 2.991071428571429, "grad_norm": 18.988430129138063, "learning_rate": 3.0258970349639204e-10, "loss": 0.36443549394607544, "step": 5695 }, { "epoch": 2.991596638655462, "grad_norm": 10.925737321961899, "learning_rate": 2.6990283825023554e-10, "loss": 0.9888339042663574, "step": 5696 }, { "epoch": 2.9921218487394956, "grad_norm": 15.978284780391945, "learning_rate": 2.3908372863368223e-10, "loss": 1.6931525468826294, "step": 5697 }, { "epoch": 2.9926470588235294, "grad_norm": 12.290472234322491, "learning_rate": 2.1013238615974486e-10, "loss": 0.5588763356208801, "step": 5698 }, { "epoch": 2.9931722689075633, "grad_norm": 11.573922698374671, "learning_rate": 1.830488216442161e-10, "loss": 1.2311294078826904, "step": 5699 }, { "epoch": 2.9936974789915967, "grad_norm": 15.287092679863187, "learning_rate": 1.5783304520455844e-10, "loss": 0.6111997961997986, "step": 5700 }, { "epoch": 2.99422268907563, "grad_norm": 6.716267768886236, "learning_rate": 1.344850662604591e-10, "loss": 0.3660454750061035, "step": 5701 }, { "epoch": 2.994747899159664, "grad_norm": 10.11498413982614, "learning_rate": 1.1300489353438526e-10, "loss": 0.3281766176223755, "step": 5702 }, { "epoch": 2.9952731092436977, "grad_norm": 10.783934579798286, "learning_rate": 9.339253505102896e-11, "loss": 0.5336626768112183, "step": 5703 }, { "epoch": 2.995798319327731, "grad_norm": 9.245609678239921, "learning_rate": 7.564799813619683e-11, "loss": 0.2390085905790329, "step": 5704 }, { "epoch": 2.9963235294117645, "grad_norm": 8.371824258006043, "learning_rate": 5.977128941903055e-11, "loss": 0.6434842944145203, "step": 5705 }, { "epoch": 2.9968487394957983, "grad_norm": 11.54270914337302, "learning_rate": 4.576241483089661e-11, "loss": 0.36174070835113525, "step": 5706 }, { "epoch": 2.997373949579832, "grad_norm": 8.655617354594044, "learning_rate": 3.362137960483125e-11, "loss": 0.5723540186882019, "step": 5707 }, { "epoch": 2.9978991596638656, "grad_norm": 7.511434574508938, "learning_rate": 2.334818827665064e-11, "loss": 0.41090860962867737, "step": 5708 }, { "epoch": 2.998424369747899, "grad_norm": 10.231341927399738, "learning_rate": 1.494284468384066e-11, "loss": 0.45292237401008606, "step": 5709 }, { "epoch": 2.9989495798319328, "grad_norm": 11.027730083955783, "learning_rate": 8.40535196611203e-12, "loss": 0.43816691637039185, "step": 5710 }, { "epoch": 2.9994747899159666, "grad_norm": 12.07965501739737, "learning_rate": 3.735712566510508e-12, "loss": 0.38289082050323486, "step": 5711 }, { "epoch": 3.0, "grad_norm": 10.016519800861975, "learning_rate": 9.339282286413565e-13, "loss": 0.3404456675052643, "step": 5712 }, { "epoch": 3.0, "step": 5712, "total_flos": 15197432340480.0, "train_loss": 1.6582482910574172, "train_runtime": 6482.0567, "train_samples_per_second": 3.524, "train_steps_per_second": 0.881 } ], "logging_steps": 1, "max_steps": 5712, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 15197432340480.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }