{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 1131, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9411764705882355e-06, "loss": 3.5459, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-06, "loss": 3.4515, "step": 2 }, { "epoch": 0.01, "learning_rate": 8.823529411764707e-06, "loss": 3.5459, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.1764705882352942e-05, "loss": 3.3973, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.4705882352941177e-05, "loss": 3.3427, "step": 5 }, { "epoch": 0.02, "learning_rate": 1.7647058823529414e-05, "loss": 3.1525, "step": 6 }, { "epoch": 0.02, "learning_rate": 2.058823529411765e-05, "loss": 3.1169, "step": 7 }, { "epoch": 0.02, "learning_rate": 2.3529411764705884e-05, "loss": 3.0744, "step": 8 }, { "epoch": 0.02, "learning_rate": 2.647058823529412e-05, "loss": 2.9719, "step": 9 }, { "epoch": 0.03, "learning_rate": 2.9411764705882354e-05, "loss": 2.8901, "step": 10 }, { "epoch": 0.03, "learning_rate": 3.235294117647059e-05, "loss": 2.9138, "step": 11 }, { "epoch": 0.03, "learning_rate": 3.529411764705883e-05, "loss": 2.7916, "step": 12 }, { "epoch": 0.03, "learning_rate": 3.8235294117647055e-05, "loss": 2.7394, "step": 13 }, { "epoch": 0.04, "learning_rate": 4.11764705882353e-05, "loss": 2.6911, "step": 14 }, { "epoch": 0.04, "learning_rate": 4.411764705882353e-05, "loss": 2.6669, "step": 15 }, { "epoch": 0.04, "learning_rate": 4.705882352941177e-05, "loss": 2.7453, "step": 16 }, { "epoch": 0.05, "learning_rate": 5e-05, "loss": 2.5887, "step": 17 }, { "epoch": 0.05, "learning_rate": 5.294117647058824e-05, "loss": 2.5516, "step": 18 }, { "epoch": 0.05, "learning_rate": 5.588235294117647e-05, "loss": 2.6737, "step": 19 }, { "epoch": 0.05, "learning_rate": 5.882352941176471e-05, "loss": 2.4736, "step": 20 }, { "epoch": 0.06, "learning_rate": 6.176470588235295e-05, "loss": 2.397, "step": 21 }, { "epoch": 0.06, "learning_rate": 6.470588235294118e-05, "loss": 2.4433, "step": 22 }, { "epoch": 0.06, "learning_rate": 6.764705882352942e-05, "loss": 2.354, "step": 23 }, { "epoch": 0.06, "learning_rate": 7.058823529411765e-05, "loss": 2.4111, "step": 24 }, { "epoch": 0.07, "learning_rate": 7.352941176470589e-05, "loss": 2.3523, "step": 25 }, { "epoch": 0.07, "learning_rate": 7.647058823529411e-05, "loss": 2.3626, "step": 26 }, { "epoch": 0.07, "learning_rate": 7.941176470588235e-05, "loss": 2.2676, "step": 27 }, { "epoch": 0.07, "learning_rate": 8.23529411764706e-05, "loss": 2.403, "step": 28 }, { "epoch": 0.08, "learning_rate": 8.529411764705883e-05, "loss": 2.2134, "step": 29 }, { "epoch": 0.08, "learning_rate": 8.823529411764706e-05, "loss": 2.2442, "step": 30 }, { "epoch": 0.08, "learning_rate": 9.11764705882353e-05, "loss": 2.2206, "step": 31 }, { "epoch": 0.08, "learning_rate": 9.411764705882353e-05, "loss": 2.1881, "step": 32 }, { "epoch": 0.09, "learning_rate": 9.705882352941177e-05, "loss": 2.1922, "step": 33 }, { "epoch": 0.09, "learning_rate": 0.0001, "loss": 2.1636, "step": 34 }, { "epoch": 0.09, "learning_rate": 9.999979496585003e-05, "loss": 2.1973, "step": 35 }, { "epoch": 0.1, "learning_rate": 9.999917986508165e-05, "loss": 2.1669, "step": 36 }, { "epoch": 0.1, "learning_rate": 9.999815470273954e-05, "loss": 2.1175, "step": 37 }, { "epoch": 0.1, "learning_rate": 9.999671948723141e-05, "loss": 2.2195, "step": 38 }, { "epoch": 0.1, "learning_rate": 9.999487423032803e-05, "loss": 2.0737, "step": 39 }, { "epoch": 0.11, "learning_rate": 9.999261894716299e-05, "loss": 2.1123, "step": 40 }, { "epoch": 0.11, "learning_rate": 9.998995365623271e-05, "loss": 2.1978, "step": 41 }, { "epoch": 0.11, "learning_rate": 9.998687837939621e-05, "loss": 2.0849, "step": 42 }, { "epoch": 0.11, "learning_rate": 9.998339314187497e-05, "loss": 2.0458, "step": 43 }, { "epoch": 0.12, "learning_rate": 9.997949797225268e-05, "loss": 1.9736, "step": 44 }, { "epoch": 0.12, "learning_rate": 9.997519290247507e-05, "loss": 2.1129, "step": 45 }, { "epoch": 0.12, "learning_rate": 9.997047796784959e-05, "loss": 2.0466, "step": 46 }, { "epoch": 0.12, "learning_rate": 9.996535320704514e-05, "loss": 2.0619, "step": 47 }, { "epoch": 0.13, "learning_rate": 9.995981866209174e-05, "loss": 2.1507, "step": 48 }, { "epoch": 0.13, "learning_rate": 9.995387437838026e-05, "loss": 2.0693, "step": 49 }, { "epoch": 0.13, "learning_rate": 9.99475204046619e-05, "loss": 1.9994, "step": 50 }, { "epoch": 0.14, "learning_rate": 9.994075679304798e-05, "loss": 2.1005, "step": 51 }, { "epoch": 0.14, "learning_rate": 9.993358359900931e-05, "loss": 2.0013, "step": 52 }, { "epoch": 0.14, "learning_rate": 9.99260008813759e-05, "loss": 2.0427, "step": 53 }, { "epoch": 0.14, "learning_rate": 9.991800870233638e-05, "loss": 2.0812, "step": 54 }, { "epoch": 0.15, "learning_rate": 9.990960712743754e-05, "loss": 1.9826, "step": 55 }, { "epoch": 0.15, "learning_rate": 9.990079622558377e-05, "loss": 2.0451, "step": 56 }, { "epoch": 0.15, "learning_rate": 9.989157606903649e-05, "loss": 2.0134, "step": 57 }, { "epoch": 0.15, "learning_rate": 9.988194673341362e-05, "loss": 2.0029, "step": 58 }, { "epoch": 0.16, "learning_rate": 9.987190829768882e-05, "loss": 1.9063, "step": 59 }, { "epoch": 0.16, "learning_rate": 9.986146084419099e-05, "loss": 1.9806, "step": 60 }, { "epoch": 0.16, "learning_rate": 9.985060445860352e-05, "loss": 1.9337, "step": 61 }, { "epoch": 0.16, "learning_rate": 9.983933922996361e-05, "loss": 1.9956, "step": 62 }, { "epoch": 0.17, "learning_rate": 9.982766525066152e-05, "loss": 2.0342, "step": 63 }, { "epoch": 0.17, "learning_rate": 9.981558261643981e-05, "loss": 2.0178, "step": 64 }, { "epoch": 0.17, "learning_rate": 9.980309142639261e-05, "loss": 1.9531, "step": 65 }, { "epoch": 0.18, "learning_rate": 9.979019178296473e-05, "loss": 2.0035, "step": 66 }, { "epoch": 0.18, "learning_rate": 9.977688379195087e-05, "loss": 1.8774, "step": 67 }, { "epoch": 0.18, "learning_rate": 9.976316756249472e-05, "loss": 2.0842, "step": 68 }, { "epoch": 0.18, "learning_rate": 9.97490432070881e-05, "loss": 1.9861, "step": 69 }, { "epoch": 0.19, "learning_rate": 9.973451084157006e-05, "loss": 1.8747, "step": 70 }, { "epoch": 0.19, "learning_rate": 9.97195705851258e-05, "loss": 1.9883, "step": 71 }, { "epoch": 0.19, "learning_rate": 9.970422256028587e-05, "loss": 1.9212, "step": 72 }, { "epoch": 0.19, "learning_rate": 9.9688466892925e-05, "loss": 1.9101, "step": 73 }, { "epoch": 0.2, "learning_rate": 9.96723037122612e-05, "loss": 2.0329, "step": 74 }, { "epoch": 0.2, "learning_rate": 9.965573315085462e-05, "loss": 1.9547, "step": 75 }, { "epoch": 0.2, "learning_rate": 9.963875534460653e-05, "loss": 1.9255, "step": 76 }, { "epoch": 0.2, "learning_rate": 9.96213704327581e-05, "loss": 1.9336, "step": 77 }, { "epoch": 0.21, "learning_rate": 9.960357855788938e-05, "loss": 1.8824, "step": 78 }, { "epoch": 0.21, "learning_rate": 9.958537986591803e-05, "loss": 1.884, "step": 79 }, { "epoch": 0.21, "learning_rate": 9.95667745060982e-05, "loss": 1.9605, "step": 80 }, { "epoch": 0.21, "learning_rate": 9.954776263101924e-05, "loss": 1.8576, "step": 81 }, { "epoch": 0.22, "learning_rate": 9.95283443966045e-05, "loss": 1.9995, "step": 82 }, { "epoch": 0.22, "learning_rate": 9.950851996211004e-05, "loss": 1.944, "step": 83 }, { "epoch": 0.22, "learning_rate": 9.948828949012327e-05, "loss": 1.9854, "step": 84 }, { "epoch": 0.23, "learning_rate": 9.946765314656174e-05, "loss": 1.9635, "step": 85 }, { "epoch": 0.23, "learning_rate": 9.944661110067162e-05, "loss": 2.0009, "step": 86 }, { "epoch": 0.23, "learning_rate": 9.942516352502644e-05, "loss": 1.9422, "step": 87 }, { "epoch": 0.23, "learning_rate": 9.940331059552563e-05, "loss": 1.8891, "step": 88 }, { "epoch": 0.24, "learning_rate": 9.938105249139306e-05, "loss": 1.9399, "step": 89 }, { "epoch": 0.24, "learning_rate": 9.935838939517556e-05, "loss": 1.9283, "step": 90 }, { "epoch": 0.24, "learning_rate": 9.933532149274152e-05, "loss": 1.9132, "step": 91 }, { "epoch": 0.24, "learning_rate": 9.931184897327922e-05, "loss": 1.8997, "step": 92 }, { "epoch": 0.25, "learning_rate": 9.928797202929539e-05, "loss": 1.8664, "step": 93 }, { "epoch": 0.25, "learning_rate": 9.92636908566136e-05, "loss": 1.8697, "step": 94 }, { "epoch": 0.25, "learning_rate": 9.923900565437262e-05, "loss": 1.9275, "step": 95 }, { "epoch": 0.25, "learning_rate": 9.921391662502483e-05, "loss": 1.9188, "step": 96 }, { "epoch": 0.26, "learning_rate": 9.918842397433455e-05, "loss": 1.8527, "step": 97 }, { "epoch": 0.26, "learning_rate": 9.916252791137631e-05, "loss": 1.9087, "step": 98 }, { "epoch": 0.26, "learning_rate": 9.913622864853325e-05, "loss": 1.8689, "step": 99 }, { "epoch": 0.27, "learning_rate": 9.91095264014952e-05, "loss": 1.9366, "step": 100 }, { "epoch": 0.27, "learning_rate": 9.908242138925709e-05, "loss": 1.8494, "step": 101 }, { "epoch": 0.27, "learning_rate": 9.905491383411705e-05, "loss": 1.8945, "step": 102 }, { "epoch": 0.27, "learning_rate": 9.902700396167459e-05, "loss": 1.914, "step": 103 }, { "epoch": 0.28, "learning_rate": 9.899869200082881e-05, "loss": 1.8494, "step": 104 }, { "epoch": 0.28, "learning_rate": 9.896997818377642e-05, "loss": 1.8909, "step": 105 }, { "epoch": 0.28, "learning_rate": 9.894086274601e-05, "loss": 1.8623, "step": 106 }, { "epoch": 0.28, "learning_rate": 9.891134592631587e-05, "loss": 1.8872, "step": 107 }, { "epoch": 0.29, "learning_rate": 9.88814279667723e-05, "loss": 1.8787, "step": 108 }, { "epoch": 0.29, "learning_rate": 9.885110911274738e-05, "loss": 1.8782, "step": 109 }, { "epoch": 0.29, "learning_rate": 9.88203896128972e-05, "loss": 1.7577, "step": 110 }, { "epoch": 0.29, "learning_rate": 9.878926971916354e-05, "loss": 1.8954, "step": 111 }, { "epoch": 0.3, "learning_rate": 9.87577496867721e-05, "loss": 1.8517, "step": 112 }, { "epoch": 0.3, "learning_rate": 9.872582977423018e-05, "loss": 1.839, "step": 113 }, { "epoch": 0.3, "learning_rate": 9.869351024332467e-05, "loss": 1.8851, "step": 114 }, { "epoch": 0.31, "learning_rate": 9.866079135911986e-05, "loss": 1.854, "step": 115 }, { "epoch": 0.31, "learning_rate": 9.86276733899553e-05, "loss": 1.8384, "step": 116 }, { "epoch": 0.31, "learning_rate": 9.85941566074436e-05, "loss": 1.8775, "step": 117 }, { "epoch": 0.31, "learning_rate": 9.856024128646812e-05, "loss": 1.8111, "step": 118 }, { "epoch": 0.32, "learning_rate": 9.852592770518085e-05, "loss": 1.8075, "step": 119 }, { "epoch": 0.32, "learning_rate": 9.849121614500001e-05, "loss": 1.7952, "step": 120 }, { "epoch": 0.32, "learning_rate": 9.845610689060782e-05, "loss": 1.8371, "step": 121 }, { "epoch": 0.32, "learning_rate": 9.842060022994814e-05, "loss": 1.8761, "step": 122 }, { "epoch": 0.33, "learning_rate": 9.838469645422406e-05, "loss": 1.8242, "step": 123 }, { "epoch": 0.33, "learning_rate": 9.834839585789559e-05, "loss": 1.8772, "step": 124 }, { "epoch": 0.33, "learning_rate": 9.831169873867723e-05, "loss": 1.7998, "step": 125 }, { "epoch": 0.33, "learning_rate": 9.827460539753546e-05, "loss": 1.8881, "step": 126 }, { "epoch": 0.34, "learning_rate": 9.823711613868636e-05, "loss": 1.8629, "step": 127 }, { "epoch": 0.34, "learning_rate": 9.819923126959308e-05, "loss": 1.8018, "step": 128 }, { "epoch": 0.34, "learning_rate": 9.816095110096325e-05, "loss": 1.8126, "step": 129 }, { "epoch": 0.34, "learning_rate": 9.812227594674659e-05, "loss": 1.8217, "step": 130 }, { "epoch": 0.35, "learning_rate": 9.808320612413217e-05, "loss": 1.8537, "step": 131 }, { "epoch": 0.35, "learning_rate": 9.804374195354591e-05, "loss": 1.8028, "step": 132 }, { "epoch": 0.35, "learning_rate": 9.80038837586479e-05, "loss": 1.8954, "step": 133 }, { "epoch": 0.36, "learning_rate": 9.796363186632985e-05, "loss": 1.8818, "step": 134 }, { "epoch": 0.36, "learning_rate": 9.792298660671217e-05, "loss": 1.8965, "step": 135 }, { "epoch": 0.36, "learning_rate": 9.788194831314158e-05, "loss": 1.8414, "step": 136 }, { "epoch": 0.36, "learning_rate": 9.784051732218808e-05, "loss": 1.8456, "step": 137 }, { "epoch": 0.37, "learning_rate": 9.779869397364247e-05, "loss": 1.8479, "step": 138 }, { "epoch": 0.37, "learning_rate": 9.775647861051329e-05, "loss": 1.8176, "step": 139 }, { "epoch": 0.37, "learning_rate": 9.771387157902417e-05, "loss": 1.7994, "step": 140 }, { "epoch": 0.37, "learning_rate": 9.767087322861102e-05, "loss": 1.8153, "step": 141 }, { "epoch": 0.38, "learning_rate": 9.7627483911919e-05, "loss": 1.8048, "step": 142 }, { "epoch": 0.38, "learning_rate": 9.758370398479981e-05, "loss": 1.8491, "step": 143 }, { "epoch": 0.38, "learning_rate": 9.753953380630862e-05, "loss": 1.82, "step": 144 }, { "epoch": 0.38, "learning_rate": 9.74949737387013e-05, "loss": 1.922, "step": 145 }, { "epoch": 0.39, "learning_rate": 9.745002414743119e-05, "loss": 1.8061, "step": 146 }, { "epoch": 0.39, "learning_rate": 9.740468540114638e-05, "loss": 1.8676, "step": 147 }, { "epoch": 0.39, "learning_rate": 9.735895787168652e-05, "loss": 1.904, "step": 148 }, { "epoch": 0.4, "learning_rate": 9.73128419340798e-05, "loss": 1.7908, "step": 149 }, { "epoch": 0.4, "learning_rate": 9.726633796653994e-05, "loss": 1.8096, "step": 150 }, { "epoch": 0.4, "learning_rate": 9.721944635046297e-05, "loss": 1.8669, "step": 151 }, { "epoch": 0.4, "learning_rate": 9.717216747042419e-05, "loss": 1.7547, "step": 152 }, { "epoch": 0.41, "learning_rate": 9.712450171417502e-05, "loss": 1.7849, "step": 153 }, { "epoch": 0.41, "learning_rate": 9.707644947263976e-05, "loss": 1.8122, "step": 154 }, { "epoch": 0.41, "learning_rate": 9.702801113991243e-05, "loss": 1.768, "step": 155 }, { "epoch": 0.41, "learning_rate": 9.697918711325353e-05, "loss": 1.8519, "step": 156 }, { "epoch": 0.42, "learning_rate": 9.692997779308677e-05, "loss": 1.7329, "step": 157 }, { "epoch": 0.42, "learning_rate": 9.688038358299578e-05, "loss": 1.7725, "step": 158 }, { "epoch": 0.42, "learning_rate": 9.683040488972086e-05, "loss": 1.7678, "step": 159 }, { "epoch": 0.42, "learning_rate": 9.678004212315554e-05, "loss": 1.7351, "step": 160 }, { "epoch": 0.43, "learning_rate": 9.672929569634331e-05, "loss": 1.8248, "step": 161 }, { "epoch": 0.43, "learning_rate": 9.66781660254742e-05, "loss": 1.7674, "step": 162 }, { "epoch": 0.43, "learning_rate": 9.662665352988133e-05, "loss": 1.7685, "step": 163 }, { "epoch": 0.44, "learning_rate": 9.657475863203755e-05, "loss": 1.8122, "step": 164 }, { "epoch": 0.44, "learning_rate": 9.65224817575519e-05, "loss": 1.8858, "step": 165 }, { "epoch": 0.44, "learning_rate": 9.646982333516616e-05, "loss": 1.8532, "step": 166 }, { "epoch": 0.44, "learning_rate": 9.641678379675135e-05, "loss": 1.8341, "step": 167 }, { "epoch": 0.45, "learning_rate": 9.63633635773041e-05, "loss": 1.6986, "step": 168 }, { "epoch": 0.45, "learning_rate": 9.63095631149432e-05, "loss": 1.7467, "step": 169 }, { "epoch": 0.45, "learning_rate": 9.625538285090595e-05, "loss": 1.769, "step": 170 }, { "epoch": 0.45, "learning_rate": 9.620082322954448e-05, "loss": 1.7554, "step": 171 }, { "epoch": 0.46, "learning_rate": 9.614588469832225e-05, "loss": 1.6883, "step": 172 }, { "epoch": 0.46, "learning_rate": 9.609056770781026e-05, "loss": 1.7908, "step": 173 }, { "epoch": 0.46, "learning_rate": 9.603487271168336e-05, "loss": 1.769, "step": 174 }, { "epoch": 0.46, "learning_rate": 9.597880016671665e-05, "loss": 1.8701, "step": 175 }, { "epoch": 0.47, "learning_rate": 9.592235053278157e-05, "loss": 1.7404, "step": 176 }, { "epoch": 0.47, "learning_rate": 9.586552427284223e-05, "loss": 1.7462, "step": 177 }, { "epoch": 0.47, "learning_rate": 9.580832185295156e-05, "loss": 1.7704, "step": 178 }, { "epoch": 0.47, "learning_rate": 9.575074374224758e-05, "loss": 1.7354, "step": 179 }, { "epoch": 0.48, "learning_rate": 9.569279041294944e-05, "loss": 1.9016, "step": 180 }, { "epoch": 0.48, "learning_rate": 9.563446234035358e-05, "loss": 1.7546, "step": 181 }, { "epoch": 0.48, "learning_rate": 9.557576000282991e-05, "loss": 1.7814, "step": 182 }, { "epoch": 0.49, "learning_rate": 9.551668388181776e-05, "loss": 1.7423, "step": 183 }, { "epoch": 0.49, "learning_rate": 9.545723446182202e-05, "loss": 1.737, "step": 184 }, { "epoch": 0.49, "learning_rate": 9.539741223040915e-05, "loss": 1.7577, "step": 185 }, { "epoch": 0.49, "learning_rate": 9.533721767820317e-05, "loss": 1.7864, "step": 186 }, { "epoch": 0.5, "learning_rate": 9.527665129888161e-05, "loss": 1.7015, "step": 187 }, { "epoch": 0.5, "learning_rate": 9.521571358917153e-05, "loss": 1.7017, "step": 188 }, { "epoch": 0.5, "learning_rate": 9.51544050488454e-05, "loss": 1.7616, "step": 189 }, { "epoch": 0.5, "learning_rate": 9.509272618071699e-05, "loss": 1.7538, "step": 190 }, { "epoch": 0.51, "learning_rate": 9.503067749063726e-05, "loss": 1.8012, "step": 191 }, { "epoch": 0.51, "learning_rate": 9.496825948749024e-05, "loss": 1.7607, "step": 192 }, { "epoch": 0.51, "learning_rate": 9.490547268318881e-05, "loss": 1.7575, "step": 193 }, { "epoch": 0.51, "learning_rate": 9.484231759267054e-05, "loss": 1.7102, "step": 194 }, { "epoch": 0.52, "learning_rate": 9.477879473389345e-05, "loss": 1.7801, "step": 195 }, { "epoch": 0.52, "learning_rate": 9.471490462783175e-05, "loss": 1.7379, "step": 196 }, { "epoch": 0.52, "learning_rate": 9.465064779847156e-05, "loss": 1.718, "step": 197 }, { "epoch": 0.53, "learning_rate": 9.458602477280668e-05, "loss": 1.6832, "step": 198 }, { "epoch": 0.53, "learning_rate": 9.452103608083417e-05, "loss": 1.7995, "step": 199 }, { "epoch": 0.53, "learning_rate": 9.445568225555014e-05, "loss": 1.7036, "step": 200 }, { "epoch": 0.53, "learning_rate": 9.438996383294516e-05, "loss": 1.6973, "step": 201 }, { "epoch": 0.54, "learning_rate": 9.43238813520001e-05, "loss": 1.757, "step": 202 }, { "epoch": 0.54, "learning_rate": 9.425743535468156e-05, "loss": 1.7293, "step": 203 }, { "epoch": 0.54, "learning_rate": 9.41906263859375e-05, "loss": 1.8156, "step": 204 }, { "epoch": 0.54, "learning_rate": 9.412345499369271e-05, "loss": 1.7483, "step": 205 }, { "epoch": 0.55, "learning_rate": 9.405592172884437e-05, "loss": 1.7947, "step": 206 }, { "epoch": 0.55, "learning_rate": 9.39880271452575e-05, "loss": 1.8237, "step": 207 }, { "epoch": 0.55, "learning_rate": 9.391977179976043e-05, "loss": 1.6674, "step": 208 }, { "epoch": 0.55, "learning_rate": 9.385115625214022e-05, "loss": 1.7484, "step": 209 }, { "epoch": 0.56, "learning_rate": 9.378218106513812e-05, "loss": 1.7449, "step": 210 }, { "epoch": 0.56, "learning_rate": 9.371284680444483e-05, "loss": 1.7444, "step": 211 }, { "epoch": 0.56, "learning_rate": 9.364315403869606e-05, "loss": 1.6666, "step": 212 }, { "epoch": 0.56, "learning_rate": 9.357310333946763e-05, "loss": 1.7569, "step": 213 }, { "epoch": 0.57, "learning_rate": 9.3502695281271e-05, "loss": 1.6859, "step": 214 }, { "epoch": 0.57, "learning_rate": 9.343193044154843e-05, "loss": 1.7095, "step": 215 }, { "epoch": 0.57, "learning_rate": 9.336080940066826e-05, "loss": 1.8226, "step": 216 }, { "epoch": 0.58, "learning_rate": 9.328933274192015e-05, "loss": 1.8059, "step": 217 }, { "epoch": 0.58, "learning_rate": 9.32175010515104e-05, "loss": 1.699, "step": 218 }, { "epoch": 0.58, "learning_rate": 9.314531491855692e-05, "loss": 1.7162, "step": 219 }, { "epoch": 0.58, "learning_rate": 9.307277493508465e-05, "loss": 1.7898, "step": 220 }, { "epoch": 0.59, "learning_rate": 9.299988169602054e-05, "loss": 1.8133, "step": 221 }, { "epoch": 0.59, "learning_rate": 9.292663579918873e-05, "loss": 1.81, "step": 222 }, { "epoch": 0.59, "learning_rate": 9.285303784530558e-05, "loss": 1.694, "step": 223 }, { "epoch": 0.59, "learning_rate": 9.277908843797492e-05, "loss": 1.648, "step": 224 }, { "epoch": 0.6, "learning_rate": 9.270478818368287e-05, "loss": 1.8439, "step": 225 }, { "epoch": 0.6, "learning_rate": 9.263013769179298e-05, "loss": 1.7486, "step": 226 }, { "epoch": 0.6, "learning_rate": 9.25551375745413e-05, "loss": 1.7601, "step": 227 }, { "epoch": 0.6, "learning_rate": 9.247978844703122e-05, "loss": 1.7399, "step": 228 }, { "epoch": 0.61, "learning_rate": 9.240409092722852e-05, "loss": 1.8162, "step": 229 }, { "epoch": 0.61, "learning_rate": 9.232804563595626e-05, "loss": 1.6533, "step": 230 }, { "epoch": 0.61, "learning_rate": 9.22516531968897e-05, "loss": 1.7488, "step": 231 }, { "epoch": 0.62, "learning_rate": 9.217491423655123e-05, "loss": 1.7544, "step": 232 }, { "epoch": 0.62, "learning_rate": 9.209782938430509e-05, "loss": 1.6406, "step": 233 }, { "epoch": 0.62, "learning_rate": 9.202039927235241e-05, "loss": 1.7158, "step": 234 }, { "epoch": 0.62, "learning_rate": 9.194262453572586e-05, "loss": 1.7827, "step": 235 }, { "epoch": 0.63, "learning_rate": 9.186450581228454e-05, "loss": 1.6567, "step": 236 }, { "epoch": 0.63, "learning_rate": 9.178604374270867e-05, "loss": 1.7305, "step": 237 }, { "epoch": 0.63, "learning_rate": 9.170723897049439e-05, "loss": 1.7544, "step": 238 }, { "epoch": 0.63, "learning_rate": 9.162809214194851e-05, "loss": 1.7247, "step": 239 }, { "epoch": 0.64, "learning_rate": 9.154860390618313e-05, "loss": 1.8192, "step": 240 }, { "epoch": 0.64, "learning_rate": 9.146877491511035e-05, "loss": 1.7016, "step": 241 }, { "epoch": 0.64, "learning_rate": 9.138860582343696e-05, "loss": 1.7377, "step": 242 }, { "epoch": 0.64, "learning_rate": 9.130809728865901e-05, "loss": 1.6459, "step": 243 }, { "epoch": 0.65, "learning_rate": 9.122724997105647e-05, "loss": 1.7161, "step": 244 }, { "epoch": 0.65, "learning_rate": 9.114606453368779e-05, "loss": 1.6868, "step": 245 }, { "epoch": 0.65, "learning_rate": 9.106454164238442e-05, "loss": 1.7086, "step": 246 }, { "epoch": 0.66, "learning_rate": 9.098268196574546e-05, "loss": 1.7164, "step": 247 }, { "epoch": 0.66, "learning_rate": 9.090048617513207e-05, "loss": 1.6877, "step": 248 }, { "epoch": 0.66, "learning_rate": 9.081795494466201e-05, "loss": 1.6701, "step": 249 }, { "epoch": 0.66, "learning_rate": 9.073508895120411e-05, "loss": 1.7393, "step": 250 }, { "epoch": 0.67, "learning_rate": 9.065188887437273e-05, "loss": 1.746, "step": 251 }, { "epoch": 0.67, "learning_rate": 9.056835539652211e-05, "loss": 1.79, "step": 252 }, { "epoch": 0.67, "learning_rate": 9.048448920274088e-05, "loss": 1.6791, "step": 253 }, { "epoch": 0.67, "learning_rate": 9.040029098084643e-05, "loss": 1.6771, "step": 254 }, { "epoch": 0.68, "learning_rate": 9.031576142137919e-05, "loss": 1.644, "step": 255 }, { "epoch": 0.68, "learning_rate": 9.023090121759699e-05, "loss": 1.7242, "step": 256 }, { "epoch": 0.68, "learning_rate": 9.01457110654694e-05, "loss": 1.7745, "step": 257 }, { "epoch": 0.68, "learning_rate": 9.006019166367208e-05, "loss": 1.7381, "step": 258 }, { "epoch": 0.69, "learning_rate": 8.997434371358093e-05, "loss": 1.6923, "step": 259 }, { "epoch": 0.69, "learning_rate": 8.98881679192664e-05, "loss": 1.8049, "step": 260 }, { "epoch": 0.69, "learning_rate": 8.980166498748774e-05, "loss": 1.6683, "step": 261 }, { "epoch": 0.69, "learning_rate": 8.971483562768712e-05, "loss": 1.7033, "step": 262 }, { "epoch": 0.7, "learning_rate": 8.962768055198394e-05, "loss": 1.761, "step": 263 }, { "epoch": 0.7, "learning_rate": 8.954020047516884e-05, "loss": 1.7824, "step": 264 }, { "epoch": 0.7, "learning_rate": 8.945239611469796e-05, "loss": 1.725, "step": 265 }, { "epoch": 0.71, "learning_rate": 8.9364268190687e-05, "loss": 1.6417, "step": 266 }, { "epoch": 0.71, "learning_rate": 8.927581742590533e-05, "loss": 1.7119, "step": 267 }, { "epoch": 0.71, "learning_rate": 8.918704454577003e-05, "loss": 1.7466, "step": 268 }, { "epoch": 0.71, "learning_rate": 8.909795027833998e-05, "loss": 1.6963, "step": 269 }, { "epoch": 0.72, "learning_rate": 8.900853535430986e-05, "loss": 1.7345, "step": 270 }, { "epoch": 0.72, "learning_rate": 8.891880050700424e-05, "loss": 1.6779, "step": 271 }, { "epoch": 0.72, "learning_rate": 8.882874647237138e-05, "loss": 1.6923, "step": 272 }, { "epoch": 0.72, "learning_rate": 8.873837398897742e-05, "loss": 1.6592, "step": 273 }, { "epoch": 0.73, "learning_rate": 8.864768379800016e-05, "loss": 1.6333, "step": 274 }, { "epoch": 0.73, "learning_rate": 8.855667664322307e-05, "loss": 1.7154, "step": 275 }, { "epoch": 0.73, "learning_rate": 8.846535327102909e-05, "loss": 1.7901, "step": 276 }, { "epoch": 0.73, "learning_rate": 8.837371443039466e-05, "loss": 1.6907, "step": 277 }, { "epoch": 0.74, "learning_rate": 8.828176087288345e-05, "loss": 1.7244, "step": 278 }, { "epoch": 0.74, "learning_rate": 8.818949335264021e-05, "loss": 1.7037, "step": 279 }, { "epoch": 0.74, "learning_rate": 8.809691262638467e-05, "loss": 1.6272, "step": 280 }, { "epoch": 0.75, "learning_rate": 8.800401945340523e-05, "loss": 1.6574, "step": 281 }, { "epoch": 0.75, "learning_rate": 8.791081459555281e-05, "loss": 1.6544, "step": 282 }, { "epoch": 0.75, "learning_rate": 8.781729881723458e-05, "loss": 1.6271, "step": 283 }, { "epoch": 0.75, "learning_rate": 8.772347288540763e-05, "loss": 1.7392, "step": 284 }, { "epoch": 0.76, "learning_rate": 8.762933756957281e-05, "loss": 1.6172, "step": 285 }, { "epoch": 0.76, "learning_rate": 8.753489364176826e-05, "loss": 1.7241, "step": 286 }, { "epoch": 0.76, "learning_rate": 8.744014187656321e-05, "loss": 1.726, "step": 287 }, { "epoch": 0.76, "learning_rate": 8.734508305105158e-05, "loss": 1.699, "step": 288 }, { "epoch": 0.77, "learning_rate": 8.724971794484556e-05, "loss": 1.6371, "step": 289 }, { "epoch": 0.77, "learning_rate": 8.715404734006931e-05, "loss": 1.7337, "step": 290 }, { "epoch": 0.77, "learning_rate": 8.705807202135248e-05, "loss": 1.6385, "step": 291 }, { "epoch": 0.77, "learning_rate": 8.69617927758238e-05, "loss": 1.7023, "step": 292 }, { "epoch": 0.78, "learning_rate": 8.686521039310454e-05, "loss": 1.6796, "step": 293 }, { "epoch": 0.78, "learning_rate": 8.676832566530221e-05, "loss": 1.7157, "step": 294 }, { "epoch": 0.78, "learning_rate": 8.667113938700396e-05, "loss": 1.6873, "step": 295 }, { "epoch": 0.79, "learning_rate": 8.657365235526995e-05, "loss": 1.7194, "step": 296 }, { "epoch": 0.79, "learning_rate": 8.647586536962707e-05, "loss": 1.7695, "step": 297 }, { "epoch": 0.79, "learning_rate": 8.637777923206215e-05, "loss": 1.6464, "step": 298 }, { "epoch": 0.79, "learning_rate": 8.62793947470155e-05, "loss": 1.7462, "step": 299 }, { "epoch": 0.8, "learning_rate": 8.618071272137431e-05, "loss": 1.6386, "step": 300 }, { "epoch": 0.8, "learning_rate": 8.608173396446598e-05, "loss": 1.6692, "step": 301 }, { "epoch": 0.8, "learning_rate": 8.598245928805152e-05, "loss": 1.7241, "step": 302 }, { "epoch": 0.8, "learning_rate": 8.588288950631889e-05, "loss": 1.744, "step": 303 }, { "epoch": 0.81, "learning_rate": 8.578302543587631e-05, "loss": 1.6958, "step": 304 }, { "epoch": 0.81, "learning_rate": 8.568286789574557e-05, "loss": 1.7288, "step": 305 }, { "epoch": 0.81, "learning_rate": 8.558241770735531e-05, "loss": 1.7376, "step": 306 }, { "epoch": 0.81, "learning_rate": 8.548167569453429e-05, "loss": 1.668, "step": 307 }, { "epoch": 0.82, "learning_rate": 8.538064268350465e-05, "loss": 1.6949, "step": 308 }, { "epoch": 0.82, "learning_rate": 8.527931950287507e-05, "loss": 1.645, "step": 309 }, { "epoch": 0.82, "learning_rate": 8.517770698363404e-05, "loss": 1.6848, "step": 310 }, { "epoch": 0.82, "learning_rate": 8.507580595914303e-05, "loss": 1.7163, "step": 311 }, { "epoch": 0.83, "learning_rate": 8.497361726512965e-05, "loss": 1.7366, "step": 312 }, { "epoch": 0.83, "learning_rate": 8.487114173968074e-05, "loss": 1.7858, "step": 313 }, { "epoch": 0.83, "learning_rate": 8.476838022323561e-05, "loss": 1.6975, "step": 314 }, { "epoch": 0.84, "learning_rate": 8.466533355857908e-05, "loss": 1.7549, "step": 315 }, { "epoch": 0.84, "learning_rate": 8.456200259083454e-05, "loss": 1.6796, "step": 316 }, { "epoch": 0.84, "learning_rate": 8.445838816745709e-05, "loss": 1.6895, "step": 317 }, { "epoch": 0.84, "learning_rate": 8.435449113822655e-05, "loss": 1.6524, "step": 318 }, { "epoch": 0.85, "learning_rate": 8.425031235524046e-05, "loss": 1.7097, "step": 319 }, { "epoch": 0.85, "learning_rate": 8.414585267290715e-05, "loss": 1.7021, "step": 320 }, { "epoch": 0.85, "learning_rate": 8.404111294793873e-05, "loss": 1.7239, "step": 321 }, { "epoch": 0.85, "learning_rate": 8.393609403934398e-05, "loss": 1.6201, "step": 322 }, { "epoch": 0.86, "learning_rate": 8.383079680842145e-05, "loss": 1.6921, "step": 323 }, { "epoch": 0.86, "learning_rate": 8.372522211875224e-05, "loss": 1.6285, "step": 324 }, { "epoch": 0.86, "learning_rate": 8.361937083619304e-05, "loss": 1.692, "step": 325 }, { "epoch": 0.86, "learning_rate": 8.351324382886895e-05, "loss": 1.7094, "step": 326 }, { "epoch": 0.87, "learning_rate": 8.340684196716639e-05, "loss": 1.661, "step": 327 }, { "epoch": 0.87, "learning_rate": 8.330016612372599e-05, "loss": 1.6573, "step": 328 }, { "epoch": 0.87, "learning_rate": 8.319321717343535e-05, "loss": 1.7666, "step": 329 }, { "epoch": 0.88, "learning_rate": 8.308599599342202e-05, "loss": 1.6458, "step": 330 }, { "epoch": 0.88, "learning_rate": 8.297850346304608e-05, "loss": 1.6689, "step": 331 }, { "epoch": 0.88, "learning_rate": 8.287074046389312e-05, "loss": 1.6694, "step": 332 }, { "epoch": 0.88, "learning_rate": 8.276270787976696e-05, "loss": 1.7342, "step": 333 }, { "epoch": 0.89, "learning_rate": 8.265440659668236e-05, "loss": 1.7041, "step": 334 }, { "epoch": 0.89, "learning_rate": 8.254583750285776e-05, "loss": 1.707, "step": 335 }, { "epoch": 0.89, "learning_rate": 8.243700148870805e-05, "loss": 1.6359, "step": 336 }, { "epoch": 0.89, "learning_rate": 8.232789944683723e-05, "loss": 1.6944, "step": 337 }, { "epoch": 0.9, "learning_rate": 8.221853227203106e-05, "loss": 1.6221, "step": 338 }, { "epoch": 0.9, "learning_rate": 8.210890086124977e-05, "loss": 1.6485, "step": 339 }, { "epoch": 0.9, "learning_rate": 8.199900611362068e-05, "loss": 1.6927, "step": 340 }, { "epoch": 0.9, "learning_rate": 8.188884893043083e-05, "loss": 1.71, "step": 341 }, { "epoch": 0.91, "learning_rate": 8.177843021511962e-05, "loss": 1.6721, "step": 342 }, { "epoch": 0.91, "learning_rate": 8.166775087327133e-05, "loss": 1.7052, "step": 343 }, { "epoch": 0.91, "learning_rate": 8.155681181260777e-05, "loss": 1.679, "step": 344 }, { "epoch": 0.92, "learning_rate": 8.144561394298075e-05, "loss": 1.6976, "step": 345 }, { "epoch": 0.92, "learning_rate": 8.133415817636471e-05, "loss": 1.591, "step": 346 }, { "epoch": 0.92, "learning_rate": 8.12224454268492e-05, "loss": 1.7302, "step": 347 }, { "epoch": 0.92, "learning_rate": 8.111047661063136e-05, "loss": 1.649, "step": 348 }, { "epoch": 0.93, "learning_rate": 8.099825264600842e-05, "loss": 1.7271, "step": 349 }, { "epoch": 0.93, "learning_rate": 8.08857744533702e-05, "loss": 1.7033, "step": 350 }, { "epoch": 0.93, "learning_rate": 8.077304295519151e-05, "loss": 1.6853, "step": 351 }, { "epoch": 0.93, "learning_rate": 8.066005907602465e-05, "loss": 1.6198, "step": 352 }, { "epoch": 0.94, "learning_rate": 8.054682374249174e-05, "loss": 1.5788, "step": 353 }, { "epoch": 0.94, "learning_rate": 8.04333378832772e-05, "loss": 1.6358, "step": 354 }, { "epoch": 0.94, "learning_rate": 8.031960242912011e-05, "loss": 1.6205, "step": 355 }, { "epoch": 0.94, "learning_rate": 8.020561831280654e-05, "loss": 1.6251, "step": 356 }, { "epoch": 0.95, "learning_rate": 8.009138646916196e-05, "loss": 1.6325, "step": 357 }, { "epoch": 0.95, "learning_rate": 7.997690783504353e-05, "loss": 1.5752, "step": 358 }, { "epoch": 0.95, "learning_rate": 7.986218334933241e-05, "loss": 1.702, "step": 359 }, { "epoch": 0.95, "learning_rate": 7.97472139529261e-05, "loss": 1.6434, "step": 360 }, { "epoch": 0.96, "learning_rate": 7.963200058873072e-05, "loss": 1.6503, "step": 361 }, { "epoch": 0.96, "learning_rate": 7.951654420165323e-05, "loss": 1.6811, "step": 362 }, { "epoch": 0.96, "learning_rate": 7.940084573859369e-05, "loss": 1.6883, "step": 363 }, { "epoch": 0.97, "learning_rate": 7.928490614843757e-05, "loss": 1.6747, "step": 364 }, { "epoch": 0.97, "learning_rate": 7.916872638204788e-05, "loss": 1.6585, "step": 365 }, { "epoch": 0.97, "learning_rate": 7.90523073922574e-05, "loss": 1.6598, "step": 366 }, { "epoch": 0.97, "learning_rate": 7.893565013386087e-05, "loss": 1.6732, "step": 367 }, { "epoch": 0.98, "learning_rate": 7.881875556360717e-05, "loss": 1.6139, "step": 368 }, { "epoch": 0.98, "learning_rate": 7.870162464019144e-05, "loss": 1.7143, "step": 369 }, { "epoch": 0.98, "learning_rate": 7.858425832424728e-05, "loss": 1.6749, "step": 370 }, { "epoch": 0.98, "learning_rate": 7.846665757833878e-05, "loss": 1.7282, "step": 371 }, { "epoch": 0.99, "learning_rate": 7.83488233669527e-05, "loss": 1.6329, "step": 372 }, { "epoch": 0.99, "learning_rate": 7.823075665649056e-05, "loss": 1.6273, "step": 373 }, { "epoch": 0.99, "learning_rate": 7.811245841526063e-05, "loss": 1.6262, "step": 374 }, { "epoch": 0.99, "learning_rate": 7.79939296134701e-05, "loss": 1.6977, "step": 375 }, { "epoch": 1.0, "learning_rate": 7.787517122321706e-05, "loss": 1.735, "step": 376 }, { "epoch": 1.0, "learning_rate": 7.775618421848252e-05, "loss": 1.6294, "step": 377 }, { "epoch": 1.0, "learning_rate": 7.763696957512246e-05, "loss": 1.5115, "step": 378 }, { "epoch": 1.01, "learning_rate": 7.75175282708598e-05, "loss": 1.5511, "step": 379 }, { "epoch": 1.01, "learning_rate": 7.739786128527643e-05, "loss": 1.6208, "step": 380 }, { "epoch": 1.01, "learning_rate": 7.727796959980504e-05, "loss": 1.5682, "step": 381 }, { "epoch": 1.01, "learning_rate": 7.715785419772126e-05, "loss": 1.5706, "step": 382 }, { "epoch": 1.02, "learning_rate": 7.703751606413542e-05, "loss": 1.6126, "step": 383 }, { "epoch": 1.02, "learning_rate": 7.691695618598467e-05, "loss": 1.6065, "step": 384 }, { "epoch": 1.02, "learning_rate": 7.679617555202463e-05, "loss": 1.5688, "step": 385 }, { "epoch": 1.02, "learning_rate": 7.667517515282152e-05, "loss": 1.5788, "step": 386 }, { "epoch": 1.03, "learning_rate": 7.655395598074389e-05, "loss": 1.513, "step": 387 }, { "epoch": 1.03, "learning_rate": 7.643251902995452e-05, "loss": 1.5044, "step": 388 }, { "epoch": 1.03, "learning_rate": 7.63108652964023e-05, "loss": 1.5667, "step": 389 }, { "epoch": 1.03, "learning_rate": 7.618899577781404e-05, "loss": 1.5765, "step": 390 }, { "epoch": 1.04, "learning_rate": 7.606691147368627e-05, "loss": 1.5661, "step": 391 }, { "epoch": 1.04, "learning_rate": 7.594461338527701e-05, "loss": 1.5763, "step": 392 }, { "epoch": 1.04, "learning_rate": 7.582210251559769e-05, "loss": 1.5253, "step": 393 }, { "epoch": 1.05, "learning_rate": 7.569937986940477e-05, "loss": 1.5982, "step": 394 }, { "epoch": 1.05, "learning_rate": 7.557644645319158e-05, "loss": 1.5583, "step": 395 }, { "epoch": 1.05, "learning_rate": 7.545330327518007e-05, "loss": 1.488, "step": 396 }, { "epoch": 1.05, "learning_rate": 7.532995134531251e-05, "loss": 1.5368, "step": 397 }, { "epoch": 1.06, "learning_rate": 7.520639167524322e-05, "loss": 1.5863, "step": 398 }, { "epoch": 1.06, "learning_rate": 7.508262527833029e-05, "loss": 1.6736, "step": 399 }, { "epoch": 1.06, "learning_rate": 7.495865316962723e-05, "loss": 1.5957, "step": 400 }, { "epoch": 1.06, "learning_rate": 7.483447636587467e-05, "loss": 1.5553, "step": 401 }, { "epoch": 1.07, "learning_rate": 7.471009588549205e-05, "loss": 1.5217, "step": 402 }, { "epoch": 1.07, "learning_rate": 7.458551274856918e-05, "loss": 1.5806, "step": 403 }, { "epoch": 1.07, "learning_rate": 7.4460727976858e-05, "loss": 1.6075, "step": 404 }, { "epoch": 1.07, "learning_rate": 7.433574259376407e-05, "loss": 1.5302, "step": 405 }, { "epoch": 1.08, "learning_rate": 7.421055762433826e-05, "loss": 1.4965, "step": 406 }, { "epoch": 1.08, "learning_rate": 7.408517409526835e-05, "loss": 1.6272, "step": 407 }, { "epoch": 1.08, "learning_rate": 7.39595930348705e-05, "loss": 1.5668, "step": 408 }, { "epoch": 1.08, "learning_rate": 7.3833815473081e-05, "loss": 1.5652, "step": 409 }, { "epoch": 1.09, "learning_rate": 7.370784244144762e-05, "loss": 1.5885, "step": 410 }, { "epoch": 1.09, "learning_rate": 7.358167497312134e-05, "loss": 1.5324, "step": 411 }, { "epoch": 1.09, "learning_rate": 7.345531410284774e-05, "loss": 1.6304, "step": 412 }, { "epoch": 1.1, "learning_rate": 7.332876086695855e-05, "loss": 1.5931, "step": 413 }, { "epoch": 1.1, "learning_rate": 7.320201630336318e-05, "loss": 1.5992, "step": 414 }, { "epoch": 1.1, "learning_rate": 7.307508145154019e-05, "loss": 1.5467, "step": 415 }, { "epoch": 1.1, "learning_rate": 7.294795735252875e-05, "loss": 1.5775, "step": 416 }, { "epoch": 1.11, "learning_rate": 7.282064504892015e-05, "loss": 1.5119, "step": 417 }, { "epoch": 1.11, "learning_rate": 7.269314558484914e-05, "loss": 1.5829, "step": 418 }, { "epoch": 1.11, "learning_rate": 7.256546000598551e-05, "loss": 1.6211, "step": 419 }, { "epoch": 1.11, "learning_rate": 7.243758935952547e-05, "loss": 1.5241, "step": 420 }, { "epoch": 1.12, "learning_rate": 7.230953469418292e-05, "loss": 1.5521, "step": 421 }, { "epoch": 1.12, "learning_rate": 7.218129706018108e-05, "loss": 1.5349, "step": 422 }, { "epoch": 1.12, "learning_rate": 7.205287750924372e-05, "loss": 1.5815, "step": 423 }, { "epoch": 1.12, "learning_rate": 7.192427709458656e-05, "loss": 1.5188, "step": 424 }, { "epoch": 1.13, "learning_rate": 7.179549687090867e-05, "loss": 1.5987, "step": 425 }, { "epoch": 1.13, "learning_rate": 7.166653789438382e-05, "loss": 1.5643, "step": 426 }, { "epoch": 1.13, "learning_rate": 7.153740122265176e-05, "loss": 1.5052, "step": 427 }, { "epoch": 1.14, "learning_rate": 7.140808791480959e-05, "loss": 1.6092, "step": 428 }, { "epoch": 1.14, "learning_rate": 7.127859903140311e-05, "loss": 1.5671, "step": 429 }, { "epoch": 1.14, "learning_rate": 7.114893563441802e-05, "loss": 1.5004, "step": 430 }, { "epoch": 1.14, "learning_rate": 7.101909878727128e-05, "loss": 1.5558, "step": 431 }, { "epoch": 1.15, "learning_rate": 7.088908955480244e-05, "loss": 1.5113, "step": 432 }, { "epoch": 1.15, "learning_rate": 7.075890900326475e-05, "loss": 1.6546, "step": 433 }, { "epoch": 1.15, "learning_rate": 7.062855820031659e-05, "loss": 1.5282, "step": 434 }, { "epoch": 1.15, "learning_rate": 7.049803821501259e-05, "loss": 1.5285, "step": 435 }, { "epoch": 1.16, "learning_rate": 7.036735011779492e-05, "loss": 1.5854, "step": 436 }, { "epoch": 1.16, "learning_rate": 7.023649498048451e-05, "loss": 1.6048, "step": 437 }, { "epoch": 1.16, "learning_rate": 7.01054738762722e-05, "loss": 1.5618, "step": 438 }, { "epoch": 1.16, "learning_rate": 6.997428787971005e-05, "loss": 1.6191, "step": 439 }, { "epoch": 1.17, "learning_rate": 6.984293806670244e-05, "loss": 1.5588, "step": 440 }, { "epoch": 1.17, "learning_rate": 6.971142551449725e-05, "loss": 1.6202, "step": 441 }, { "epoch": 1.17, "learning_rate": 6.957975130167705e-05, "loss": 1.607, "step": 442 }, { "epoch": 1.18, "learning_rate": 6.944791650815023e-05, "loss": 1.554, "step": 443 }, { "epoch": 1.18, "learning_rate": 6.931592221514222e-05, "loss": 1.6057, "step": 444 }, { "epoch": 1.18, "learning_rate": 6.91837695051865e-05, "loss": 1.5725, "step": 445 }, { "epoch": 1.18, "learning_rate": 6.905145946211583e-05, "loss": 1.5788, "step": 446 }, { "epoch": 1.19, "learning_rate": 6.891899317105329e-05, "loss": 1.5324, "step": 447 }, { "epoch": 1.19, "learning_rate": 6.878637171840343e-05, "loss": 1.5962, "step": 448 }, { "epoch": 1.19, "learning_rate": 6.865359619184331e-05, "loss": 1.5458, "step": 449 }, { "epoch": 1.19, "learning_rate": 6.85206676803136e-05, "loss": 1.6023, "step": 450 }, { "epoch": 1.2, "learning_rate": 6.83875872740097e-05, "loss": 1.5291, "step": 451 }, { "epoch": 1.2, "learning_rate": 6.825435606437273e-05, "loss": 1.5929, "step": 452 }, { "epoch": 1.2, "learning_rate": 6.81209751440806e-05, "loss": 1.5424, "step": 453 }, { "epoch": 1.2, "learning_rate": 6.798744560703905e-05, "loss": 1.5881, "step": 454 }, { "epoch": 1.21, "learning_rate": 6.785376854837268e-05, "loss": 1.4747, "step": 455 }, { "epoch": 1.21, "learning_rate": 6.771994506441597e-05, "loss": 1.5215, "step": 456 }, { "epoch": 1.21, "learning_rate": 6.758597625270433e-05, "loss": 1.465, "step": 457 }, { "epoch": 1.21, "learning_rate": 6.745186321196495e-05, "loss": 1.5071, "step": 458 }, { "epoch": 1.22, "learning_rate": 6.731760704210802e-05, "loss": 1.4882, "step": 459 }, { "epoch": 1.22, "learning_rate": 6.718320884421751e-05, "loss": 1.5905, "step": 460 }, { "epoch": 1.22, "learning_rate": 6.704866972054223e-05, "loss": 1.5922, "step": 461 }, { "epoch": 1.23, "learning_rate": 6.691399077448677e-05, "loss": 1.5448, "step": 462 }, { "epoch": 1.23, "learning_rate": 6.677917311060246e-05, "loss": 1.5675, "step": 463 }, { "epoch": 1.23, "learning_rate": 6.66442178345783e-05, "loss": 1.6005, "step": 464 }, { "epoch": 1.23, "learning_rate": 6.650912605323194e-05, "loss": 1.6179, "step": 465 }, { "epoch": 1.24, "learning_rate": 6.637389887450045e-05, "loss": 1.5711, "step": 466 }, { "epoch": 1.24, "learning_rate": 6.623853740743146e-05, "loss": 1.6179, "step": 467 }, { "epoch": 1.24, "learning_rate": 6.610304276217392e-05, "loss": 1.6407, "step": 468 }, { "epoch": 1.24, "learning_rate": 6.596741604996897e-05, "loss": 1.6296, "step": 469 }, { "epoch": 1.25, "learning_rate": 6.583165838314095e-05, "loss": 1.6393, "step": 470 }, { "epoch": 1.25, "learning_rate": 6.569577087508814e-05, "loss": 1.5851, "step": 471 }, { "epoch": 1.25, "learning_rate": 6.555975464027375e-05, "loss": 1.5772, "step": 472 }, { "epoch": 1.25, "learning_rate": 6.542361079421669e-05, "loss": 1.5792, "step": 473 }, { "epoch": 1.26, "learning_rate": 6.528734045348248e-05, "loss": 1.5866, "step": 474 }, { "epoch": 1.26, "learning_rate": 6.515094473567407e-05, "loss": 1.5141, "step": 475 }, { "epoch": 1.26, "learning_rate": 6.501442475942265e-05, "loss": 1.5783, "step": 476 }, { "epoch": 1.27, "learning_rate": 6.48777816443785e-05, "loss": 1.5052, "step": 477 }, { "epoch": 1.27, "learning_rate": 6.474101651120184e-05, "loss": 1.5681, "step": 478 }, { "epoch": 1.27, "learning_rate": 6.460413048155355e-05, "loss": 1.6441, "step": 479 }, { "epoch": 1.27, "learning_rate": 6.446712467808608e-05, "loss": 1.5737, "step": 480 }, { "epoch": 1.28, "learning_rate": 6.433000022443419e-05, "loss": 1.5541, "step": 481 }, { "epoch": 1.28, "learning_rate": 6.419275824520568e-05, "loss": 1.5573, "step": 482 }, { "epoch": 1.28, "learning_rate": 6.405539986597225e-05, "loss": 1.5178, "step": 483 }, { "epoch": 1.28, "learning_rate": 6.391792621326027e-05, "loss": 1.5345, "step": 484 }, { "epoch": 1.29, "learning_rate": 6.378033841454147e-05, "loss": 1.6092, "step": 485 }, { "epoch": 1.29, "learning_rate": 6.364263759822371e-05, "loss": 1.5439, "step": 486 }, { "epoch": 1.29, "learning_rate": 6.350482489364186e-05, "loss": 1.547, "step": 487 }, { "epoch": 1.29, "learning_rate": 6.336690143104827e-05, "loss": 1.5803, "step": 488 }, { "epoch": 1.3, "learning_rate": 6.322886834160378e-05, "loss": 1.584, "step": 489 }, { "epoch": 1.3, "learning_rate": 6.309072675736827e-05, "loss": 1.46, "step": 490 }, { "epoch": 1.3, "learning_rate": 6.29524778112914e-05, "loss": 1.5754, "step": 491 }, { "epoch": 1.31, "learning_rate": 6.281412263720344e-05, "loss": 1.5056, "step": 492 }, { "epoch": 1.31, "learning_rate": 6.267566236980574e-05, "loss": 1.5539, "step": 493 }, { "epoch": 1.31, "learning_rate": 6.253709814466168e-05, "loss": 1.5229, "step": 494 }, { "epoch": 1.31, "learning_rate": 6.239843109818716e-05, "loss": 1.4894, "step": 495 }, { "epoch": 1.32, "learning_rate": 6.22596623676414e-05, "loss": 1.5337, "step": 496 }, { "epoch": 1.32, "learning_rate": 6.212079309111753e-05, "loss": 1.592, "step": 497 }, { "epoch": 1.32, "learning_rate": 6.19818244075333e-05, "loss": 1.4937, "step": 498 }, { "epoch": 1.32, "learning_rate": 6.18427574566218e-05, "loss": 1.5862, "step": 499 }, { "epoch": 1.33, "learning_rate": 6.170359337892194e-05, "loss": 1.5252, "step": 500 }, { "epoch": 1.33, "learning_rate": 6.156433331576927e-05, "loss": 1.5639, "step": 501 }, { "epoch": 1.33, "learning_rate": 6.142497840928656e-05, "loss": 1.5306, "step": 502 }, { "epoch": 1.33, "learning_rate": 6.128552980237437e-05, "loss": 1.6537, "step": 503 }, { "epoch": 1.34, "learning_rate": 6.114598863870177e-05, "loss": 1.4589, "step": 504 }, { "epoch": 1.34, "learning_rate": 6.100635606269694e-05, "loss": 1.5472, "step": 505 }, { "epoch": 1.34, "learning_rate": 6.0866633219537694e-05, "loss": 1.5372, "step": 506 }, { "epoch": 1.34, "learning_rate": 6.0726821255142255e-05, "loss": 1.4852, "step": 507 }, { "epoch": 1.35, "learning_rate": 6.058692131615968e-05, "loss": 1.509, "step": 508 }, { "epoch": 1.35, "learning_rate": 6.04469345499606e-05, "loss": 1.5736, "step": 509 }, { "epoch": 1.35, "learning_rate": 6.0306862104627705e-05, "loss": 1.5348, "step": 510 }, { "epoch": 1.36, "learning_rate": 6.0166705128946375e-05, "loss": 1.5519, "step": 511 }, { "epoch": 1.36, "learning_rate": 6.00264647723953e-05, "loss": 1.5526, "step": 512 }, { "epoch": 1.36, "learning_rate": 5.988614218513693e-05, "loss": 1.5908, "step": 513 }, { "epoch": 1.36, "learning_rate": 5.974573851800818e-05, "loss": 1.5455, "step": 514 }, { "epoch": 1.37, "learning_rate": 5.9605254922510926e-05, "loss": 1.5317, "step": 515 }, { "epoch": 1.37, "learning_rate": 5.946469255080251e-05, "loss": 1.5962, "step": 516 }, { "epoch": 1.37, "learning_rate": 5.9324052555686436e-05, "loss": 1.6437, "step": 517 }, { "epoch": 1.37, "learning_rate": 5.918333609060276e-05, "loss": 1.5306, "step": 518 }, { "epoch": 1.38, "learning_rate": 5.9042544309618694e-05, "loss": 1.5289, "step": 519 }, { "epoch": 1.38, "learning_rate": 5.890167836741919e-05, "loss": 1.5338, "step": 520 }, { "epoch": 1.38, "learning_rate": 5.8760739419297384e-05, "loss": 1.6154, "step": 521 }, { "epoch": 1.38, "learning_rate": 5.861972862114518e-05, "loss": 1.5108, "step": 522 }, { "epoch": 1.39, "learning_rate": 5.847864712944373e-05, "loss": 1.5818, "step": 523 }, { "epoch": 1.39, "learning_rate": 5.833749610125402e-05, "loss": 1.6317, "step": 524 }, { "epoch": 1.39, "learning_rate": 5.819627669420724e-05, "loss": 1.5724, "step": 525 }, { "epoch": 1.4, "learning_rate": 5.805499006649547e-05, "loss": 1.5023, "step": 526 }, { "epoch": 1.4, "learning_rate": 5.791363737686205e-05, "loss": 1.5374, "step": 527 }, { "epoch": 1.4, "learning_rate": 5.7772219784592105e-05, "loss": 1.5141, "step": 528 }, { "epoch": 1.4, "learning_rate": 5.76307384495031e-05, "loss": 1.6443, "step": 529 }, { "epoch": 1.41, "learning_rate": 5.748919453193521e-05, "loss": 1.5954, "step": 530 }, { "epoch": 1.41, "learning_rate": 5.734758919274192e-05, "loss": 1.6019, "step": 531 }, { "epoch": 1.41, "learning_rate": 5.720592359328047e-05, "loss": 1.6241, "step": 532 }, { "epoch": 1.41, "learning_rate": 5.706419889540225e-05, "loss": 1.5813, "step": 533 }, { "epoch": 1.42, "learning_rate": 5.69224162614434e-05, "loss": 1.518, "step": 534 }, { "epoch": 1.42, "learning_rate": 5.6780576854215195e-05, "loss": 1.5473, "step": 535 }, { "epoch": 1.42, "learning_rate": 5.6638681836994535e-05, "loss": 1.6277, "step": 536 }, { "epoch": 1.42, "learning_rate": 5.649673237351436e-05, "loss": 1.6213, "step": 537 }, { "epoch": 1.43, "learning_rate": 5.6354729627954195e-05, "loss": 1.5182, "step": 538 }, { "epoch": 1.43, "learning_rate": 5.621267476493053e-05, "loss": 1.6186, "step": 539 }, { "epoch": 1.43, "learning_rate": 5.607056894948728e-05, "loss": 1.5195, "step": 540 }, { "epoch": 1.44, "learning_rate": 5.592841334708624e-05, "loss": 1.5293, "step": 541 }, { "epoch": 1.44, "learning_rate": 5.578620912359758e-05, "loss": 1.6225, "step": 542 }, { "epoch": 1.44, "learning_rate": 5.564395744529012e-05, "loss": 1.5548, "step": 543 }, { "epoch": 1.44, "learning_rate": 5.5501659478821964e-05, "loss": 1.556, "step": 544 }, { "epoch": 1.45, "learning_rate": 5.535931639123083e-05, "loss": 1.4946, "step": 545 }, { "epoch": 1.45, "learning_rate": 5.521692934992447e-05, "loss": 1.5343, "step": 546 }, { "epoch": 1.45, "learning_rate": 5.5074499522671106e-05, "loss": 1.5353, "step": 547 }, { "epoch": 1.45, "learning_rate": 5.493202807758992e-05, "loss": 1.5644, "step": 548 }, { "epoch": 1.46, "learning_rate": 5.478951618314133e-05, "loss": 1.4671, "step": 549 }, { "epoch": 1.46, "learning_rate": 5.464696500811757e-05, "loss": 1.553, "step": 550 }, { "epoch": 1.46, "learning_rate": 5.450437572163298e-05, "loss": 1.5658, "step": 551 }, { "epoch": 1.46, "learning_rate": 5.4361749493114514e-05, "loss": 1.5448, "step": 552 }, { "epoch": 1.47, "learning_rate": 5.4219087492292054e-05, "loss": 1.5305, "step": 553 }, { "epoch": 1.47, "learning_rate": 5.407639088918888e-05, "loss": 1.5567, "step": 554 }, { "epoch": 1.47, "learning_rate": 5.3933660854112075e-05, "loss": 1.5312, "step": 555 }, { "epoch": 1.47, "learning_rate": 5.37908985576429e-05, "loss": 1.4669, "step": 556 }, { "epoch": 1.48, "learning_rate": 5.364810517062717e-05, "loss": 1.6714, "step": 557 }, { "epoch": 1.48, "learning_rate": 5.350528186416573e-05, "loss": 1.5867, "step": 558 }, { "epoch": 1.48, "learning_rate": 5.3362429809604806e-05, "loss": 1.5232, "step": 559 }, { "epoch": 1.49, "learning_rate": 5.321955017852637e-05, "loss": 1.5636, "step": 560 }, { "epoch": 1.49, "learning_rate": 5.307664414273855e-05, "loss": 1.4686, "step": 561 }, { "epoch": 1.49, "learning_rate": 5.2933712874266084e-05, "loss": 1.5301, "step": 562 }, { "epoch": 1.49, "learning_rate": 5.2790757545340586e-05, "loss": 1.5631, "step": 563 }, { "epoch": 1.5, "learning_rate": 5.2647779328391045e-05, "loss": 1.5805, "step": 564 }, { "epoch": 1.5, "learning_rate": 5.2504779396034146e-05, "loss": 1.6171, "step": 565 }, { "epoch": 1.5, "learning_rate": 5.236175892106467e-05, "loss": 1.5264, "step": 566 }, { "epoch": 1.5, "learning_rate": 5.221871907644589e-05, "loss": 1.5189, "step": 567 }, { "epoch": 1.51, "learning_rate": 5.207566103529991e-05, "loss": 1.5974, "step": 568 }, { "epoch": 1.51, "learning_rate": 5.1932585970898096e-05, "loss": 1.5221, "step": 569 }, { "epoch": 1.51, "learning_rate": 5.17894950566514e-05, "loss": 1.5471, "step": 570 }, { "epoch": 1.51, "learning_rate": 5.1646389466100795e-05, "loss": 1.521, "step": 571 }, { "epoch": 1.52, "learning_rate": 5.150327037290761e-05, "loss": 1.5258, "step": 572 }, { "epoch": 1.52, "learning_rate": 5.136013895084388e-05, "loss": 1.4685, "step": 573 }, { "epoch": 1.52, "learning_rate": 5.121699637378282e-05, "loss": 1.5678, "step": 574 }, { "epoch": 1.53, "learning_rate": 5.107384381568907e-05, "loss": 1.4684, "step": 575 }, { "epoch": 1.53, "learning_rate": 5.093068245060917e-05, "loss": 1.4688, "step": 576 }, { "epoch": 1.53, "learning_rate": 5.0787513452661864e-05, "loss": 1.566, "step": 577 }, { "epoch": 1.53, "learning_rate": 5.064433799602849e-05, "loss": 1.5323, "step": 578 }, { "epoch": 1.54, "learning_rate": 5.05011572549434e-05, "loss": 1.581, "step": 579 }, { "epoch": 1.54, "learning_rate": 5.0357972403684225e-05, "loss": 1.5065, "step": 580 }, { "epoch": 1.54, "learning_rate": 5.021478461656235e-05, "loss": 1.5708, "step": 581 }, { "epoch": 1.54, "learning_rate": 5.007159506791325e-05, "loss": 1.5121, "step": 582 }, { "epoch": 1.55, "learning_rate": 4.992840493208676e-05, "loss": 1.5743, "step": 583 }, { "epoch": 1.55, "learning_rate": 4.9785215383437646e-05, "loss": 1.5861, "step": 584 }, { "epoch": 1.55, "learning_rate": 4.9642027596315786e-05, "loss": 1.5671, "step": 585 }, { "epoch": 1.55, "learning_rate": 4.949884274505661e-05, "loss": 1.5105, "step": 586 }, { "epoch": 1.56, "learning_rate": 4.935566200397152e-05, "loss": 1.5658, "step": 587 }, { "epoch": 1.56, "learning_rate": 4.921248654733814e-05, "loss": 1.5483, "step": 588 }, { "epoch": 1.56, "learning_rate": 4.906931754939084e-05, "loss": 1.567, "step": 589 }, { "epoch": 1.56, "learning_rate": 4.8926156184310946e-05, "loss": 1.5763, "step": 590 }, { "epoch": 1.57, "learning_rate": 4.878300362621719e-05, "loss": 1.5044, "step": 591 }, { "epoch": 1.57, "learning_rate": 4.8639861049156136e-05, "loss": 1.5653, "step": 592 }, { "epoch": 1.57, "learning_rate": 4.8496729627092405e-05, "loss": 1.5588, "step": 593 }, { "epoch": 1.58, "learning_rate": 4.835361053389922e-05, "loss": 1.4821, "step": 594 }, { "epoch": 1.58, "learning_rate": 4.821050494334861e-05, "loss": 1.6273, "step": 595 }, { "epoch": 1.58, "learning_rate": 4.806741402910193e-05, "loss": 1.4818, "step": 596 }, { "epoch": 1.58, "learning_rate": 4.7924338964700096e-05, "loss": 1.4659, "step": 597 }, { "epoch": 1.59, "learning_rate": 4.778128092355412e-05, "loss": 1.5297, "step": 598 }, { "epoch": 1.59, "learning_rate": 4.7638241078935325e-05, "loss": 1.585, "step": 599 }, { "epoch": 1.59, "learning_rate": 4.7495220603965866e-05, "loss": 1.4958, "step": 600 }, { "epoch": 1.59, "learning_rate": 4.735222067160896e-05, "loss": 1.5098, "step": 601 }, { "epoch": 1.6, "learning_rate": 4.720924245465943e-05, "loss": 1.6065, "step": 602 }, { "epoch": 1.6, "learning_rate": 4.706628712573394e-05, "loss": 1.5091, "step": 603 }, { "epoch": 1.6, "learning_rate": 4.6923355857261455e-05, "loss": 1.4611, "step": 604 }, { "epoch": 1.6, "learning_rate": 4.678044982147365e-05, "loss": 1.5287, "step": 605 }, { "epoch": 1.61, "learning_rate": 4.6637570190395205e-05, "loss": 1.5573, "step": 606 }, { "epoch": 1.61, "learning_rate": 4.649471813583427e-05, "loss": 1.6371, "step": 607 }, { "epoch": 1.61, "learning_rate": 4.635189482937284e-05, "loss": 1.5336, "step": 608 }, { "epoch": 1.62, "learning_rate": 4.620910144235712e-05, "loss": 1.5559, "step": 609 }, { "epoch": 1.62, "learning_rate": 4.606633914588793e-05, "loss": 1.5399, "step": 610 }, { "epoch": 1.62, "learning_rate": 4.592360911081113e-05, "loss": 1.487, "step": 611 }, { "epoch": 1.62, "learning_rate": 4.5780912507707944e-05, "loss": 1.5583, "step": 612 }, { "epoch": 1.63, "learning_rate": 4.563825050688549e-05, "loss": 1.5271, "step": 613 }, { "epoch": 1.63, "learning_rate": 4.549562427836701e-05, "loss": 1.5934, "step": 614 }, { "epoch": 1.63, "learning_rate": 4.535303499188244e-05, "loss": 1.5261, "step": 615 }, { "epoch": 1.63, "learning_rate": 4.5210483816858676e-05, "loss": 1.6577, "step": 616 }, { "epoch": 1.64, "learning_rate": 4.506797192241009e-05, "loss": 1.4575, "step": 617 }, { "epoch": 1.64, "learning_rate": 4.49255004773289e-05, "loss": 1.4948, "step": 618 }, { "epoch": 1.64, "learning_rate": 4.478307065007554e-05, "loss": 1.4523, "step": 619 }, { "epoch": 1.64, "learning_rate": 4.464068360876919e-05, "loss": 1.6135, "step": 620 }, { "epoch": 1.65, "learning_rate": 4.449834052117804e-05, "loss": 1.5568, "step": 621 }, { "epoch": 1.65, "learning_rate": 4.4356042554709905e-05, "loss": 1.5823, "step": 622 }, { "epoch": 1.65, "learning_rate": 4.421379087640244e-05, "loss": 1.664, "step": 623 }, { "epoch": 1.66, "learning_rate": 4.407158665291377e-05, "loss": 1.5322, "step": 624 }, { "epoch": 1.66, "learning_rate": 4.3929431050512727e-05, "loss": 1.5811, "step": 625 }, { "epoch": 1.66, "learning_rate": 4.3787325235069487e-05, "loss": 1.5768, "step": 626 }, { "epoch": 1.66, "learning_rate": 4.36452703720458e-05, "loss": 1.5219, "step": 627 }, { "epoch": 1.67, "learning_rate": 4.350326762648565e-05, "loss": 1.5525, "step": 628 }, { "epoch": 1.67, "learning_rate": 4.3361318163005484e-05, "loss": 1.477, "step": 629 }, { "epoch": 1.67, "learning_rate": 4.321942314578482e-05, "loss": 1.524, "step": 630 }, { "epoch": 1.67, "learning_rate": 4.307758373855661e-05, "loss": 1.5741, "step": 631 }, { "epoch": 1.68, "learning_rate": 4.293580110459776e-05, "loss": 1.531, "step": 632 }, { "epoch": 1.68, "learning_rate": 4.279407640671956e-05, "loss": 1.5424, "step": 633 }, { "epoch": 1.68, "learning_rate": 4.265241080725808e-05, "loss": 1.471, "step": 634 }, { "epoch": 1.68, "learning_rate": 4.251080546806481e-05, "loss": 1.5149, "step": 635 }, { "epoch": 1.69, "learning_rate": 4.2369261550496905e-05, "loss": 1.5289, "step": 636 }, { "epoch": 1.69, "learning_rate": 4.22277802154079e-05, "loss": 1.455, "step": 637 }, { "epoch": 1.69, "learning_rate": 4.2086362623137955e-05, "loss": 1.5351, "step": 638 }, { "epoch": 1.69, "learning_rate": 4.194500993350454e-05, "loss": 1.5747, "step": 639 }, { "epoch": 1.7, "learning_rate": 4.180372330579276e-05, "loss": 1.5356, "step": 640 }, { "epoch": 1.7, "learning_rate": 4.1662503898745994e-05, "loss": 1.4969, "step": 641 }, { "epoch": 1.7, "learning_rate": 4.1521352870556266e-05, "loss": 1.5077, "step": 642 }, { "epoch": 1.71, "learning_rate": 4.1380271378854833e-05, "loss": 1.5598, "step": 643 }, { "epoch": 1.71, "learning_rate": 4.1239260580702635e-05, "loss": 1.5431, "step": 644 }, { "epoch": 1.71, "learning_rate": 4.1098321632580824e-05, "loss": 1.5806, "step": 645 }, { "epoch": 1.71, "learning_rate": 4.095745569038133e-05, "loss": 1.4687, "step": 646 }, { "epoch": 1.72, "learning_rate": 4.0816663909397256e-05, "loss": 1.534, "step": 647 }, { "epoch": 1.72, "learning_rate": 4.067594744431358e-05, "loss": 1.5602, "step": 648 }, { "epoch": 1.72, "learning_rate": 4.053530744919749e-05, "loss": 1.5434, "step": 649 }, { "epoch": 1.72, "learning_rate": 4.03947450774891e-05, "loss": 1.4529, "step": 650 }, { "epoch": 1.73, "learning_rate": 4.0254261481991825e-05, "loss": 1.5127, "step": 651 }, { "epoch": 1.73, "learning_rate": 4.011385781486308e-05, "loss": 1.5195, "step": 652 }, { "epoch": 1.73, "learning_rate": 3.9973535227604714e-05, "loss": 1.5714, "step": 653 }, { "epoch": 1.73, "learning_rate": 3.983329487105364e-05, "loss": 1.5864, "step": 654 }, { "epoch": 1.74, "learning_rate": 3.96931378953723e-05, "loss": 1.5457, "step": 655 }, { "epoch": 1.74, "learning_rate": 3.955306545003941e-05, "loss": 1.5544, "step": 656 }, { "epoch": 1.74, "learning_rate": 3.941307868384034e-05, "loss": 1.5802, "step": 657 }, { "epoch": 1.75, "learning_rate": 3.927317874485776e-05, "loss": 1.4793, "step": 658 }, { "epoch": 1.75, "learning_rate": 3.9133366780462325e-05, "loss": 1.5746, "step": 659 }, { "epoch": 1.75, "learning_rate": 3.899364393730308e-05, "loss": 1.5031, "step": 660 }, { "epoch": 1.75, "learning_rate": 3.8854011361298246e-05, "loss": 1.5029, "step": 661 }, { "epoch": 1.76, "learning_rate": 3.871447019762564e-05, "loss": 1.4952, "step": 662 }, { "epoch": 1.76, "learning_rate": 3.857502159071346e-05, "loss": 1.556, "step": 663 }, { "epoch": 1.76, "learning_rate": 3.843566668423073e-05, "loss": 1.5939, "step": 664 }, { "epoch": 1.76, "learning_rate": 3.829640662107807e-05, "loss": 1.5231, "step": 665 }, { "epoch": 1.77, "learning_rate": 3.8157242543378205e-05, "loss": 1.543, "step": 666 }, { "epoch": 1.77, "learning_rate": 3.8018175592466695e-05, "loss": 1.5051, "step": 667 }, { "epoch": 1.77, "learning_rate": 3.787920690888248e-05, "loss": 1.4483, "step": 668 }, { "epoch": 1.77, "learning_rate": 3.7740337632358616e-05, "loss": 1.5926, "step": 669 }, { "epoch": 1.78, "learning_rate": 3.760156890181283e-05, "loss": 1.5499, "step": 670 }, { "epoch": 1.78, "learning_rate": 3.746290185533833e-05, "loss": 1.6084, "step": 671 }, { "epoch": 1.78, "learning_rate": 3.732433763019428e-05, "loss": 1.4915, "step": 672 }, { "epoch": 1.79, "learning_rate": 3.718587736279658e-05, "loss": 1.5149, "step": 673 }, { "epoch": 1.79, "learning_rate": 3.704752218870861e-05, "loss": 1.5557, "step": 674 }, { "epoch": 1.79, "learning_rate": 3.690927324263175e-05, "loss": 1.4818, "step": 675 }, { "epoch": 1.79, "learning_rate": 3.677113165839623e-05, "loss": 1.4723, "step": 676 }, { "epoch": 1.8, "learning_rate": 3.663309856895174e-05, "loss": 1.4855, "step": 677 }, { "epoch": 1.8, "learning_rate": 3.6495175106358154e-05, "loss": 1.5185, "step": 678 }, { "epoch": 1.8, "learning_rate": 3.6357362401776277e-05, "loss": 1.5155, "step": 679 }, { "epoch": 1.8, "learning_rate": 3.621966158545855e-05, "loss": 1.5517, "step": 680 }, { "epoch": 1.81, "learning_rate": 3.608207378673973e-05, "loss": 1.4894, "step": 681 }, { "epoch": 1.81, "learning_rate": 3.594460013402775e-05, "loss": 1.4591, "step": 682 }, { "epoch": 1.81, "learning_rate": 3.580724175479432e-05, "loss": 1.5306, "step": 683 }, { "epoch": 1.81, "learning_rate": 3.566999977556582e-05, "loss": 1.4702, "step": 684 }, { "epoch": 1.82, "learning_rate": 3.5532875321913935e-05, "loss": 1.6138, "step": 685 }, { "epoch": 1.82, "learning_rate": 3.5395869518446464e-05, "loss": 1.4431, "step": 686 }, { "epoch": 1.82, "learning_rate": 3.525898348879819e-05, "loss": 1.6268, "step": 687 }, { "epoch": 1.82, "learning_rate": 3.5122218355621514e-05, "loss": 1.5443, "step": 688 }, { "epoch": 1.83, "learning_rate": 3.4985575240577365e-05, "loss": 1.6156, "step": 689 }, { "epoch": 1.83, "learning_rate": 3.484905526432594e-05, "loss": 1.5196, "step": 690 }, { "epoch": 1.83, "learning_rate": 3.471265954651752e-05, "loss": 1.5153, "step": 691 }, { "epoch": 1.84, "learning_rate": 3.457638920578331e-05, "loss": 1.5637, "step": 692 }, { "epoch": 1.84, "learning_rate": 3.4440245359726266e-05, "loss": 1.555, "step": 693 }, { "epoch": 1.84, "learning_rate": 3.4304229124911856e-05, "loss": 1.5495, "step": 694 }, { "epoch": 1.84, "learning_rate": 3.416834161685907e-05, "loss": 1.5596, "step": 695 }, { "epoch": 1.85, "learning_rate": 3.403258395003102e-05, "loss": 1.5496, "step": 696 }, { "epoch": 1.85, "learning_rate": 3.389695723782609e-05, "loss": 1.5649, "step": 697 }, { "epoch": 1.85, "learning_rate": 3.376146259256855e-05, "loss": 1.5552, "step": 698 }, { "epoch": 1.85, "learning_rate": 3.3626101125499555e-05, "loss": 1.5355, "step": 699 }, { "epoch": 1.86, "learning_rate": 3.349087394676809e-05, "loss": 1.5022, "step": 700 }, { "epoch": 1.86, "learning_rate": 3.33557821654217e-05, "loss": 1.527, "step": 701 }, { "epoch": 1.86, "learning_rate": 3.322082688939755e-05, "loss": 1.5452, "step": 702 }, { "epoch": 1.86, "learning_rate": 3.308600922551324e-05, "loss": 1.5208, "step": 703 }, { "epoch": 1.87, "learning_rate": 3.295133027945778e-05, "loss": 1.47, "step": 704 }, { "epoch": 1.87, "learning_rate": 3.281679115578249e-05, "loss": 1.5202, "step": 705 }, { "epoch": 1.87, "learning_rate": 3.2682392957891985e-05, "loss": 1.4507, "step": 706 }, { "epoch": 1.88, "learning_rate": 3.254813678803504e-05, "loss": 1.6117, "step": 707 }, { "epoch": 1.88, "learning_rate": 3.241402374729569e-05, "loss": 1.6149, "step": 708 }, { "epoch": 1.88, "learning_rate": 3.2280054935584025e-05, "loss": 1.5947, "step": 709 }, { "epoch": 1.88, "learning_rate": 3.2146231451627334e-05, "loss": 1.5165, "step": 710 }, { "epoch": 1.89, "learning_rate": 3.2012554392960966e-05, "loss": 1.4893, "step": 711 }, { "epoch": 1.89, "learning_rate": 3.187902485591941e-05, "loss": 1.6028, "step": 712 }, { "epoch": 1.89, "learning_rate": 3.174564393562728e-05, "loss": 1.5429, "step": 713 }, { "epoch": 1.89, "learning_rate": 3.161241272599031e-05, "loss": 1.5214, "step": 714 }, { "epoch": 1.9, "learning_rate": 3.147933231968642e-05, "loss": 1.5541, "step": 715 }, { "epoch": 1.9, "learning_rate": 3.1346403808156713e-05, "loss": 1.5747, "step": 716 }, { "epoch": 1.9, "learning_rate": 3.121362828159659e-05, "loss": 1.5768, "step": 717 }, { "epoch": 1.9, "learning_rate": 3.108100682894671e-05, "loss": 1.6119, "step": 718 }, { "epoch": 1.91, "learning_rate": 3.094854053788418e-05, "loss": 1.577, "step": 719 }, { "epoch": 1.91, "learning_rate": 3.08162304948135e-05, "loss": 1.5888, "step": 720 }, { "epoch": 1.91, "learning_rate": 3.06840777848578e-05, "loss": 1.5093, "step": 721 }, { "epoch": 1.92, "learning_rate": 3.055208349184977e-05, "loss": 1.4787, "step": 722 }, { "epoch": 1.92, "learning_rate": 3.0420248698322973e-05, "loss": 1.5513, "step": 723 }, { "epoch": 1.92, "learning_rate": 3.0288574485502757e-05, "loss": 1.594, "step": 724 }, { "epoch": 1.92, "learning_rate": 3.015706193329757e-05, "loss": 1.5548, "step": 725 }, { "epoch": 1.93, "learning_rate": 3.002571212028995e-05, "loss": 1.5783, "step": 726 }, { "epoch": 1.93, "learning_rate": 2.9894526123727808e-05, "loss": 1.5001, "step": 727 }, { "epoch": 1.93, "learning_rate": 2.9763505019515525e-05, "loss": 1.5542, "step": 728 }, { "epoch": 1.93, "learning_rate": 2.9632649882205088e-05, "loss": 1.5134, "step": 729 }, { "epoch": 1.94, "learning_rate": 2.950196178498743e-05, "loss": 1.5232, "step": 730 }, { "epoch": 1.94, "learning_rate": 2.937144179968342e-05, "loss": 1.4753, "step": 731 }, { "epoch": 1.94, "learning_rate": 2.9241090996735266e-05, "loss": 1.4371, "step": 732 }, { "epoch": 1.94, "learning_rate": 2.911091044519757e-05, "loss": 1.5026, "step": 733 }, { "epoch": 1.95, "learning_rate": 2.8980901212728728e-05, "loss": 1.5565, "step": 734 }, { "epoch": 1.95, "learning_rate": 2.8851064365581982e-05, "loss": 1.509, "step": 735 }, { "epoch": 1.95, "learning_rate": 2.8721400968596903e-05, "loss": 1.5417, "step": 736 }, { "epoch": 1.95, "learning_rate": 2.8591912085190392e-05, "loss": 1.4827, "step": 737 }, { "epoch": 1.96, "learning_rate": 2.8462598777348247e-05, "loss": 1.5347, "step": 738 }, { "epoch": 1.96, "learning_rate": 2.8333462105616194e-05, "loss": 1.5072, "step": 739 }, { "epoch": 1.96, "learning_rate": 2.820450312909134e-05, "loss": 1.4506, "step": 740 }, { "epoch": 1.97, "learning_rate": 2.807572290541346e-05, "loss": 1.5673, "step": 741 }, { "epoch": 1.97, "learning_rate": 2.79471224907563e-05, "loss": 1.5108, "step": 742 }, { "epoch": 1.97, "learning_rate": 2.781870293981893e-05, "loss": 1.4845, "step": 743 }, { "epoch": 1.97, "learning_rate": 2.7690465305817088e-05, "loss": 1.5846, "step": 744 }, { "epoch": 1.98, "learning_rate": 2.756241064047456e-05, "loss": 1.5504, "step": 745 }, { "epoch": 1.98, "learning_rate": 2.7434539994014475e-05, "loss": 1.5451, "step": 746 }, { "epoch": 1.98, "learning_rate": 2.730685441515088e-05, "loss": 1.4817, "step": 747 }, { "epoch": 1.98, "learning_rate": 2.7179354951079856e-05, "loss": 1.4819, "step": 748 }, { "epoch": 1.99, "learning_rate": 2.7052042647471252e-05, "loss": 1.5487, "step": 749 }, { "epoch": 1.99, "learning_rate": 2.69249185484598e-05, "loss": 1.4851, "step": 750 }, { "epoch": 1.99, "learning_rate": 2.679798369663683e-05, "loss": 1.5208, "step": 751 }, { "epoch": 1.99, "learning_rate": 2.667123913304146e-05, "loss": 1.536, "step": 752 }, { "epoch": 2.0, "learning_rate": 2.6544685897152272e-05, "loss": 1.5505, "step": 753 }, { "epoch": 2.0, "learning_rate": 2.6418325026878665e-05, "loss": 1.6026, "step": 754 }, { "epoch": 2.0, "learning_rate": 2.629215755855239e-05, "loss": 1.4181, "step": 755 }, { "epoch": 2.01, "learning_rate": 2.6166184526919047e-05, "loss": 1.4751, "step": 756 }, { "epoch": 2.01, "learning_rate": 2.6040406965129515e-05, "loss": 1.4894, "step": 757 }, { "epoch": 2.01, "learning_rate": 2.5914825904731686e-05, "loss": 1.5007, "step": 758 }, { "epoch": 2.01, "learning_rate": 2.5789442375661744e-05, "loss": 1.372, "step": 759 }, { "epoch": 2.02, "learning_rate": 2.5664257406235955e-05, "loss": 1.4389, "step": 760 }, { "epoch": 2.02, "learning_rate": 2.5539272023141995e-05, "loss": 1.4259, "step": 761 }, { "epoch": 2.02, "learning_rate": 2.541448725143083e-05, "loss": 1.4355, "step": 762 }, { "epoch": 2.02, "learning_rate": 2.5289904114507946e-05, "loss": 1.4497, "step": 763 }, { "epoch": 2.03, "learning_rate": 2.516552363412534e-05, "loss": 1.4206, "step": 764 }, { "epoch": 2.03, "learning_rate": 2.504134683037278e-05, "loss": 1.481, "step": 765 }, { "epoch": 2.03, "learning_rate": 2.491737472166972e-05, "loss": 1.4599, "step": 766 }, { "epoch": 2.03, "learning_rate": 2.479360832475679e-05, "loss": 1.5219, "step": 767 }, { "epoch": 2.04, "learning_rate": 2.46700486546875e-05, "loss": 1.4852, "step": 768 }, { "epoch": 2.04, "learning_rate": 2.4546696724819963e-05, "loss": 1.4385, "step": 769 }, { "epoch": 2.04, "learning_rate": 2.4423553546808427e-05, "loss": 1.4962, "step": 770 }, { "epoch": 2.05, "learning_rate": 2.430062013059526e-05, "loss": 1.441, "step": 771 }, { "epoch": 2.05, "learning_rate": 2.4177897484402306e-05, "loss": 1.4178, "step": 772 }, { "epoch": 2.05, "learning_rate": 2.4055386614722996e-05, "loss": 1.499, "step": 773 }, { "epoch": 2.05, "learning_rate": 2.393308852631373e-05, "loss": 1.4574, "step": 774 }, { "epoch": 2.06, "learning_rate": 2.381100422218596e-05, "loss": 1.4838, "step": 775 }, { "epoch": 2.06, "learning_rate": 2.3689134703597706e-05, "loss": 1.479, "step": 776 }, { "epoch": 2.06, "learning_rate": 2.3567480970045492e-05, "loss": 1.5401, "step": 777 }, { "epoch": 2.06, "learning_rate": 2.344604401925613e-05, "loss": 1.4839, "step": 778 }, { "epoch": 2.07, "learning_rate": 2.3324824847178494e-05, "loss": 1.4536, "step": 779 }, { "epoch": 2.07, "learning_rate": 2.3203824447975392e-05, "loss": 1.3847, "step": 780 }, { "epoch": 2.07, "learning_rate": 2.308304381401534e-05, "loss": 1.4686, "step": 781 }, { "epoch": 2.07, "learning_rate": 2.296248393586459e-05, "loss": 1.4785, "step": 782 }, { "epoch": 2.08, "learning_rate": 2.284214580227875e-05, "loss": 1.4651, "step": 783 }, { "epoch": 2.08, "learning_rate": 2.2722030400194976e-05, "loss": 1.4577, "step": 784 }, { "epoch": 2.08, "learning_rate": 2.2602138714723574e-05, "loss": 1.4656, "step": 785 }, { "epoch": 2.08, "learning_rate": 2.24824717291402e-05, "loss": 1.4736, "step": 786 }, { "epoch": 2.09, "learning_rate": 2.2363030424877535e-05, "loss": 1.4946, "step": 787 }, { "epoch": 2.09, "learning_rate": 2.2243815781517496e-05, "loss": 1.4902, "step": 788 }, { "epoch": 2.09, "learning_rate": 2.2124828776782957e-05, "loss": 1.3805, "step": 789 }, { "epoch": 2.1, "learning_rate": 2.2006070386529913e-05, "loss": 1.4926, "step": 790 }, { "epoch": 2.1, "learning_rate": 2.1887541584739385e-05, "loss": 1.4136, "step": 791 }, { "epoch": 2.1, "learning_rate": 2.1769243343509454e-05, "loss": 1.4177, "step": 792 }, { "epoch": 2.1, "learning_rate": 2.165117663304732e-05, "loss": 1.4555, "step": 793 }, { "epoch": 2.11, "learning_rate": 2.153334242166123e-05, "loss": 1.4362, "step": 794 }, { "epoch": 2.11, "learning_rate": 2.1415741675752742e-05, "loss": 1.4483, "step": 795 }, { "epoch": 2.11, "learning_rate": 2.129837535980856e-05, "loss": 1.3899, "step": 796 }, { "epoch": 2.11, "learning_rate": 2.1181244436392855e-05, "loss": 1.521, "step": 797 }, { "epoch": 2.12, "learning_rate": 2.1064349866139132e-05, "loss": 1.4221, "step": 798 }, { "epoch": 2.12, "learning_rate": 2.094769260774262e-05, "loss": 1.4968, "step": 799 }, { "epoch": 2.12, "learning_rate": 2.0831273617952136e-05, "loss": 1.4631, "step": 800 }, { "epoch": 2.12, "learning_rate": 2.071509385156244e-05, "loss": 1.4571, "step": 801 }, { "epoch": 2.13, "learning_rate": 2.0599154261406316e-05, "loss": 1.4922, "step": 802 }, { "epoch": 2.13, "learning_rate": 2.0483455798346786e-05, "loss": 1.4316, "step": 803 }, { "epoch": 2.13, "learning_rate": 2.0367999411269285e-05, "loss": 1.4226, "step": 804 }, { "epoch": 2.14, "learning_rate": 2.0252786047073895e-05, "loss": 1.4586, "step": 805 }, { "epoch": 2.14, "learning_rate": 2.0137816650667612e-05, "loss": 1.4131, "step": 806 }, { "epoch": 2.14, "learning_rate": 2.0023092164956474e-05, "loss": 1.4782, "step": 807 }, { "epoch": 2.14, "learning_rate": 1.9908613530838055e-05, "loss": 1.4648, "step": 808 }, { "epoch": 2.15, "learning_rate": 1.979438168719346e-05, "loss": 1.4328, "step": 809 }, { "epoch": 2.15, "learning_rate": 1.968039757087991e-05, "loss": 1.4804, "step": 810 }, { "epoch": 2.15, "learning_rate": 1.9566662116722793e-05, "loss": 1.5185, "step": 811 }, { "epoch": 2.15, "learning_rate": 1.9453176257508275e-05, "loss": 1.418, "step": 812 }, { "epoch": 2.16, "learning_rate": 1.9339940923975364e-05, "loss": 1.5342, "step": 813 }, { "epoch": 2.16, "learning_rate": 1.9226957044808497e-05, "loss": 1.4951, "step": 814 }, { "epoch": 2.16, "learning_rate": 1.911422554662981e-05, "loss": 1.5001, "step": 815 }, { "epoch": 2.16, "learning_rate": 1.9001747353991582e-05, "loss": 1.4289, "step": 816 }, { "epoch": 2.17, "learning_rate": 1.888952338936864e-05, "loss": 1.4779, "step": 817 }, { "epoch": 2.17, "learning_rate": 1.8777554573150795e-05, "loss": 1.4541, "step": 818 }, { "epoch": 2.17, "learning_rate": 1.8665841823635284e-05, "loss": 1.3708, "step": 819 }, { "epoch": 2.18, "learning_rate": 1.855438605701925e-05, "loss": 1.4434, "step": 820 }, { "epoch": 2.18, "learning_rate": 1.8443188187392257e-05, "loss": 1.4388, "step": 821 }, { "epoch": 2.18, "learning_rate": 1.8332249126728666e-05, "loss": 1.543, "step": 822 }, { "epoch": 2.18, "learning_rate": 1.8221569784880397e-05, "loss": 1.4487, "step": 823 }, { "epoch": 2.19, "learning_rate": 1.811115106956918e-05, "loss": 1.4323, "step": 824 }, { "epoch": 2.19, "learning_rate": 1.8000993886379342e-05, "loss": 1.4424, "step": 825 }, { "epoch": 2.19, "learning_rate": 1.789109913875025e-05, "loss": 1.3609, "step": 826 }, { "epoch": 2.19, "learning_rate": 1.7781467727968953e-05, "loss": 1.4008, "step": 827 }, { "epoch": 2.2, "learning_rate": 1.7672100553162774e-05, "loss": 1.4308, "step": 828 }, { "epoch": 2.2, "learning_rate": 1.7562998511291946e-05, "loss": 1.5258, "step": 829 }, { "epoch": 2.2, "learning_rate": 1.745416249714224e-05, "loss": 1.4535, "step": 830 }, { "epoch": 2.2, "learning_rate": 1.734559340331765e-05, "loss": 1.4607, "step": 831 }, { "epoch": 2.21, "learning_rate": 1.7237292120233044e-05, "loss": 1.3692, "step": 832 }, { "epoch": 2.21, "learning_rate": 1.7129259536106885e-05, "loss": 1.4383, "step": 833 }, { "epoch": 2.21, "learning_rate": 1.702149653695395e-05, "loss": 1.3952, "step": 834 }, { "epoch": 2.21, "learning_rate": 1.691400400657799e-05, "loss": 1.3934, "step": 835 }, { "epoch": 2.22, "learning_rate": 1.6806782826564654e-05, "loss": 1.4273, "step": 836 }, { "epoch": 2.22, "learning_rate": 1.6699833876274028e-05, "loss": 1.4847, "step": 837 }, { "epoch": 2.22, "learning_rate": 1.6593158032833624e-05, "loss": 1.4369, "step": 838 }, { "epoch": 2.23, "learning_rate": 1.6486756171131063e-05, "loss": 1.4289, "step": 839 }, { "epoch": 2.23, "learning_rate": 1.638062916380697e-05, "loss": 1.4509, "step": 840 }, { "epoch": 2.23, "learning_rate": 1.627477788124776e-05, "loss": 1.4375, "step": 841 }, { "epoch": 2.23, "learning_rate": 1.6169203191578557e-05, "loss": 1.5603, "step": 842 }, { "epoch": 2.24, "learning_rate": 1.606390596065602e-05, "loss": 1.5002, "step": 843 }, { "epoch": 2.24, "learning_rate": 1.5958887052061283e-05, "loss": 1.4442, "step": 844 }, { "epoch": 2.24, "learning_rate": 1.5854147327092855e-05, "loss": 1.4966, "step": 845 }, { "epoch": 2.24, "learning_rate": 1.5749687644759552e-05, "loss": 1.4576, "step": 846 }, { "epoch": 2.25, "learning_rate": 1.564550886177348e-05, "loss": 1.3977, "step": 847 }, { "epoch": 2.25, "learning_rate": 1.5541611832542925e-05, "loss": 1.5316, "step": 848 }, { "epoch": 2.25, "learning_rate": 1.5437997409165478e-05, "loss": 1.434, "step": 849 }, { "epoch": 2.25, "learning_rate": 1.533466644142095e-05, "loss": 1.449, "step": 850 }, { "epoch": 2.26, "learning_rate": 1.523161977676441e-05, "loss": 1.4726, "step": 851 }, { "epoch": 2.26, "learning_rate": 1.5128858260319285e-05, "loss": 1.4609, "step": 852 }, { "epoch": 2.26, "learning_rate": 1.5026382734870376e-05, "loss": 1.3527, "step": 853 }, { "epoch": 2.27, "learning_rate": 1.4924194040856975e-05, "loss": 1.4362, "step": 854 }, { "epoch": 2.27, "learning_rate": 1.4822293016365962e-05, "loss": 1.4483, "step": 855 }, { "epoch": 2.27, "learning_rate": 1.4720680497124934e-05, "loss": 1.3976, "step": 856 }, { "epoch": 2.27, "learning_rate": 1.4619357316495352e-05, "loss": 1.4461, "step": 857 }, { "epoch": 2.28, "learning_rate": 1.4518324305465702e-05, "loss": 1.4909, "step": 858 }, { "epoch": 2.28, "learning_rate": 1.4417582292644694e-05, "loss": 1.4755, "step": 859 }, { "epoch": 2.28, "learning_rate": 1.4317132104254438e-05, "loss": 1.4927, "step": 860 }, { "epoch": 2.28, "learning_rate": 1.421697456412371e-05, "loss": 1.4471, "step": 861 }, { "epoch": 2.29, "learning_rate": 1.4117110493681124e-05, "loss": 1.4859, "step": 862 }, { "epoch": 2.29, "learning_rate": 1.401754071194849e-05, "loss": 1.471, "step": 863 }, { "epoch": 2.29, "learning_rate": 1.3918266035534027e-05, "loss": 1.3538, "step": 864 }, { "epoch": 2.29, "learning_rate": 1.3819287278625697e-05, "loss": 1.4906, "step": 865 }, { "epoch": 2.3, "learning_rate": 1.3720605252984503e-05, "loss": 1.5514, "step": 866 }, { "epoch": 2.3, "learning_rate": 1.362222076793786e-05, "loss": 1.45, "step": 867 }, { "epoch": 2.3, "learning_rate": 1.3524134630372937e-05, "loss": 1.4378, "step": 868 }, { "epoch": 2.31, "learning_rate": 1.3426347644730047e-05, "loss": 1.4655, "step": 869 }, { "epoch": 2.31, "learning_rate": 1.3328860612996053e-05, "loss": 1.4872, "step": 870 }, { "epoch": 2.31, "learning_rate": 1.3231674334697774e-05, "loss": 1.5583, "step": 871 }, { "epoch": 2.31, "learning_rate": 1.3134789606895476e-05, "loss": 1.3942, "step": 872 }, { "epoch": 2.32, "learning_rate": 1.3038207224176213e-05, "loss": 1.4931, "step": 873 }, { "epoch": 2.32, "learning_rate": 1.2941927978647528e-05, "loss": 1.4525, "step": 874 }, { "epoch": 2.32, "learning_rate": 1.2845952659930693e-05, "loss": 1.5043, "step": 875 }, { "epoch": 2.32, "learning_rate": 1.275028205515445e-05, "loss": 1.3989, "step": 876 }, { "epoch": 2.33, "learning_rate": 1.2654916948948436e-05, "loss": 1.4537, "step": 877 }, { "epoch": 2.33, "learning_rate": 1.2559858123436802e-05, "loss": 1.4277, "step": 878 }, { "epoch": 2.33, "learning_rate": 1.2465106358231753e-05, "loss": 1.441, "step": 879 }, { "epoch": 2.33, "learning_rate": 1.23706624304272e-05, "loss": 1.4923, "step": 880 }, { "epoch": 2.34, "learning_rate": 1.2276527114592367e-05, "loss": 1.4097, "step": 881 }, { "epoch": 2.34, "learning_rate": 1.2182701182765426e-05, "loss": 1.4913, "step": 882 }, { "epoch": 2.34, "learning_rate": 1.208918540444719e-05, "loss": 1.421, "step": 883 }, { "epoch": 2.34, "learning_rate": 1.1995980546594776e-05, "loss": 1.4794, "step": 884 }, { "epoch": 2.35, "learning_rate": 1.1903087373615351e-05, "loss": 1.4501, "step": 885 }, { "epoch": 2.35, "learning_rate": 1.1810506647359793e-05, "loss": 1.5201, "step": 886 }, { "epoch": 2.35, "learning_rate": 1.171823912711657e-05, "loss": 1.4111, "step": 887 }, { "epoch": 2.36, "learning_rate": 1.1626285569605344e-05, "loss": 1.4176, "step": 888 }, { "epoch": 2.36, "learning_rate": 1.153464672897091e-05, "loss": 1.5283, "step": 889 }, { "epoch": 2.36, "learning_rate": 1.144332335677694e-05, "loss": 1.4544, "step": 890 }, { "epoch": 2.36, "learning_rate": 1.1352316201999841e-05, "loss": 1.4489, "step": 891 }, { "epoch": 2.37, "learning_rate": 1.1261626011022586e-05, "loss": 1.4405, "step": 892 }, { "epoch": 2.37, "learning_rate": 1.1171253527628628e-05, "loss": 1.4862, "step": 893 }, { "epoch": 2.37, "learning_rate": 1.1081199492995781e-05, "loss": 1.4785, "step": 894 }, { "epoch": 2.37, "learning_rate": 1.0991464645690142e-05, "loss": 1.4659, "step": 895 }, { "epoch": 2.38, "learning_rate": 1.0902049721660046e-05, "loss": 1.438, "step": 896 }, { "epoch": 2.38, "learning_rate": 1.0812955454229978e-05, "loss": 1.4393, "step": 897 }, { "epoch": 2.38, "learning_rate": 1.0724182574094682e-05, "loss": 1.4228, "step": 898 }, { "epoch": 2.38, "learning_rate": 1.0635731809312993e-05, "loss": 1.4681, "step": 899 }, { "epoch": 2.39, "learning_rate": 1.0547603885302049e-05, "loss": 1.4792, "step": 900 }, { "epoch": 2.39, "learning_rate": 1.045979952483117e-05, "loss": 1.4224, "step": 901 }, { "epoch": 2.39, "learning_rate": 1.037231944801607e-05, "loss": 1.4102, "step": 902 }, { "epoch": 2.4, "learning_rate": 1.0285164372312884e-05, "loss": 1.4084, "step": 903 }, { "epoch": 2.4, "learning_rate": 1.0198335012512272e-05, "loss": 1.493, "step": 904 }, { "epoch": 2.4, "learning_rate": 1.0111832080733601e-05, "loss": 1.4612, "step": 905 }, { "epoch": 2.4, "learning_rate": 1.0025656286419078e-05, "loss": 1.4915, "step": 906 }, { "epoch": 2.41, "learning_rate": 9.939808336327921e-06, "loss": 1.4335, "step": 907 }, { "epoch": 2.41, "learning_rate": 9.854288934530604e-06, "loss": 1.4599, "step": 908 }, { "epoch": 2.41, "learning_rate": 9.769098782403041e-06, "loss": 1.3922, "step": 909 }, { "epoch": 2.41, "learning_rate": 9.684238578620814e-06, "loss": 1.4095, "step": 910 }, { "epoch": 2.42, "learning_rate": 9.599709019153568e-06, "loss": 1.4492, "step": 911 }, { "epoch": 2.42, "learning_rate": 9.515510797259102e-06, "loss": 1.4238, "step": 912 }, { "epoch": 2.42, "learning_rate": 9.431644603477907e-06, "loss": 1.4694, "step": 913 }, { "epoch": 2.42, "learning_rate": 9.34811112562728e-06, "loss": 1.5137, "step": 914 }, { "epoch": 2.43, "learning_rate": 9.264911048795893e-06, "loss": 1.3667, "step": 915 }, { "epoch": 2.43, "learning_rate": 9.182045055337995e-06, "loss": 1.4456, "step": 916 }, { "epoch": 2.43, "learning_rate": 9.099513824867939e-06, "loss": 1.4658, "step": 917 }, { "epoch": 2.44, "learning_rate": 9.017318034254546e-06, "loss": 1.4212, "step": 918 }, { "epoch": 2.44, "learning_rate": 8.935458357615584e-06, "loss": 1.4453, "step": 919 }, { "epoch": 2.44, "learning_rate": 8.853935466312225e-06, "loss": 1.4445, "step": 920 }, { "epoch": 2.44, "learning_rate": 8.772750028943527e-06, "loss": 1.3734, "step": 921 }, { "epoch": 2.45, "learning_rate": 8.691902711341e-06, "loss": 1.4893, "step": 922 }, { "epoch": 2.45, "learning_rate": 8.611394176563038e-06, "loss": 1.5218, "step": 923 }, { "epoch": 2.45, "learning_rate": 8.531225084889654e-06, "loss": 1.4519, "step": 924 }, { "epoch": 2.45, "learning_rate": 8.451396093816872e-06, "loss": 1.4522, "step": 925 }, { "epoch": 2.46, "learning_rate": 8.371907858051497e-06, "loss": 1.4729, "step": 926 }, { "epoch": 2.46, "learning_rate": 8.292761029505603e-06, "loss": 1.4275, "step": 927 }, { "epoch": 2.46, "learning_rate": 8.21395625729135e-06, "loss": 1.4604, "step": 928 }, { "epoch": 2.46, "learning_rate": 8.135494187715475e-06, "loss": 1.4039, "step": 929 }, { "epoch": 2.47, "learning_rate": 8.057375464274142e-06, "loss": 1.4912, "step": 930 }, { "epoch": 2.47, "learning_rate": 7.979600727647596e-06, "loss": 1.4187, "step": 931 }, { "epoch": 2.47, "learning_rate": 7.902170615694915e-06, "loss": 1.4225, "step": 932 }, { "epoch": 2.47, "learning_rate": 7.825085763448798e-06, "loss": 1.4011, "step": 933 }, { "epoch": 2.48, "learning_rate": 7.748346803110295e-06, "loss": 1.3841, "step": 934 }, { "epoch": 2.48, "learning_rate": 7.671954364043754e-06, "loss": 1.474, "step": 935 }, { "epoch": 2.48, "learning_rate": 7.595909072771485e-06, "loss": 1.4427, "step": 936 }, { "epoch": 2.49, "learning_rate": 7.520211552968792e-06, "loss": 1.5081, "step": 937 }, { "epoch": 2.49, "learning_rate": 7.444862425458699e-06, "loss": 1.4544, "step": 938 }, { "epoch": 2.49, "learning_rate": 7.369862308207026e-06, "loss": 1.5018, "step": 939 }, { "epoch": 2.49, "learning_rate": 7.295211816317149e-06, "loss": 1.4605, "step": 940 }, { "epoch": 2.5, "learning_rate": 7.220911562025085e-06, "loss": 1.4122, "step": 941 }, { "epoch": 2.5, "learning_rate": 7.146962154694409e-06, "loss": 1.4672, "step": 942 }, { "epoch": 2.5, "learning_rate": 7.0733642008112836e-06, "loss": 1.5257, "step": 943 }, { "epoch": 2.5, "learning_rate": 7.000118303979464e-06, "loss": 1.4355, "step": 944 }, { "epoch": 2.51, "learning_rate": 6.927225064915349e-06, "loss": 1.4799, "step": 945 }, { "epoch": 2.51, "learning_rate": 6.854685081443097e-06, "loss": 1.5002, "step": 946 }, { "epoch": 2.51, "learning_rate": 6.782498948489613e-06, "loss": 1.478, "step": 947 }, { "epoch": 2.51, "learning_rate": 6.71066725807985e-06, "loss": 1.4507, "step": 948 }, { "epoch": 2.52, "learning_rate": 6.639190599331746e-06, "loss": 1.482, "step": 949 }, { "epoch": 2.52, "learning_rate": 6.5680695584515725e-06, "loss": 1.4785, "step": 950 }, { "epoch": 2.52, "learning_rate": 6.497304718728986e-06, "loss": 1.4368, "step": 951 }, { "epoch": 2.53, "learning_rate": 6.4268966605323725e-06, "loss": 1.4422, "step": 952 }, { "epoch": 2.53, "learning_rate": 6.3568459613039536e-06, "loss": 1.4643, "step": 953 }, { "epoch": 2.53, "learning_rate": 6.287153195555174e-06, "loss": 1.4136, "step": 954 }, { "epoch": 2.53, "learning_rate": 6.217818934861896e-06, "loss": 1.4759, "step": 955 }, { "epoch": 2.54, "learning_rate": 6.148843747859778e-06, "loss": 1.5521, "step": 956 }, { "epoch": 2.54, "learning_rate": 6.080228200239585e-06, "loss": 1.4326, "step": 957 }, { "epoch": 2.54, "learning_rate": 6.011972854742503e-06, "loss": 1.4925, "step": 958 }, { "epoch": 2.54, "learning_rate": 5.94407827115564e-06, "loss": 1.5337, "step": 959 }, { "epoch": 2.55, "learning_rate": 5.876545006307288e-06, "loss": 1.4804, "step": 960 }, { "epoch": 2.55, "learning_rate": 5.809373614062508e-06, "loss": 1.4134, "step": 961 }, { "epoch": 2.55, "learning_rate": 5.742564645318432e-06, "loss": 1.4892, "step": 962 }, { "epoch": 2.55, "learning_rate": 5.6761186479999115e-06, "loss": 1.5565, "step": 963 }, { "epoch": 2.56, "learning_rate": 5.610036167054839e-06, "loss": 1.4506, "step": 964 }, { "epoch": 2.56, "learning_rate": 5.544317744449873e-06, "loss": 1.3972, "step": 965 }, { "epoch": 2.56, "learning_rate": 5.478963919165819e-06, "loss": 1.5298, "step": 966 }, { "epoch": 2.56, "learning_rate": 5.4139752271933295e-06, "loss": 1.507, "step": 967 }, { "epoch": 2.57, "learning_rate": 5.349352201528446e-06, "loss": 1.5116, "step": 968 }, { "epoch": 2.57, "learning_rate": 5.285095372168264e-06, "loss": 1.4707, "step": 969 }, { "epoch": 2.57, "learning_rate": 5.2212052661065656e-06, "loss": 1.4136, "step": 970 }, { "epoch": 2.58, "learning_rate": 5.157682407329456e-06, "loss": 1.5139, "step": 971 }, { "epoch": 2.58, "learning_rate": 5.094527316811204e-06, "loss": 1.4348, "step": 972 }, { "epoch": 2.58, "learning_rate": 5.031740512509769e-06, "loss": 1.5051, "step": 973 }, { "epoch": 2.58, "learning_rate": 4.969322509362762e-06, "loss": 1.4504, "step": 974 }, { "epoch": 2.59, "learning_rate": 4.9072738192830255e-06, "loss": 1.3567, "step": 975 }, { "epoch": 2.59, "learning_rate": 4.845594951154614e-06, "loss": 1.5088, "step": 976 }, { "epoch": 2.59, "learning_rate": 4.784286410828481e-06, "loss": 1.4605, "step": 977 }, { "epoch": 2.59, "learning_rate": 4.723348701118407e-06, "loss": 1.4053, "step": 978 }, { "epoch": 2.6, "learning_rate": 4.662782321796849e-06, "loss": 1.3966, "step": 979 }, { "epoch": 2.6, "learning_rate": 4.60258776959086e-06, "loss": 1.4311, "step": 980 }, { "epoch": 2.6, "learning_rate": 4.54276553817799e-06, "loss": 1.4258, "step": 981 }, { "epoch": 2.6, "learning_rate": 4.483316118182251e-06, "loss": 1.436, "step": 982 }, { "epoch": 2.61, "learning_rate": 4.424239997170105e-06, "loss": 1.4556, "step": 983 }, { "epoch": 2.61, "learning_rate": 4.365537659646418e-06, "loss": 1.399, "step": 984 }, { "epoch": 2.61, "learning_rate": 4.307209587050576e-06, "loss": 1.5502, "step": 985 }, { "epoch": 2.62, "learning_rate": 4.249256257752421e-06, "loss": 1.4414, "step": 986 }, { "epoch": 2.62, "learning_rate": 4.191678147048445e-06, "loss": 1.5388, "step": 987 }, { "epoch": 2.62, "learning_rate": 4.134475727157777e-06, "loss": 1.419, "step": 988 }, { "epoch": 2.62, "learning_rate": 4.077649467218436e-06, "loss": 1.471, "step": 989 }, { "epoch": 2.63, "learning_rate": 4.0211998332833514e-06, "loss": 1.4475, "step": 990 }, { "epoch": 2.63, "learning_rate": 3.965127288316634e-06, "loss": 1.4398, "step": 991 }, { "epoch": 2.63, "learning_rate": 3.9094322921897574e-06, "loss": 1.3985, "step": 992 }, { "epoch": 2.63, "learning_rate": 3.854115301677757e-06, "loss": 1.3773, "step": 993 }, { "epoch": 2.64, "learning_rate": 3.799176770455526e-06, "loss": 1.4409, "step": 994 }, { "epoch": 2.64, "learning_rate": 3.7446171490940706e-06, "loss": 1.4241, "step": 995 }, { "epoch": 2.64, "learning_rate": 3.690436885056808e-06, "loss": 1.4467, "step": 996 }, { "epoch": 2.64, "learning_rate": 3.6366364226959047e-06, "loss": 1.433, "step": 997 }, { "epoch": 2.65, "learning_rate": 3.5832162032486684e-06, "loss": 1.4797, "step": 998 }, { "epoch": 2.65, "learning_rate": 3.530176664833834e-06, "loss": 1.3771, "step": 999 }, { "epoch": 2.65, "learning_rate": 3.4775182424481135e-06, "loss": 1.4442, "step": 1000 }, { "epoch": 2.66, "learning_rate": 3.4252413679624616e-06, "loss": 1.4842, "step": 1001 }, { "epoch": 2.66, "learning_rate": 3.373346470118682e-06, "loss": 1.4969, "step": 1002 }, { "epoch": 2.66, "learning_rate": 3.321833974525812e-06, "loss": 1.4653, "step": 1003 }, { "epoch": 2.66, "learning_rate": 3.2707043036566965e-06, "loss": 1.4349, "step": 1004 }, { "epoch": 2.67, "learning_rate": 3.219957876844465e-06, "loss": 1.4189, "step": 1005 }, { "epoch": 2.67, "learning_rate": 3.16959511027915e-06, "loss": 1.4463, "step": 1006 }, { "epoch": 2.67, "learning_rate": 3.119616417004223e-06, "loss": 1.4771, "step": 1007 }, { "epoch": 2.67, "learning_rate": 3.0700222069132422e-06, "loss": 1.4248, "step": 1008 }, { "epoch": 2.68, "learning_rate": 3.020812886746477e-06, "loss": 1.4778, "step": 1009 }, { "epoch": 2.68, "learning_rate": 2.9719888600875713e-06, "loss": 1.4952, "step": 1010 }, { "epoch": 2.68, "learning_rate": 2.923550527360247e-06, "loss": 1.4907, "step": 1011 }, { "epoch": 2.68, "learning_rate": 2.875498285824979e-06, "loss": 1.4447, "step": 1012 }, { "epoch": 2.69, "learning_rate": 2.8278325295758134e-06, "loss": 1.4653, "step": 1013 }, { "epoch": 2.69, "learning_rate": 2.7805536495370375e-06, "loss": 1.4342, "step": 1014 }, { "epoch": 2.69, "learning_rate": 2.7336620334600604e-06, "loss": 1.4871, "step": 1015 }, { "epoch": 2.69, "learning_rate": 2.687158065920192e-06, "loss": 1.4212, "step": 1016 }, { "epoch": 2.7, "learning_rate": 2.6410421283134866e-06, "loss": 1.4416, "step": 1017 }, { "epoch": 2.7, "learning_rate": 2.595314598853632e-06, "loss": 1.5976, "step": 1018 }, { "epoch": 2.7, "learning_rate": 2.54997585256882e-06, "loss": 1.4335, "step": 1019 }, { "epoch": 2.71, "learning_rate": 2.5050262612987206e-06, "loss": 1.4337, "step": 1020 }, { "epoch": 2.71, "learning_rate": 2.4604661936913687e-06, "loss": 1.4384, "step": 1021 }, { "epoch": 2.71, "learning_rate": 2.416296015200198e-06, "loss": 1.4824, "step": 1022 }, { "epoch": 2.71, "learning_rate": 2.372516088081006e-06, "loss": 1.4162, "step": 1023 }, { "epoch": 2.72, "learning_rate": 2.3291267713889953e-06, "loss": 1.4193, "step": 1024 }, { "epoch": 2.72, "learning_rate": 2.286128420975836e-06, "loss": 1.442, "step": 1025 }, { "epoch": 2.72, "learning_rate": 2.2435213894867303e-06, "loss": 1.4323, "step": 1026 }, { "epoch": 2.72, "learning_rate": 2.2013060263575415e-06, "loss": 1.4348, "step": 1027 }, { "epoch": 2.73, "learning_rate": 2.159482677811919e-06, "loss": 1.4858, "step": 1028 }, { "epoch": 2.73, "learning_rate": 2.1180516868584467e-06, "loss": 1.3906, "step": 1029 }, { "epoch": 2.73, "learning_rate": 2.0770133932878412e-06, "loss": 1.374, "step": 1030 }, { "epoch": 2.73, "learning_rate": 2.0363681336701746e-06, "loss": 1.4733, "step": 1031 }, { "epoch": 2.74, "learning_rate": 1.996116241352092e-06, "loss": 1.4614, "step": 1032 }, { "epoch": 2.74, "learning_rate": 1.9562580464541014e-06, "loss": 1.5179, "step": 1033 }, { "epoch": 2.74, "learning_rate": 1.9167938758678394e-06, "loss": 1.3801, "step": 1034 }, { "epoch": 2.75, "learning_rate": 1.8777240532534212e-06, "loss": 1.4825, "step": 1035 }, { "epoch": 2.75, "learning_rate": 1.8390488990367493e-06, "loss": 1.3541, "step": 1036 }, { "epoch": 2.75, "learning_rate": 1.8007687304069375e-06, "loss": 1.4733, "step": 1037 }, { "epoch": 2.75, "learning_rate": 1.7628838613136412e-06, "loss": 1.4835, "step": 1038 }, { "epoch": 2.76, "learning_rate": 1.7253946024645473e-06, "loss": 1.4677, "step": 1039 }, { "epoch": 2.76, "learning_rate": 1.6883012613227778e-06, "loss": 1.446, "step": 1040 }, { "epoch": 2.76, "learning_rate": 1.6516041421044127e-06, "loss": 1.461, "step": 1041 }, { "epoch": 2.76, "learning_rate": 1.6153035457759536e-06, "loss": 1.4779, "step": 1042 }, { "epoch": 2.77, "learning_rate": 1.579399770051876e-06, "loss": 1.4892, "step": 1043 }, { "epoch": 2.77, "learning_rate": 1.5438931093921805e-06, "loss": 1.4253, "step": 1044 }, { "epoch": 2.77, "learning_rate": 1.5087838549999956e-06, "loss": 1.4644, "step": 1045 }, { "epoch": 2.77, "learning_rate": 1.474072294819162e-06, "loss": 1.4157, "step": 1046 }, { "epoch": 2.78, "learning_rate": 1.4397587135318857e-06, "loss": 1.4122, "step": 1047 }, { "epoch": 2.78, "learning_rate": 1.4058433925564107e-06, "loss": 1.5177, "step": 1048 }, { "epoch": 2.78, "learning_rate": 1.3723266100447053e-06, "loss": 1.4533, "step": 1049 }, { "epoch": 2.79, "learning_rate": 1.3392086408801518e-06, "loss": 1.4155, "step": 1050 }, { "epoch": 2.79, "learning_rate": 1.3064897566753442e-06, "loss": 1.4637, "step": 1051 }, { "epoch": 2.79, "learning_rate": 1.2741702257698273e-06, "loss": 1.395, "step": 1052 }, { "epoch": 2.79, "learning_rate": 1.242250313227905e-06, "loss": 1.4807, "step": 1053 }, { "epoch": 2.8, "learning_rate": 1.210730280836464e-06, "loss": 1.4223, "step": 1054 }, { "epoch": 2.8, "learning_rate": 1.1796103871028196e-06, "loss": 1.461, "step": 1055 }, { "epoch": 2.8, "learning_rate": 1.1488908872526183e-06, "loss": 1.4161, "step": 1056 }, { "epoch": 2.8, "learning_rate": 1.1185720332277162e-06, "loss": 1.4039, "step": 1057 }, { "epoch": 2.81, "learning_rate": 1.0886540736841311e-06, "loss": 1.4827, "step": 1058 }, { "epoch": 2.81, "learning_rate": 1.0591372539900058e-06, "loss": 1.4112, "step": 1059 }, { "epoch": 2.81, "learning_rate": 1.0300218162235752e-06, "loss": 1.3737, "step": 1060 }, { "epoch": 2.81, "learning_rate": 1.0013079991711972e-06, "loss": 1.4186, "step": 1061 }, { "epoch": 2.82, "learning_rate": 9.729960383254134e-07, "loss": 1.4583, "step": 1062 }, { "epoch": 2.82, "learning_rate": 9.450861658829469e-07, "loss": 1.4222, "step": 1063 }, { "epoch": 2.82, "learning_rate": 9.175786107429085e-07, "loss": 1.4342, "step": 1064 }, { "epoch": 2.82, "learning_rate": 8.90473598504804e-07, "loss": 1.4549, "step": 1065 }, { "epoch": 2.83, "learning_rate": 8.637713514667634e-07, "loss": 1.4385, "step": 1066 }, { "epoch": 2.83, "learning_rate": 8.37472088623692e-07, "loss": 1.447, "step": 1067 }, { "epoch": 2.83, "learning_rate": 8.115760256654669e-07, "loss": 1.5006, "step": 1068 }, { "epoch": 2.84, "learning_rate": 7.860833749751773e-07, "loss": 1.4861, "step": 1069 }, { "epoch": 2.84, "learning_rate": 7.60994345627386e-07, "loss": 1.4061, "step": 1070 }, { "epoch": 2.84, "learning_rate": 7.363091433864044e-07, "loss": 1.4612, "step": 1071 }, { "epoch": 2.84, "learning_rate": 7.120279707046096e-07, "loss": 1.432, "step": 1072 }, { "epoch": 2.85, "learning_rate": 6.881510267207846e-07, "loss": 1.4702, "step": 1073 }, { "epoch": 2.85, "learning_rate": 6.646785072584872e-07, "loss": 1.5084, "step": 1074 }, { "epoch": 2.85, "learning_rate": 6.416106048244386e-07, "loss": 1.4661, "step": 1075 }, { "epoch": 2.85, "learning_rate": 6.189475086069485e-07, "loss": 1.3731, "step": 1076 }, { "epoch": 2.86, "learning_rate": 5.966894044743709e-07, "loss": 1.455, "step": 1077 }, { "epoch": 2.86, "learning_rate": 5.748364749735613e-07, "loss": 1.4169, "step": 1078 }, { "epoch": 2.86, "learning_rate": 5.533888993283831e-07, "loss": 1.3907, "step": 1079 }, { "epoch": 2.86, "learning_rate": 5.323468534382703e-07, "loss": 1.4668, "step": 1080 }, { "epoch": 2.87, "learning_rate": 5.117105098767283e-07, "loss": 1.4628, "step": 1081 }, { "epoch": 2.87, "learning_rate": 4.914800378899687e-07, "loss": 1.3697, "step": 1082 }, { "epoch": 2.87, "learning_rate": 4.7165560339549886e-07, "loss": 1.5115, "step": 1083 }, { "epoch": 2.88, "learning_rate": 4.522373689807624e-07, "loss": 1.4415, "step": 1084 }, { "epoch": 2.88, "learning_rate": 4.33225493901801e-07, "loss": 1.4368, "step": 1085 }, { "epoch": 2.88, "learning_rate": 4.1462013408196664e-07, "loss": 1.4338, "step": 1086 }, { "epoch": 2.88, "learning_rate": 3.9642144211061714e-07, "loss": 1.3896, "step": 1087 }, { "epoch": 2.89, "learning_rate": 3.7862956724190045e-07, "loss": 1.4796, "step": 1088 }, { "epoch": 2.89, "learning_rate": 3.612446553934723e-07, "loss": 1.4081, "step": 1089 }, { "epoch": 2.89, "learning_rate": 3.4426684914538045e-07, "loss": 1.3993, "step": 1090 }, { "epoch": 2.89, "learning_rate": 3.276962877388157e-07, "loss": 1.4188, "step": 1091 }, { "epoch": 2.9, "learning_rate": 3.115331070750127e-07, "loss": 1.4747, "step": 1092 }, { "epoch": 2.9, "learning_rate": 2.957774397141455e-07, "loss": 1.4073, "step": 1093 }, { "epoch": 2.9, "learning_rate": 2.8042941487419483e-07, "loss": 1.4816, "step": 1094 }, { "epoch": 2.9, "learning_rate": 2.6548915842993793e-07, "loss": 1.4085, "step": 1095 }, { "epoch": 2.91, "learning_rate": 2.5095679291188833e-07, "loss": 1.3901, "step": 1096 }, { "epoch": 2.91, "learning_rate": 2.368324375052855e-07, "loss": 1.4146, "step": 1097 }, { "epoch": 2.91, "learning_rate": 2.2311620804914002e-07, "loss": 1.4465, "step": 1098 }, { "epoch": 2.92, "learning_rate": 2.0980821703527886e-07, "loss": 1.4298, "step": 1099 }, { "epoch": 2.92, "learning_rate": 1.9690857360739612e-07, "loss": 1.5303, "step": 1100 }, { "epoch": 2.92, "learning_rate": 1.8441738356019256e-07, "loss": 1.5039, "step": 1101 }, { "epoch": 2.92, "learning_rate": 1.7233474933849303e-07, "loss": 1.4276, "step": 1102 }, { "epoch": 2.93, "learning_rate": 1.6066077003639714e-07, "loss": 1.4591, "step": 1103 }, { "epoch": 2.93, "learning_rate": 1.4939554139648537e-07, "loss": 1.4353, "step": 1104 }, { "epoch": 2.93, "learning_rate": 1.3853915580901988e-07, "loss": 1.3791, "step": 1105 }, { "epoch": 2.93, "learning_rate": 1.2809170231118938e-07, "loss": 1.4862, "step": 1106 }, { "epoch": 2.94, "learning_rate": 1.1805326658639316e-07, "loss": 1.3861, "step": 1107 }, { "epoch": 2.94, "learning_rate": 1.0842393096350823e-07, "loss": 1.4563, "step": 1108 }, { "epoch": 2.94, "learning_rate": 9.920377441623996e-08, "loss": 1.4102, "step": 1109 }, { "epoch": 2.94, "learning_rate": 9.039287256247253e-08, "loss": 1.4573, "step": 1110 }, { "epoch": 2.95, "learning_rate": 8.199129766363056e-08, "loss": 1.4369, "step": 1111 }, { "epoch": 2.95, "learning_rate": 7.399911862410735e-08, "loss": 1.5305, "step": 1112 }, { "epoch": 2.95, "learning_rate": 6.641640099068758e-08, "loss": 1.4532, "step": 1113 }, { "epoch": 2.95, "learning_rate": 5.9243206952019904e-08, "loss": 1.473, "step": 1114 }, { "epoch": 2.96, "learning_rate": 5.247959533808966e-08, "loss": 1.3916, "step": 1115 }, { "epoch": 2.96, "learning_rate": 4.612562161974698e-08, "loss": 1.3834, "step": 1116 }, { "epoch": 2.96, "learning_rate": 4.018133790826273e-08, "loss": 1.4524, "step": 1117 }, { "epoch": 2.97, "learning_rate": 3.464679295487328e-08, "loss": 1.428, "step": 1118 }, { "epoch": 2.97, "learning_rate": 2.952203215041971e-08, "loss": 1.4697, "step": 1119 }, { "epoch": 2.97, "learning_rate": 2.480709752493704e-08, "loss": 1.3832, "step": 1120 }, { "epoch": 2.97, "learning_rate": 2.050202774732668e-08, "loss": 1.5032, "step": 1121 }, { "epoch": 2.98, "learning_rate": 1.6606858125040038e-08, "loss": 1.4712, "step": 1122 }, { "epoch": 2.98, "learning_rate": 1.3121620603795404e-08, "loss": 1.4285, "step": 1123 }, { "epoch": 2.98, "learning_rate": 1.0046343767294852e-08, "loss": 1.4409, "step": 1124 }, { "epoch": 2.98, "learning_rate": 7.381052837013291e-09, "loss": 1.4037, "step": 1125 }, { "epoch": 2.99, "learning_rate": 5.125769671976421e-09, "loss": 1.4707, "step": 1126 }, { "epoch": 2.99, "learning_rate": 3.2805127685886504e-09, "loss": 1.4793, "step": 1127 }, { "epoch": 2.99, "learning_rate": 1.845297260472112e-09, "loss": 1.4402, "step": 1128 }, { "epoch": 2.99, "learning_rate": 8.201349183611928e-10, "loss": 1.4417, "step": 1129 }, { "epoch": 3.0, "learning_rate": 2.0503414998040982e-10, "loss": 1.4528, "step": 1130 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.4566, "step": 1131 }, { "epoch": 3.0, "step": 1131, "total_flos": 2.4511080029906534e+17, "train_loss": 1.623349694105295, "train_runtime": 8800.864, "train_samples_per_second": 16.443, "train_steps_per_second": 0.129 } ], "max_steps": 1131, "num_train_epochs": 3, "total_flos": 2.4511080029906534e+17, "trial_name": null, "trial_params": null }