{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009009009009009009, "grad_norm": 34.85325259337485, "learning_rate": 0.0, "loss": 4.425543785095215, "step": 1 }, { "epoch": 0.0018018018018018018, "grad_norm": 41.49064024559189, "learning_rate": 3.003003003003003e-08, "loss": 3.991971731185913, "step": 2 }, { "epoch": 0.002702702702702703, "grad_norm": 33.404734390062586, "learning_rate": 6.006006006006006e-08, "loss": 3.3135690689086914, "step": 3 }, { "epoch": 0.0036036036036036037, "grad_norm": 31.301531140485675, "learning_rate": 9.00900900900901e-08, "loss": 3.384368419647217, "step": 4 }, { "epoch": 0.0045045045045045045, "grad_norm": 37.16273793726143, "learning_rate": 1.2012012012012013e-07, "loss": 4.710058212280273, "step": 5 }, { "epoch": 0.005405405405405406, "grad_norm": 41.04288621830464, "learning_rate": 1.5015015015015016e-07, "loss": 4.387257099151611, "step": 6 }, { "epoch": 0.006306306306306306, "grad_norm": 38.30018644703421, "learning_rate": 1.801801801801802e-07, "loss": 3.776132583618164, "step": 7 }, { "epoch": 0.007207207207207207, "grad_norm": 33.68929257403917, "learning_rate": 2.1021021021021025e-07, "loss": 3.8114027976989746, "step": 8 }, { "epoch": 0.008108108108108109, "grad_norm": 36.25630264757824, "learning_rate": 2.4024024024024026e-07, "loss": 4.490676403045654, "step": 9 }, { "epoch": 0.009009009009009009, "grad_norm": 32.57110428951466, "learning_rate": 2.702702702702703e-07, "loss": 3.3798646926879883, "step": 10 }, { "epoch": 0.00990990990990991, "grad_norm": 43.18767365474111, "learning_rate": 3.003003003003003e-07, "loss": 4.468938827514648, "step": 11 }, { "epoch": 0.010810810810810811, "grad_norm": 35.94354919359712, "learning_rate": 3.3033033033033036e-07, "loss": 4.262247562408447, "step": 12 }, { "epoch": 0.011711711711711712, "grad_norm": 32.245799614821415, "learning_rate": 3.603603603603604e-07, "loss": 3.8364014625549316, "step": 13 }, { "epoch": 0.012612612612612612, "grad_norm": 66.58620222655038, "learning_rate": 3.903903903903904e-07, "loss": 3.873539924621582, "step": 14 }, { "epoch": 0.013513513513513514, "grad_norm": 37.19568116981887, "learning_rate": 4.204204204204205e-07, "loss": 4.2311906814575195, "step": 15 }, { "epoch": 0.014414414414414415, "grad_norm": 43.73716775935688, "learning_rate": 4.504504504504505e-07, "loss": 4.178625106811523, "step": 16 }, { "epoch": 0.015315315315315315, "grad_norm": 36.675332239183064, "learning_rate": 4.804804804804805e-07, "loss": 3.9813666343688965, "step": 17 }, { "epoch": 0.016216216216216217, "grad_norm": 32.97482553933009, "learning_rate": 5.105105105105106e-07, "loss": 4.1125054359436035, "step": 18 }, { "epoch": 0.017117117117117116, "grad_norm": 32.436790195213625, "learning_rate": 5.405405405405406e-07, "loss": 4.283469200134277, "step": 19 }, { "epoch": 0.018018018018018018, "grad_norm": 40.09255214449685, "learning_rate": 5.705705705705706e-07, "loss": 3.9226396083831787, "step": 20 }, { "epoch": 0.01891891891891892, "grad_norm": 32.93413919366413, "learning_rate": 6.006006006006006e-07, "loss": 3.7610692977905273, "step": 21 }, { "epoch": 0.01981981981981982, "grad_norm": 34.359848332773865, "learning_rate": 6.306306306306306e-07, "loss": 4.637616157531738, "step": 22 }, { "epoch": 0.02072072072072072, "grad_norm": 34.849607178010444, "learning_rate": 6.606606606606607e-07, "loss": 4.552515029907227, "step": 23 }, { "epoch": 0.021621621621621623, "grad_norm": 29.055408730985356, "learning_rate": 6.906906906906907e-07, "loss": 3.446662425994873, "step": 24 }, { "epoch": 0.02252252252252252, "grad_norm": 34.963765245521756, "learning_rate": 7.207207207207208e-07, "loss": 3.9494271278381348, "step": 25 }, { "epoch": 0.023423423423423424, "grad_norm": 25.42005234312663, "learning_rate": 7.507507507507509e-07, "loss": 4.037813186645508, "step": 26 }, { "epoch": 0.024324324324324326, "grad_norm": 30.34189312777092, "learning_rate": 7.807807807807808e-07, "loss": 4.118837356567383, "step": 27 }, { "epoch": 0.025225225225225224, "grad_norm": 27.915079938660245, "learning_rate": 8.108108108108109e-07, "loss": 3.494497537612915, "step": 28 }, { "epoch": 0.026126126126126126, "grad_norm": 23.500481293588344, "learning_rate": 8.40840840840841e-07, "loss": 3.9746358394622803, "step": 29 }, { "epoch": 0.02702702702702703, "grad_norm": 23.091569375485957, "learning_rate": 8.708708708708709e-07, "loss": 3.6937849521636963, "step": 30 }, { "epoch": 0.027927927927927927, "grad_norm": 22.312260784488128, "learning_rate": 9.00900900900901e-07, "loss": 3.7990822792053223, "step": 31 }, { "epoch": 0.02882882882882883, "grad_norm": 18.55715539198881, "learning_rate": 9.30930930930931e-07, "loss": 3.547938823699951, "step": 32 }, { "epoch": 0.02972972972972973, "grad_norm": 18.684298354977482, "learning_rate": 9.60960960960961e-07, "loss": 3.9032235145568848, "step": 33 }, { "epoch": 0.03063063063063063, "grad_norm": 18.16824230200826, "learning_rate": 9.909909909909911e-07, "loss": 3.70247483253479, "step": 34 }, { "epoch": 0.03153153153153153, "grad_norm": 22.98521490510086, "learning_rate": 1.0210210210210212e-06, "loss": 3.7687439918518066, "step": 35 }, { "epoch": 0.032432432432432434, "grad_norm": 17.91413496069695, "learning_rate": 1.051051051051051e-06, "loss": 4.087740421295166, "step": 36 }, { "epoch": 0.03333333333333333, "grad_norm": 13.573837184927248, "learning_rate": 1.0810810810810812e-06, "loss": 3.230196952819824, "step": 37 }, { "epoch": 0.03423423423423423, "grad_norm": 14.997176911133973, "learning_rate": 1.111111111111111e-06, "loss": 3.3304688930511475, "step": 38 }, { "epoch": 0.03513513513513514, "grad_norm": 13.83464233131776, "learning_rate": 1.1411411411411411e-06, "loss": 3.6968789100646973, "step": 39 }, { "epoch": 0.036036036036036036, "grad_norm": 17.303369460859294, "learning_rate": 1.1711711711711712e-06, "loss": 4.100399971008301, "step": 40 }, { "epoch": 0.036936936936936934, "grad_norm": 13.424397850983052, "learning_rate": 1.2012012012012013e-06, "loss": 3.976811408996582, "step": 41 }, { "epoch": 0.03783783783783784, "grad_norm": 22.736171499764858, "learning_rate": 1.2312312312312314e-06, "loss": 3.5594120025634766, "step": 42 }, { "epoch": 0.03873873873873874, "grad_norm": 15.927485843698738, "learning_rate": 1.2612612612612613e-06, "loss": 3.7348382472991943, "step": 43 }, { "epoch": 0.03963963963963964, "grad_norm": 22.403551454918947, "learning_rate": 1.2912912912912913e-06, "loss": 3.6704657077789307, "step": 44 }, { "epoch": 0.04054054054054054, "grad_norm": 12.316481332596359, "learning_rate": 1.3213213213213214e-06, "loss": 3.402862071990967, "step": 45 }, { "epoch": 0.04144144144144144, "grad_norm": 19.038151364071236, "learning_rate": 1.3513513513513515e-06, "loss": 3.531310558319092, "step": 46 }, { "epoch": 0.04234234234234234, "grad_norm": 25.790143129271133, "learning_rate": 1.3813813813813814e-06, "loss": 3.7117223739624023, "step": 47 }, { "epoch": 0.043243243243243246, "grad_norm": 13.170821661869951, "learning_rate": 1.4114114114114117e-06, "loss": 3.381789445877075, "step": 48 }, { "epoch": 0.044144144144144144, "grad_norm": 13.885610227951034, "learning_rate": 1.4414414414414416e-06, "loss": 3.204735040664673, "step": 49 }, { "epoch": 0.04504504504504504, "grad_norm": 31.267336865046115, "learning_rate": 1.4714714714714714e-06, "loss": 4.8206329345703125, "step": 50 }, { "epoch": 0.04594594594594595, "grad_norm": 15.877465954218119, "learning_rate": 1.5015015015015017e-06, "loss": 2.7905378341674805, "step": 51 }, { "epoch": 0.04684684684684685, "grad_norm": 16.5338114322775, "learning_rate": 1.5315315315315316e-06, "loss": 3.382842779159546, "step": 52 }, { "epoch": 0.047747747747747746, "grad_norm": 10.673008991068937, "learning_rate": 1.5615615615615617e-06, "loss": 2.8675003051757812, "step": 53 }, { "epoch": 0.04864864864864865, "grad_norm": 11.673726166435003, "learning_rate": 1.5915915915915916e-06, "loss": 3.3539891242980957, "step": 54 }, { "epoch": 0.04954954954954955, "grad_norm": 39.391244386486385, "learning_rate": 1.6216216216216219e-06, "loss": 3.5359396934509277, "step": 55 }, { "epoch": 0.05045045045045045, "grad_norm": 10.618909178532993, "learning_rate": 1.6516516516516517e-06, "loss": 2.644918203353882, "step": 56 }, { "epoch": 0.051351351351351354, "grad_norm": 13.068552382087416, "learning_rate": 1.681681681681682e-06, "loss": 3.2725865840911865, "step": 57 }, { "epoch": 0.05225225225225225, "grad_norm": 10.546533163309212, "learning_rate": 1.711711711711712e-06, "loss": 2.2234914302825928, "step": 58 }, { "epoch": 0.05315315315315315, "grad_norm": 10.810516847147817, "learning_rate": 1.7417417417417418e-06, "loss": 3.6590347290039062, "step": 59 }, { "epoch": 0.05405405405405406, "grad_norm": 12.179326338196333, "learning_rate": 1.7717717717717719e-06, "loss": 3.5190892219543457, "step": 60 }, { "epoch": 0.054954954954954956, "grad_norm": 12.55991185112027, "learning_rate": 1.801801801801802e-06, "loss": 3.250088691711426, "step": 61 }, { "epoch": 0.055855855855855854, "grad_norm": 16.16156820096353, "learning_rate": 1.831831831831832e-06, "loss": 3.724785804748535, "step": 62 }, { "epoch": 0.05675675675675676, "grad_norm": 8.10725681546285, "learning_rate": 1.861861861861862e-06, "loss": 3.1755051612854004, "step": 63 }, { "epoch": 0.05765765765765766, "grad_norm": 10.026358518437815, "learning_rate": 1.8918918918918922e-06, "loss": 3.3634328842163086, "step": 64 }, { "epoch": 0.05855855855855856, "grad_norm": 11.749792278768123, "learning_rate": 1.921921921921922e-06, "loss": 3.3400256633758545, "step": 65 }, { "epoch": 0.05945945945945946, "grad_norm": 20.134196874596437, "learning_rate": 1.951951951951952e-06, "loss": 3.6804957389831543, "step": 66 }, { "epoch": 0.06036036036036036, "grad_norm": 9.269868267334294, "learning_rate": 1.9819819819819822e-06, "loss": 3.1135306358337402, "step": 67 }, { "epoch": 0.06126126126126126, "grad_norm": 7.429607972998909, "learning_rate": 2.012012012012012e-06, "loss": 3.148690700531006, "step": 68 }, { "epoch": 0.062162162162162166, "grad_norm": 19.62825204948393, "learning_rate": 2.0420420420420424e-06, "loss": 3.147704601287842, "step": 69 }, { "epoch": 0.06306306306306306, "grad_norm": 16.19832242781432, "learning_rate": 2.0720720720720723e-06, "loss": 2.7632508277893066, "step": 70 }, { "epoch": 0.06396396396396396, "grad_norm": 13.199429470705226, "learning_rate": 2.102102102102102e-06, "loss": 3.867487907409668, "step": 71 }, { "epoch": 0.06486486486486487, "grad_norm": 13.39571462897039, "learning_rate": 2.1321321321321325e-06, "loss": 3.193864345550537, "step": 72 }, { "epoch": 0.06576576576576576, "grad_norm": 12.419152003530359, "learning_rate": 2.1621621621621623e-06, "loss": 3.754601001739502, "step": 73 }, { "epoch": 0.06666666666666667, "grad_norm": 11.957234193555557, "learning_rate": 2.192192192192192e-06, "loss": 3.5533010959625244, "step": 74 }, { "epoch": 0.06756756756756757, "grad_norm": 9.1627396522264, "learning_rate": 2.222222222222222e-06, "loss": 3.5297141075134277, "step": 75 }, { "epoch": 0.06846846846846846, "grad_norm": 8.288605162110578, "learning_rate": 2.2522522522522524e-06, "loss": 3.120265483856201, "step": 76 }, { "epoch": 0.06936936936936937, "grad_norm": 23.854354911904238, "learning_rate": 2.2822822822822822e-06, "loss": 3.385438919067383, "step": 77 }, { "epoch": 0.07027027027027027, "grad_norm": 11.830332846970656, "learning_rate": 2.3123123123123125e-06, "loss": 2.939337730407715, "step": 78 }, { "epoch": 0.07117117117117117, "grad_norm": 12.413943458644699, "learning_rate": 2.3423423423423424e-06, "loss": 3.3958535194396973, "step": 79 }, { "epoch": 0.07207207207207207, "grad_norm": 8.897920164810204, "learning_rate": 2.3723723723723727e-06, "loss": 2.494640588760376, "step": 80 }, { "epoch": 0.07297297297297298, "grad_norm": 12.477196615096766, "learning_rate": 2.4024024024024026e-06, "loss": 3.469362258911133, "step": 81 }, { "epoch": 0.07387387387387387, "grad_norm": 14.471330569401308, "learning_rate": 2.432432432432433e-06, "loss": 3.149597406387329, "step": 82 }, { "epoch": 0.07477477477477477, "grad_norm": 11.472229000158672, "learning_rate": 2.4624624624624628e-06, "loss": 3.11580228805542, "step": 83 }, { "epoch": 0.07567567567567568, "grad_norm": 21.257089734183978, "learning_rate": 2.4924924924924926e-06, "loss": 4.018277168273926, "step": 84 }, { "epoch": 0.07657657657657657, "grad_norm": 11.037133037583047, "learning_rate": 2.5225225225225225e-06, "loss": 3.0106778144836426, "step": 85 }, { "epoch": 0.07747747747747748, "grad_norm": 11.701268717670866, "learning_rate": 2.552552552552553e-06, "loss": 2.9505202770233154, "step": 86 }, { "epoch": 0.07837837837837838, "grad_norm": 13.206351745245932, "learning_rate": 2.5825825825825827e-06, "loss": 2.924464702606201, "step": 87 }, { "epoch": 0.07927927927927927, "grad_norm": 13.158912537609984, "learning_rate": 2.612612612612613e-06, "loss": 2.4891014099121094, "step": 88 }, { "epoch": 0.08018018018018018, "grad_norm": 13.2529767687829, "learning_rate": 2.642642642642643e-06, "loss": 3.5574870109558105, "step": 89 }, { "epoch": 0.08108108108108109, "grad_norm": 12.879307015129848, "learning_rate": 2.672672672672673e-06, "loss": 3.6157870292663574, "step": 90 }, { "epoch": 0.08198198198198198, "grad_norm": 7.643527481100194, "learning_rate": 2.702702702702703e-06, "loss": 3.330392360687256, "step": 91 }, { "epoch": 0.08288288288288288, "grad_norm": 19.597955391317893, "learning_rate": 2.732732732732733e-06, "loss": 3.15685772895813, "step": 92 }, { "epoch": 0.08378378378378379, "grad_norm": 12.343179251305617, "learning_rate": 2.7627627627627628e-06, "loss": 2.9495744705200195, "step": 93 }, { "epoch": 0.08468468468468468, "grad_norm": 10.291955830387405, "learning_rate": 2.7927927927927926e-06, "loss": 3.066584825515747, "step": 94 }, { "epoch": 0.08558558558558559, "grad_norm": 12.014607383069372, "learning_rate": 2.8228228228228234e-06, "loss": 3.218724250793457, "step": 95 }, { "epoch": 0.08648648648648649, "grad_norm": 10.294642412471823, "learning_rate": 2.8528528528528532e-06, "loss": 2.9423789978027344, "step": 96 }, { "epoch": 0.08738738738738738, "grad_norm": 16.625230696179347, "learning_rate": 2.882882882882883e-06, "loss": 3.0145962238311768, "step": 97 }, { "epoch": 0.08828828828828829, "grad_norm": 13.380056321484588, "learning_rate": 2.912912912912913e-06, "loss": 3.7270960807800293, "step": 98 }, { "epoch": 0.0891891891891892, "grad_norm": 6.099162968769779, "learning_rate": 2.942942942942943e-06, "loss": 2.628577709197998, "step": 99 }, { "epoch": 0.09009009009009009, "grad_norm": 15.60668918864251, "learning_rate": 2.9729729729729736e-06, "loss": 3.4101738929748535, "step": 100 }, { "epoch": 0.09099099099099099, "grad_norm": 13.899724524081691, "learning_rate": 3.0030030030030034e-06, "loss": 2.78098201751709, "step": 101 }, { "epoch": 0.0918918918918919, "grad_norm": 9.955567061468747, "learning_rate": 3.0330330330330333e-06, "loss": 3.241696834564209, "step": 102 }, { "epoch": 0.09279279279279279, "grad_norm": 13.804683010117481, "learning_rate": 3.063063063063063e-06, "loss": 2.9352574348449707, "step": 103 }, { "epoch": 0.0936936936936937, "grad_norm": 22.137954195453922, "learning_rate": 3.0930930930930935e-06, "loss": 3.422001838684082, "step": 104 }, { "epoch": 0.0945945945945946, "grad_norm": 13.171905875372056, "learning_rate": 3.1231231231231234e-06, "loss": 2.9945342540740967, "step": 105 }, { "epoch": 0.09549549549549549, "grad_norm": 12.787823101874702, "learning_rate": 3.1531531531531532e-06, "loss": 3.4306979179382324, "step": 106 }, { "epoch": 0.0963963963963964, "grad_norm": 8.719903774005465, "learning_rate": 3.183183183183183e-06, "loss": 3.015371799468994, "step": 107 }, { "epoch": 0.0972972972972973, "grad_norm": 8.620974890669817, "learning_rate": 3.2132132132132134e-06, "loss": 2.8143250942230225, "step": 108 }, { "epoch": 0.0981981981981982, "grad_norm": 8.71040069570512, "learning_rate": 3.2432432432432437e-06, "loss": 3.1959874629974365, "step": 109 }, { "epoch": 0.0990990990990991, "grad_norm": 14.16224898204618, "learning_rate": 3.2732732732732736e-06, "loss": 3.4496989250183105, "step": 110 }, { "epoch": 0.1, "grad_norm": 12.935856548843718, "learning_rate": 3.3033033033033035e-06, "loss": 3.289576768875122, "step": 111 }, { "epoch": 0.1009009009009009, "grad_norm": 13.810744939226538, "learning_rate": 3.3333333333333333e-06, "loss": 3.128309965133667, "step": 112 }, { "epoch": 0.1018018018018018, "grad_norm": 9.221082446996164, "learning_rate": 3.363363363363364e-06, "loss": 2.374311923980713, "step": 113 }, { "epoch": 0.10270270270270271, "grad_norm": 10.86065731811132, "learning_rate": 3.393393393393394e-06, "loss": 3.3044633865356445, "step": 114 }, { "epoch": 0.1036036036036036, "grad_norm": 14.862448426052204, "learning_rate": 3.423423423423424e-06, "loss": 3.6216392517089844, "step": 115 }, { "epoch": 0.1045045045045045, "grad_norm": 8.179463296485958, "learning_rate": 3.4534534534534537e-06, "loss": 2.308753728866577, "step": 116 }, { "epoch": 0.10540540540540541, "grad_norm": 10.032980511614772, "learning_rate": 3.4834834834834835e-06, "loss": 2.679088592529297, "step": 117 }, { "epoch": 0.1063063063063063, "grad_norm": 10.103046641973926, "learning_rate": 3.513513513513514e-06, "loss": 3.0781192779541016, "step": 118 }, { "epoch": 0.10720720720720721, "grad_norm": 11.12850524281396, "learning_rate": 3.5435435435435437e-06, "loss": 3.381030797958374, "step": 119 }, { "epoch": 0.10810810810810811, "grad_norm": 15.983530195410662, "learning_rate": 3.573573573573574e-06, "loss": 3.3236331939697266, "step": 120 }, { "epoch": 0.109009009009009, "grad_norm": 12.254133581497408, "learning_rate": 3.603603603603604e-06, "loss": 3.121483564376831, "step": 121 }, { "epoch": 0.10990990990990991, "grad_norm": 16.128153623744875, "learning_rate": 3.633633633633634e-06, "loss": 2.8502209186553955, "step": 122 }, { "epoch": 0.11081081081081082, "grad_norm": 11.018669577448238, "learning_rate": 3.663663663663664e-06, "loss": 3.017697334289551, "step": 123 }, { "epoch": 0.11171171171171171, "grad_norm": 16.557843608556773, "learning_rate": 3.693693693693694e-06, "loss": 3.119530439376831, "step": 124 }, { "epoch": 0.11261261261261261, "grad_norm": 20.247862827736306, "learning_rate": 3.723723723723724e-06, "loss": 3.4490034580230713, "step": 125 }, { "epoch": 0.11351351351351352, "grad_norm": 11.866991364178078, "learning_rate": 3.7537537537537537e-06, "loss": 2.71677565574646, "step": 126 }, { "epoch": 0.11441441441441441, "grad_norm": 13.654608561170818, "learning_rate": 3.7837837837837844e-06, "loss": 3.1837260723114014, "step": 127 }, { "epoch": 0.11531531531531532, "grad_norm": 7.888677445935044, "learning_rate": 3.8138138138138143e-06, "loss": 2.5487513542175293, "step": 128 }, { "epoch": 0.11621621621621622, "grad_norm": 10.107606792947632, "learning_rate": 3.843843843843844e-06, "loss": 3.1575422286987305, "step": 129 }, { "epoch": 0.11711711711711711, "grad_norm": 22.246801953565882, "learning_rate": 3.8738738738738744e-06, "loss": 2.3211145401000977, "step": 130 }, { "epoch": 0.11801801801801802, "grad_norm": 10.113692255019279, "learning_rate": 3.903903903903904e-06, "loss": 2.823888063430786, "step": 131 }, { "epoch": 0.11891891891891893, "grad_norm": 15.495899836725995, "learning_rate": 3.933933933933934e-06, "loss": 2.629729747772217, "step": 132 }, { "epoch": 0.11981981981981982, "grad_norm": 14.895112568402705, "learning_rate": 3.9639639639639645e-06, "loss": 3.0849714279174805, "step": 133 }, { "epoch": 0.12072072072072072, "grad_norm": 8.701812516734513, "learning_rate": 3.993993993993994e-06, "loss": 3.189228057861328, "step": 134 }, { "epoch": 0.12162162162162163, "grad_norm": 17.51105059025981, "learning_rate": 4.024024024024024e-06, "loss": 3.5474631786346436, "step": 135 }, { "epoch": 0.12252252252252252, "grad_norm": 14.232975595458623, "learning_rate": 4.0540540540540545e-06, "loss": 2.9190244674682617, "step": 136 }, { "epoch": 0.12342342342342343, "grad_norm": 11.321214340628257, "learning_rate": 4.084084084084085e-06, "loss": 2.755706787109375, "step": 137 }, { "epoch": 0.12432432432432433, "grad_norm": 10.713962083603612, "learning_rate": 4.114114114114114e-06, "loss": 2.820930004119873, "step": 138 }, { "epoch": 0.12522522522522522, "grad_norm": 20.831702291364305, "learning_rate": 4.1441441441441446e-06, "loss": 3.517151355743408, "step": 139 }, { "epoch": 0.12612612612612611, "grad_norm": 17.371083213632783, "learning_rate": 4.174174174174174e-06, "loss": 3.032097578048706, "step": 140 }, { "epoch": 0.12702702702702703, "grad_norm": 11.438025273609616, "learning_rate": 4.204204204204204e-06, "loss": 2.9336793422698975, "step": 141 }, { "epoch": 0.12792792792792793, "grad_norm": 15.581995340545928, "learning_rate": 4.234234234234235e-06, "loss": 3.2077693939208984, "step": 142 }, { "epoch": 0.12882882882882882, "grad_norm": 9.595652945396042, "learning_rate": 4.264264264264265e-06, "loss": 2.4009509086608887, "step": 143 }, { "epoch": 0.12972972972972974, "grad_norm": 12.73963901565, "learning_rate": 4.294294294294294e-06, "loss": 3.3759877681732178, "step": 144 }, { "epoch": 0.13063063063063063, "grad_norm": 8.56503387459497, "learning_rate": 4.324324324324325e-06, "loss": 2.4320008754730225, "step": 145 }, { "epoch": 0.13153153153153152, "grad_norm": 13.57118369124668, "learning_rate": 4.354354354354355e-06, "loss": 3.0062925815582275, "step": 146 }, { "epoch": 0.13243243243243244, "grad_norm": 12.135823373871956, "learning_rate": 4.384384384384384e-06, "loss": 3.525376319885254, "step": 147 }, { "epoch": 0.13333333333333333, "grad_norm": 13.861297251288068, "learning_rate": 4.414414414414415e-06, "loss": 3.0181641578674316, "step": 148 }, { "epoch": 0.13423423423423422, "grad_norm": 20.567236637049717, "learning_rate": 4.444444444444444e-06, "loss": 2.6696677207946777, "step": 149 }, { "epoch": 0.13513513513513514, "grad_norm": 11.490193587124834, "learning_rate": 4.474474474474475e-06, "loss": 3.0199146270751953, "step": 150 }, { "epoch": 0.13603603603603603, "grad_norm": 7.573235089594336, "learning_rate": 4.504504504504505e-06, "loss": 3.1178090572357178, "step": 151 }, { "epoch": 0.13693693693693693, "grad_norm": 7.5936293707160045, "learning_rate": 4.534534534534535e-06, "loss": 3.1521480083465576, "step": 152 }, { "epoch": 0.13783783783783785, "grad_norm": 13.939422708113943, "learning_rate": 4.5645645645645645e-06, "loss": 2.7968058586120605, "step": 153 }, { "epoch": 0.13873873873873874, "grad_norm": 12.151567813914179, "learning_rate": 4.594594594594596e-06, "loss": 2.4209094047546387, "step": 154 }, { "epoch": 0.13963963963963963, "grad_norm": 8.324430236189901, "learning_rate": 4.624624624624625e-06, "loss": 2.684305191040039, "step": 155 }, { "epoch": 0.14054054054054055, "grad_norm": 7.014662285936094, "learning_rate": 4.654654654654655e-06, "loss": 2.868997097015381, "step": 156 }, { "epoch": 0.14144144144144144, "grad_norm": 11.634045875682444, "learning_rate": 4.684684684684685e-06, "loss": 3.4053492546081543, "step": 157 }, { "epoch": 0.14234234234234233, "grad_norm": 17.085383352071734, "learning_rate": 4.714714714714715e-06, "loss": 2.8980045318603516, "step": 158 }, { "epoch": 0.14324324324324325, "grad_norm": 9.406901847325535, "learning_rate": 4.7447447447447454e-06, "loss": 3.1916074752807617, "step": 159 }, { "epoch": 0.14414414414414414, "grad_norm": 14.368442790302291, "learning_rate": 4.774774774774775e-06, "loss": 3.0805845260620117, "step": 160 }, { "epoch": 0.14504504504504503, "grad_norm": 8.668855178764803, "learning_rate": 4.804804804804805e-06, "loss": 3.4004087448120117, "step": 161 }, { "epoch": 0.14594594594594595, "grad_norm": 13.043494784682139, "learning_rate": 4.8348348348348355e-06, "loss": 3.1242763996124268, "step": 162 }, { "epoch": 0.14684684684684685, "grad_norm": 10.178653496544205, "learning_rate": 4.864864864864866e-06, "loss": 2.9134225845336914, "step": 163 }, { "epoch": 0.14774774774774774, "grad_norm": 6.342120643856489, "learning_rate": 4.894894894894895e-06, "loss": 3.055790662765503, "step": 164 }, { "epoch": 0.14864864864864866, "grad_norm": 12.129340765287669, "learning_rate": 4.9249249249249255e-06, "loss": 2.9754109382629395, "step": 165 }, { "epoch": 0.14954954954954955, "grad_norm": 22.73014606592528, "learning_rate": 4.954954954954955e-06, "loss": 3.805633068084717, "step": 166 }, { "epoch": 0.15045045045045044, "grad_norm": 9.118927644346341, "learning_rate": 4.984984984984985e-06, "loss": 2.2361717224121094, "step": 167 }, { "epoch": 0.15135135135135136, "grad_norm": 15.801499172740884, "learning_rate": 5.0150150150150156e-06, "loss": 2.229874610900879, "step": 168 }, { "epoch": 0.15225225225225225, "grad_norm": 11.189094205311491, "learning_rate": 5.045045045045045e-06, "loss": 3.1760482788085938, "step": 169 }, { "epoch": 0.15315315315315314, "grad_norm": 13.188780981566062, "learning_rate": 5.075075075075075e-06, "loss": 2.8448102474212646, "step": 170 }, { "epoch": 0.15405405405405406, "grad_norm": 7.481996418572799, "learning_rate": 5.105105105105106e-06, "loss": 2.7560243606567383, "step": 171 }, { "epoch": 0.15495495495495495, "grad_norm": 21.437670538552602, "learning_rate": 5.135135135135135e-06, "loss": 2.64109468460083, "step": 172 }, { "epoch": 0.15585585585585585, "grad_norm": 19.055452737668237, "learning_rate": 5.165165165165165e-06, "loss": 3.4684371948242188, "step": 173 }, { "epoch": 0.15675675675675677, "grad_norm": 8.827278141715077, "learning_rate": 5.195195195195195e-06, "loss": 2.9278182983398438, "step": 174 }, { "epoch": 0.15765765765765766, "grad_norm": 11.375037513358095, "learning_rate": 5.225225225225226e-06, "loss": 2.990676164627075, "step": 175 }, { "epoch": 0.15855855855855855, "grad_norm": 9.420956828643389, "learning_rate": 5.255255255255256e-06, "loss": 2.0962235927581787, "step": 176 }, { "epoch": 0.15945945945945947, "grad_norm": 8.729852916984361, "learning_rate": 5.285285285285286e-06, "loss": 2.8978538513183594, "step": 177 }, { "epoch": 0.16036036036036036, "grad_norm": 14.666603722531908, "learning_rate": 5.315315315315316e-06, "loss": 3.186276435852051, "step": 178 }, { "epoch": 0.16126126126126125, "grad_norm": 6.984437023251985, "learning_rate": 5.345345345345346e-06, "loss": 3.3925909996032715, "step": 179 }, { "epoch": 0.16216216216216217, "grad_norm": 10.905901577860856, "learning_rate": 5.375375375375376e-06, "loss": 2.7547173500061035, "step": 180 }, { "epoch": 0.16306306306306306, "grad_norm": 11.292530700947578, "learning_rate": 5.405405405405406e-06, "loss": 3.129422664642334, "step": 181 }, { "epoch": 0.16396396396396395, "grad_norm": 9.809577115554335, "learning_rate": 5.4354354354354355e-06, "loss": 3.009068489074707, "step": 182 }, { "epoch": 0.16486486486486487, "grad_norm": 7.844265809521192, "learning_rate": 5.465465465465466e-06, "loss": 3.0007176399230957, "step": 183 }, { "epoch": 0.16576576576576577, "grad_norm": 20.260864872887325, "learning_rate": 5.495495495495496e-06, "loss": 2.944118022918701, "step": 184 }, { "epoch": 0.16666666666666666, "grad_norm": 13.536886226756584, "learning_rate": 5.5255255255255255e-06, "loss": 3.304983615875244, "step": 185 }, { "epoch": 0.16756756756756758, "grad_norm": 24.2324837682505, "learning_rate": 5.555555555555557e-06, "loss": 2.6285665035247803, "step": 186 }, { "epoch": 0.16846846846846847, "grad_norm": 20.428575340848823, "learning_rate": 5.585585585585585e-06, "loss": 3.5311875343322754, "step": 187 }, { "epoch": 0.16936936936936936, "grad_norm": 8.628152741585867, "learning_rate": 5.615615615615616e-06, "loss": 2.199131965637207, "step": 188 }, { "epoch": 0.17027027027027028, "grad_norm": 9.494224411854544, "learning_rate": 5.645645645645647e-06, "loss": 3.9044899940490723, "step": 189 }, { "epoch": 0.17117117117117117, "grad_norm": 10.448079373150058, "learning_rate": 5.675675675675676e-06, "loss": 2.909975528717041, "step": 190 }, { "epoch": 0.17207207207207206, "grad_norm": 14.728245606199566, "learning_rate": 5.7057057057057065e-06, "loss": 2.8709239959716797, "step": 191 }, { "epoch": 0.17297297297297298, "grad_norm": 14.629062202703784, "learning_rate": 5.735735735735736e-06, "loss": 2.678546190261841, "step": 192 }, { "epoch": 0.17387387387387387, "grad_norm": 18.935077283233756, "learning_rate": 5.765765765765766e-06, "loss": 3.197597026824951, "step": 193 }, { "epoch": 0.17477477477477477, "grad_norm": 17.667465284169104, "learning_rate": 5.7957957957957965e-06, "loss": 2.366365909576416, "step": 194 }, { "epoch": 0.17567567567567569, "grad_norm": 17.576062581395792, "learning_rate": 5.825825825825826e-06, "loss": 2.9731223583221436, "step": 195 }, { "epoch": 0.17657657657657658, "grad_norm": 11.088082103017733, "learning_rate": 5.855855855855856e-06, "loss": 2.6038565635681152, "step": 196 }, { "epoch": 0.17747747747747747, "grad_norm": 7.860840897402412, "learning_rate": 5.885885885885886e-06, "loss": 2.7844109535217285, "step": 197 }, { "epoch": 0.1783783783783784, "grad_norm": 9.110969545022156, "learning_rate": 5.915915915915916e-06, "loss": 2.762868642807007, "step": 198 }, { "epoch": 0.17927927927927928, "grad_norm": 18.196233055366637, "learning_rate": 5.945945945945947e-06, "loss": 3.618750810623169, "step": 199 }, { "epoch": 0.18018018018018017, "grad_norm": 11.116332907963825, "learning_rate": 5.975975975975976e-06, "loss": 3.071817398071289, "step": 200 }, { "epoch": 0.1810810810810811, "grad_norm": 13.460412830835155, "learning_rate": 6.006006006006007e-06, "loss": 2.9415009021759033, "step": 201 }, { "epoch": 0.18198198198198198, "grad_norm": 12.230300033958319, "learning_rate": 6.036036036036037e-06, "loss": 2.904818534851074, "step": 202 }, { "epoch": 0.18288288288288287, "grad_norm": 14.440108065299071, "learning_rate": 6.066066066066067e-06, "loss": 3.1651129722595215, "step": 203 }, { "epoch": 0.1837837837837838, "grad_norm": 9.206420867526731, "learning_rate": 6.096096096096097e-06, "loss": 2.672524929046631, "step": 204 }, { "epoch": 0.18468468468468469, "grad_norm": 8.45012457455799, "learning_rate": 6.126126126126126e-06, "loss": 2.4215810298919678, "step": 205 }, { "epoch": 0.18558558558558558, "grad_norm": 13.722693744341788, "learning_rate": 6.156156156156157e-06, "loss": 3.09840726852417, "step": 206 }, { "epoch": 0.1864864864864865, "grad_norm": 11.63142498028212, "learning_rate": 6.186186186186187e-06, "loss": 3.1762261390686035, "step": 207 }, { "epoch": 0.1873873873873874, "grad_norm": 10.926226524721915, "learning_rate": 6.2162162162162164e-06, "loss": 3.021667242050171, "step": 208 }, { "epoch": 0.18828828828828828, "grad_norm": 21.89247663108361, "learning_rate": 6.246246246246247e-06, "loss": 3.9234015941619873, "step": 209 }, { "epoch": 0.1891891891891892, "grad_norm": 8.168441579655488, "learning_rate": 6.276276276276276e-06, "loss": 2.78212833404541, "step": 210 }, { "epoch": 0.1900900900900901, "grad_norm": 13.350560462462122, "learning_rate": 6.3063063063063065e-06, "loss": 3.166926145553589, "step": 211 }, { "epoch": 0.19099099099099098, "grad_norm": 9.207901682088472, "learning_rate": 6.336336336336338e-06, "loss": 3.0401434898376465, "step": 212 }, { "epoch": 0.1918918918918919, "grad_norm": 17.33282946556218, "learning_rate": 6.366366366366366e-06, "loss": 3.134824514389038, "step": 213 }, { "epoch": 0.1927927927927928, "grad_norm": 14.61459028506562, "learning_rate": 6.396396396396397e-06, "loss": 3.093085527420044, "step": 214 }, { "epoch": 0.19369369369369369, "grad_norm": 28.18324111564279, "learning_rate": 6.426426426426427e-06, "loss": 2.892199993133545, "step": 215 }, { "epoch": 0.1945945945945946, "grad_norm": 11.472836781772417, "learning_rate": 6.456456456456457e-06, "loss": 2.6956958770751953, "step": 216 }, { "epoch": 0.1954954954954955, "grad_norm": 9.299172880498764, "learning_rate": 6.486486486486487e-06, "loss": 3.0531160831451416, "step": 217 }, { "epoch": 0.1963963963963964, "grad_norm": 9.086493974514497, "learning_rate": 6.516516516516517e-06, "loss": 3.7178831100463867, "step": 218 }, { "epoch": 0.1972972972972973, "grad_norm": 17.851046987553843, "learning_rate": 6.546546546546547e-06, "loss": 2.9834225177764893, "step": 219 }, { "epoch": 0.1981981981981982, "grad_norm": 15.359167786383372, "learning_rate": 6.5765765765765775e-06, "loss": 2.7804837226867676, "step": 220 }, { "epoch": 0.1990990990990991, "grad_norm": 9.470229603191179, "learning_rate": 6.606606606606607e-06, "loss": 2.727168083190918, "step": 221 }, { "epoch": 0.2, "grad_norm": 8.050118994726809, "learning_rate": 6.636636636636637e-06, "loss": 3.027698278427124, "step": 222 }, { "epoch": 0.2009009009009009, "grad_norm": 9.701821344008287, "learning_rate": 6.666666666666667e-06, "loss": 3.0183022022247314, "step": 223 }, { "epoch": 0.2018018018018018, "grad_norm": 13.605979491933239, "learning_rate": 6.696696696696697e-06, "loss": 2.932844877243042, "step": 224 }, { "epoch": 0.20270270270270271, "grad_norm": 15.56590784626651, "learning_rate": 6.726726726726728e-06, "loss": 2.8303544521331787, "step": 225 }, { "epoch": 0.2036036036036036, "grad_norm": 6.809800820822605, "learning_rate": 6.7567567567567575e-06, "loss": 2.799440860748291, "step": 226 }, { "epoch": 0.2045045045045045, "grad_norm": 12.594818924759007, "learning_rate": 6.786786786786788e-06, "loss": 2.9847114086151123, "step": 227 }, { "epoch": 0.20540540540540542, "grad_norm": 9.128656924708121, "learning_rate": 6.816816816816817e-06, "loss": 3.242035388946533, "step": 228 }, { "epoch": 0.2063063063063063, "grad_norm": 14.073242699932655, "learning_rate": 6.846846846846848e-06, "loss": 2.743699550628662, "step": 229 }, { "epoch": 0.2072072072072072, "grad_norm": 24.485071339847323, "learning_rate": 6.876876876876878e-06, "loss": 2.5092387199401855, "step": 230 }, { "epoch": 0.20810810810810812, "grad_norm": 14.048562783521968, "learning_rate": 6.906906906906907e-06, "loss": 3.34120774269104, "step": 231 }, { "epoch": 0.209009009009009, "grad_norm": 14.000061322325454, "learning_rate": 6.936936936936938e-06, "loss": 2.793375015258789, "step": 232 }, { "epoch": 0.2099099099099099, "grad_norm": 17.43089543234574, "learning_rate": 6.966966966966967e-06, "loss": 3.4899823665618896, "step": 233 }, { "epoch": 0.21081081081081082, "grad_norm": 11.566273600684262, "learning_rate": 6.996996996996997e-06, "loss": 2.355807065963745, "step": 234 }, { "epoch": 0.21171171171171171, "grad_norm": 7.409607190052066, "learning_rate": 7.027027027027028e-06, "loss": 2.8509583473205566, "step": 235 }, { "epoch": 0.2126126126126126, "grad_norm": 8.533556541298525, "learning_rate": 7.057057057057057e-06, "loss": 2.945148229598999, "step": 236 }, { "epoch": 0.21351351351351353, "grad_norm": 11.872695772014, "learning_rate": 7.087087087087087e-06, "loss": 2.959815502166748, "step": 237 }, { "epoch": 0.21441441441441442, "grad_norm": 8.96114701476462, "learning_rate": 7.117117117117117e-06, "loss": 2.9691104888916016, "step": 238 }, { "epoch": 0.2153153153153153, "grad_norm": 16.361730848996025, "learning_rate": 7.147147147147148e-06, "loss": 3.477224111557007, "step": 239 }, { "epoch": 0.21621621621621623, "grad_norm": 10.277008215567028, "learning_rate": 7.177177177177178e-06, "loss": 2.737469434738159, "step": 240 }, { "epoch": 0.21711711711711712, "grad_norm": 9.913033317953237, "learning_rate": 7.207207207207208e-06, "loss": 3.0314159393310547, "step": 241 }, { "epoch": 0.218018018018018, "grad_norm": 11.01638420875838, "learning_rate": 7.237237237237238e-06, "loss": 2.6725852489471436, "step": 242 }, { "epoch": 0.21891891891891893, "grad_norm": 16.416028513538844, "learning_rate": 7.267267267267268e-06, "loss": 3.2464849948883057, "step": 243 }, { "epoch": 0.21981981981981982, "grad_norm": 10.231502040024115, "learning_rate": 7.297297297297298e-06, "loss": 2.8645577430725098, "step": 244 }, { "epoch": 0.22072072072072071, "grad_norm": 11.10775422396275, "learning_rate": 7.327327327327328e-06, "loss": 3.2867000102996826, "step": 245 }, { "epoch": 0.22162162162162163, "grad_norm": 11.317639611848502, "learning_rate": 7.3573573573573575e-06, "loss": 2.9636595249176025, "step": 246 }, { "epoch": 0.22252252252252253, "grad_norm": 8.974086359979514, "learning_rate": 7.387387387387388e-06, "loss": 2.8476545810699463, "step": 247 }, { "epoch": 0.22342342342342342, "grad_norm": 21.21552137551918, "learning_rate": 7.417417417417418e-06, "loss": 1.956771731376648, "step": 248 }, { "epoch": 0.22432432432432434, "grad_norm": 23.44471075215663, "learning_rate": 7.447447447447448e-06, "loss": 3.0224685668945312, "step": 249 }, { "epoch": 0.22522522522522523, "grad_norm": 10.507608150607815, "learning_rate": 7.477477477477479e-06, "loss": 2.8447585105895996, "step": 250 }, { "epoch": 0.22612612612612612, "grad_norm": 8.406551181013327, "learning_rate": 7.507507507507507e-06, "loss": 3.091822624206543, "step": 251 }, { "epoch": 0.22702702702702704, "grad_norm": 15.439471955279746, "learning_rate": 7.5375375375375385e-06, "loss": 2.596545696258545, "step": 252 }, { "epoch": 0.22792792792792793, "grad_norm": 32.28371488979198, "learning_rate": 7.567567567567569e-06, "loss": 2.685854434967041, "step": 253 }, { "epoch": 0.22882882882882882, "grad_norm": 8.850471884422483, "learning_rate": 7.597597597597598e-06, "loss": 2.3421695232391357, "step": 254 }, { "epoch": 0.22972972972972974, "grad_norm": 11.577982513842546, "learning_rate": 7.6276276276276285e-06, "loss": 2.624809741973877, "step": 255 }, { "epoch": 0.23063063063063063, "grad_norm": 11.15519392482013, "learning_rate": 7.657657657657658e-06, "loss": 3.2881715297698975, "step": 256 }, { "epoch": 0.23153153153153153, "grad_norm": 8.044318807407215, "learning_rate": 7.687687687687688e-06, "loss": 3.0412468910217285, "step": 257 }, { "epoch": 0.23243243243243245, "grad_norm": 9.730521210200404, "learning_rate": 7.717717717717719e-06, "loss": 2.589629650115967, "step": 258 }, { "epoch": 0.23333333333333334, "grad_norm": 10.635194903021821, "learning_rate": 7.747747747747749e-06, "loss": 2.9791548252105713, "step": 259 }, { "epoch": 0.23423423423423423, "grad_norm": 16.18630342454132, "learning_rate": 7.77777777777778e-06, "loss": 2.757498025894165, "step": 260 }, { "epoch": 0.23513513513513515, "grad_norm": 14.686981686819545, "learning_rate": 7.807807807807808e-06, "loss": 3.445099115371704, "step": 261 }, { "epoch": 0.23603603603603604, "grad_norm": 12.096856501251967, "learning_rate": 7.837837837837838e-06, "loss": 2.058835029602051, "step": 262 }, { "epoch": 0.23693693693693693, "grad_norm": 13.50183691044128, "learning_rate": 7.867867867867868e-06, "loss": 2.9921302795410156, "step": 263 }, { "epoch": 0.23783783783783785, "grad_norm": 7.67222528315355, "learning_rate": 7.897897897897899e-06, "loss": 2.6035046577453613, "step": 264 }, { "epoch": 0.23873873873873874, "grad_norm": 14.639662719034007, "learning_rate": 7.927927927927929e-06, "loss": 2.464315414428711, "step": 265 }, { "epoch": 0.23963963963963963, "grad_norm": 14.682415018493252, "learning_rate": 7.95795795795796e-06, "loss": 2.6213533878326416, "step": 266 }, { "epoch": 0.24054054054054055, "grad_norm": 11.196079589876906, "learning_rate": 7.987987987987988e-06, "loss": 3.0618228912353516, "step": 267 }, { "epoch": 0.24144144144144145, "grad_norm": 11.835065916234305, "learning_rate": 8.018018018018018e-06, "loss": 2.7994627952575684, "step": 268 }, { "epoch": 0.24234234234234234, "grad_norm": 8.459248820549243, "learning_rate": 8.048048048048048e-06, "loss": 2.644664764404297, "step": 269 }, { "epoch": 0.24324324324324326, "grad_norm": 56.00595141573437, "learning_rate": 8.078078078078079e-06, "loss": 3.402913808822632, "step": 270 }, { "epoch": 0.24414414414414415, "grad_norm": 12.388902204738192, "learning_rate": 8.108108108108109e-06, "loss": 3.0297579765319824, "step": 271 }, { "epoch": 0.24504504504504504, "grad_norm": 17.5746724225366, "learning_rate": 8.13813813813814e-06, "loss": 3.002163887023926, "step": 272 }, { "epoch": 0.24594594594594596, "grad_norm": 12.411588847757736, "learning_rate": 8.16816816816817e-06, "loss": 2.925816774368286, "step": 273 }, { "epoch": 0.24684684684684685, "grad_norm": 10.7631886033697, "learning_rate": 8.198198198198198e-06, "loss": 3.161365509033203, "step": 274 }, { "epoch": 0.24774774774774774, "grad_norm": 19.59729985285342, "learning_rate": 8.228228228228229e-06, "loss": 3.6167702674865723, "step": 275 }, { "epoch": 0.24864864864864866, "grad_norm": 8.56052894464566, "learning_rate": 8.258258258258259e-06, "loss": 3.3377461433410645, "step": 276 }, { "epoch": 0.24954954954954955, "grad_norm": 12.5188207748136, "learning_rate": 8.288288288288289e-06, "loss": 1.945539116859436, "step": 277 }, { "epoch": 0.25045045045045045, "grad_norm": 10.559017592650845, "learning_rate": 8.31831831831832e-06, "loss": 3.231947898864746, "step": 278 }, { "epoch": 0.25135135135135134, "grad_norm": 11.363006986404962, "learning_rate": 8.348348348348348e-06, "loss": 2.5976619720458984, "step": 279 }, { "epoch": 0.25225225225225223, "grad_norm": 9.456506874294009, "learning_rate": 8.378378378378378e-06, "loss": 3.0002479553222656, "step": 280 }, { "epoch": 0.2531531531531532, "grad_norm": 7.233256385798434, "learning_rate": 8.408408408408409e-06, "loss": 2.9631714820861816, "step": 281 }, { "epoch": 0.25405405405405407, "grad_norm": 12.740994539211504, "learning_rate": 8.438438438438439e-06, "loss": 3.342411518096924, "step": 282 }, { "epoch": 0.25495495495495496, "grad_norm": 16.975791181138625, "learning_rate": 8.46846846846847e-06, "loss": 3.3739967346191406, "step": 283 }, { "epoch": 0.25585585585585585, "grad_norm": 8.000096311194891, "learning_rate": 8.4984984984985e-06, "loss": 3.274104118347168, "step": 284 }, { "epoch": 0.25675675675675674, "grad_norm": 7.915871738254484, "learning_rate": 8.52852852852853e-06, "loss": 2.7849321365356445, "step": 285 }, { "epoch": 0.25765765765765763, "grad_norm": 14.470173491798448, "learning_rate": 8.55855855855856e-06, "loss": 2.959172248840332, "step": 286 }, { "epoch": 0.2585585585585586, "grad_norm": 13.094243355506956, "learning_rate": 8.588588588588589e-06, "loss": 2.1489405632019043, "step": 287 }, { "epoch": 0.2594594594594595, "grad_norm": 10.28241813049188, "learning_rate": 8.618618618618619e-06, "loss": 2.9859347343444824, "step": 288 }, { "epoch": 0.26036036036036037, "grad_norm": 21.002045335848507, "learning_rate": 8.64864864864865e-06, "loss": 2.8425345420837402, "step": 289 }, { "epoch": 0.26126126126126126, "grad_norm": 6.556019554933329, "learning_rate": 8.67867867867868e-06, "loss": 2.2806668281555176, "step": 290 }, { "epoch": 0.26216216216216215, "grad_norm": 10.248207037492623, "learning_rate": 8.70870870870871e-06, "loss": 2.6596124172210693, "step": 291 }, { "epoch": 0.26306306306306304, "grad_norm": 14.058370075599388, "learning_rate": 8.738738738738739e-06, "loss": 3.0518083572387695, "step": 292 }, { "epoch": 0.263963963963964, "grad_norm": 14.981022948019561, "learning_rate": 8.768768768768769e-06, "loss": 2.9386940002441406, "step": 293 }, { "epoch": 0.2648648648648649, "grad_norm": 9.126114352264482, "learning_rate": 8.798798798798799e-06, "loss": 2.7580277919769287, "step": 294 }, { "epoch": 0.26576576576576577, "grad_norm": 24.283955472813055, "learning_rate": 8.82882882882883e-06, "loss": 3.1863598823547363, "step": 295 }, { "epoch": 0.26666666666666666, "grad_norm": 13.512487486565014, "learning_rate": 8.85885885885886e-06, "loss": 2.6051082611083984, "step": 296 }, { "epoch": 0.26756756756756755, "grad_norm": 8.476380947832192, "learning_rate": 8.888888888888888e-06, "loss": 2.938279867172241, "step": 297 }, { "epoch": 0.26846846846846845, "grad_norm": 8.989581462618583, "learning_rate": 8.91891891891892e-06, "loss": 2.6073546409606934, "step": 298 }, { "epoch": 0.2693693693693694, "grad_norm": 6.791346567014697, "learning_rate": 8.94894894894895e-06, "loss": 2.4681522846221924, "step": 299 }, { "epoch": 0.2702702702702703, "grad_norm": 7.570697729070026, "learning_rate": 8.97897897897898e-06, "loss": 2.572007656097412, "step": 300 }, { "epoch": 0.2711711711711712, "grad_norm": 12.873385521201621, "learning_rate": 9.00900900900901e-06, "loss": 3.200517177581787, "step": 301 }, { "epoch": 0.27207207207207207, "grad_norm": 9.919996447413737, "learning_rate": 9.03903903903904e-06, "loss": 3.6645684242248535, "step": 302 }, { "epoch": 0.27297297297297296, "grad_norm": 20.811230325398483, "learning_rate": 9.06906906906907e-06, "loss": 3.234799385070801, "step": 303 }, { "epoch": 0.27387387387387385, "grad_norm": 11.509767162102126, "learning_rate": 9.0990990990991e-06, "loss": 2.761482000350952, "step": 304 }, { "epoch": 0.2747747747747748, "grad_norm": 14.033212157225655, "learning_rate": 9.129129129129129e-06, "loss": 2.790647268295288, "step": 305 }, { "epoch": 0.2756756756756757, "grad_norm": 7.497673527220171, "learning_rate": 9.15915915915916e-06, "loss": 2.823512554168701, "step": 306 }, { "epoch": 0.2765765765765766, "grad_norm": 8.811148929818787, "learning_rate": 9.189189189189191e-06, "loss": 2.7691304683685303, "step": 307 }, { "epoch": 0.2774774774774775, "grad_norm": 14.432072629353575, "learning_rate": 9.21921921921922e-06, "loss": 2.9669785499572754, "step": 308 }, { "epoch": 0.27837837837837837, "grad_norm": 11.676462619066314, "learning_rate": 9.24924924924925e-06, "loss": 2.898813247680664, "step": 309 }, { "epoch": 0.27927927927927926, "grad_norm": 11.938478478590634, "learning_rate": 9.27927927927928e-06, "loss": 2.7341861724853516, "step": 310 }, { "epoch": 0.2801801801801802, "grad_norm": 7.771546309843594, "learning_rate": 9.30930930930931e-06, "loss": 3.296088218688965, "step": 311 }, { "epoch": 0.2810810810810811, "grad_norm": 15.101892303227435, "learning_rate": 9.339339339339341e-06, "loss": 2.7351455688476562, "step": 312 }, { "epoch": 0.281981981981982, "grad_norm": 15.354103308230707, "learning_rate": 9.36936936936937e-06, "loss": 2.7281081676483154, "step": 313 }, { "epoch": 0.2828828828828829, "grad_norm": 10.32489615719569, "learning_rate": 9.3993993993994e-06, "loss": 3.0136618614196777, "step": 314 }, { "epoch": 0.28378378378378377, "grad_norm": 10.964373335445094, "learning_rate": 9.42942942942943e-06, "loss": 2.7031970024108887, "step": 315 }, { "epoch": 0.28468468468468466, "grad_norm": 13.771411818974284, "learning_rate": 9.45945945945946e-06, "loss": 3.1682801246643066, "step": 316 }, { "epoch": 0.2855855855855856, "grad_norm": 17.41638771477687, "learning_rate": 9.489489489489491e-06, "loss": 2.969046115875244, "step": 317 }, { "epoch": 0.2864864864864865, "grad_norm": 8.761151924702018, "learning_rate": 9.51951951951952e-06, "loss": 2.702993392944336, "step": 318 }, { "epoch": 0.2873873873873874, "grad_norm": 19.846208771744315, "learning_rate": 9.54954954954955e-06, "loss": 3.4983010292053223, "step": 319 }, { "epoch": 0.2882882882882883, "grad_norm": 10.962444668108311, "learning_rate": 9.57957957957958e-06, "loss": 2.782130718231201, "step": 320 }, { "epoch": 0.2891891891891892, "grad_norm": 8.817707648077757, "learning_rate": 9.60960960960961e-06, "loss": 2.8512470722198486, "step": 321 }, { "epoch": 0.29009009009009007, "grad_norm": 25.911846990827954, "learning_rate": 9.63963963963964e-06, "loss": 2.665433645248413, "step": 322 }, { "epoch": 0.290990990990991, "grad_norm": 8.934711465258212, "learning_rate": 9.669669669669671e-06, "loss": 3.3899407386779785, "step": 323 }, { "epoch": 0.2918918918918919, "grad_norm": 19.379009729221924, "learning_rate": 9.699699699699701e-06, "loss": 2.7415874004364014, "step": 324 }, { "epoch": 0.2927927927927928, "grad_norm": 7.27095865249855, "learning_rate": 9.729729729729732e-06, "loss": 2.793801784515381, "step": 325 }, { "epoch": 0.2936936936936937, "grad_norm": 10.159554727073187, "learning_rate": 9.75975975975976e-06, "loss": 3.259864091873169, "step": 326 }, { "epoch": 0.2945945945945946, "grad_norm": 10.694615892664855, "learning_rate": 9.78978978978979e-06, "loss": 2.6160967350006104, "step": 327 }, { "epoch": 0.2954954954954955, "grad_norm": 9.5064054134046, "learning_rate": 9.81981981981982e-06, "loss": 2.9735093116760254, "step": 328 }, { "epoch": 0.2963963963963964, "grad_norm": 9.9763523327945, "learning_rate": 9.849849849849851e-06, "loss": 2.9225847721099854, "step": 329 }, { "epoch": 0.2972972972972973, "grad_norm": 15.841275634630133, "learning_rate": 9.879879879879881e-06, "loss": 3.438632011413574, "step": 330 }, { "epoch": 0.2981981981981982, "grad_norm": 13.39081074070422, "learning_rate": 9.90990990990991e-06, "loss": 2.254317283630371, "step": 331 }, { "epoch": 0.2990990990990991, "grad_norm": 6.984182721517865, "learning_rate": 9.93993993993994e-06, "loss": 3.190903425216675, "step": 332 }, { "epoch": 0.3, "grad_norm": 9.309658146682933, "learning_rate": 9.96996996996997e-06, "loss": 3.059541702270508, "step": 333 }, { "epoch": 0.3009009009009009, "grad_norm": 14.937607378881662, "learning_rate": 1e-05, "loss": 2.9330413341522217, "step": 334 }, { "epoch": 0.30180180180180183, "grad_norm": 13.51602566312119, "learning_rate": 9.999997252952125e-06, "loss": 2.683208703994751, "step": 335 }, { "epoch": 0.3027027027027027, "grad_norm": 12.800960609903019, "learning_rate": 9.999989011811516e-06, "loss": 2.7787554264068604, "step": 336 }, { "epoch": 0.3036036036036036, "grad_norm": 13.71715277590532, "learning_rate": 9.99997527658723e-06, "loss": 2.9442594051361084, "step": 337 }, { "epoch": 0.3045045045045045, "grad_norm": 8.545557973152443, "learning_rate": 9.99995604729436e-06, "loss": 2.9743549823760986, "step": 338 }, { "epoch": 0.3054054054054054, "grad_norm": 21.6858655013613, "learning_rate": 9.999931323954033e-06, "loss": 3.2620232105255127, "step": 339 }, { "epoch": 0.3063063063063063, "grad_norm": 8.534962992180814, "learning_rate": 9.999901106593418e-06, "loss": 3.381075620651245, "step": 340 }, { "epoch": 0.30720720720720723, "grad_norm": 11.249096787047085, "learning_rate": 9.999865395245715e-06, "loss": 2.0019235610961914, "step": 341 }, { "epoch": 0.3081081081081081, "grad_norm": 28.871822700019855, "learning_rate": 9.999824189950168e-06, "loss": 3.112780809402466, "step": 342 }, { "epoch": 0.309009009009009, "grad_norm": 12.415582532638952, "learning_rate": 9.999777490752056e-06, "loss": 1.8625688552856445, "step": 343 }, { "epoch": 0.3099099099099099, "grad_norm": 11.271771743558347, "learning_rate": 9.999725297702687e-06, "loss": 3.349148750305176, "step": 344 }, { "epoch": 0.3108108108108108, "grad_norm": 23.20299513286698, "learning_rate": 9.999667610859416e-06, "loss": 3.0870251655578613, "step": 345 }, { "epoch": 0.3117117117117117, "grad_norm": 6.300127629414208, "learning_rate": 9.999604430285628e-06, "loss": 2.8080344200134277, "step": 346 }, { "epoch": 0.31261261261261264, "grad_norm": 14.291296171659212, "learning_rate": 9.999535756050749e-06, "loss": 2.659015655517578, "step": 347 }, { "epoch": 0.31351351351351353, "grad_norm": 9.428101003216518, "learning_rate": 9.999461588230238e-06, "loss": 2.754297971725464, "step": 348 }, { "epoch": 0.3144144144144144, "grad_norm": 17.324448052496745, "learning_rate": 9.999381926905592e-06, "loss": 2.883801221847534, "step": 349 }, { "epoch": 0.3153153153153153, "grad_norm": 44.62966117466432, "learning_rate": 9.999296772164347e-06, "loss": 2.813298225402832, "step": 350 }, { "epoch": 0.3162162162162162, "grad_norm": 8.03713677990249, "learning_rate": 9.99920612410007e-06, "loss": 2.9977688789367676, "step": 351 }, { "epoch": 0.3171171171171171, "grad_norm": 12.171635869334452, "learning_rate": 9.999109982812368e-06, "loss": 2.778214693069458, "step": 352 }, { "epoch": 0.31801801801801804, "grad_norm": 8.896688386600355, "learning_rate": 9.99900834840688e-06, "loss": 2.320204496383667, "step": 353 }, { "epoch": 0.31891891891891894, "grad_norm": 31.70762213957997, "learning_rate": 9.998901220995288e-06, "loss": 2.4865245819091797, "step": 354 }, { "epoch": 0.31981981981981983, "grad_norm": 11.559063762294295, "learning_rate": 9.998788600695304e-06, "loss": 2.8191728591918945, "step": 355 }, { "epoch": 0.3207207207207207, "grad_norm": 8.430927558151229, "learning_rate": 9.998670487630677e-06, "loss": 3.1708900928497314, "step": 356 }, { "epoch": 0.3216216216216216, "grad_norm": 9.369043149871663, "learning_rate": 9.998546881931193e-06, "loss": 3.329425811767578, "step": 357 }, { "epoch": 0.3225225225225225, "grad_norm": 18.13050608374191, "learning_rate": 9.99841778373267e-06, "loss": 2.7754788398742676, "step": 358 }, { "epoch": 0.32342342342342345, "grad_norm": 9.164569001044327, "learning_rate": 9.998283193176965e-06, "loss": 2.8335437774658203, "step": 359 }, { "epoch": 0.32432432432432434, "grad_norm": 14.19126210540288, "learning_rate": 9.99814311041197e-06, "loss": 2.9259860515594482, "step": 360 }, { "epoch": 0.32522522522522523, "grad_norm": 14.102586629742616, "learning_rate": 9.99799753559161e-06, "loss": 2.768043041229248, "step": 361 }, { "epoch": 0.3261261261261261, "grad_norm": 13.742512069399446, "learning_rate": 9.997846468875842e-06, "loss": 2.8981881141662598, "step": 362 }, { "epoch": 0.327027027027027, "grad_norm": 9.548610700595049, "learning_rate": 9.997689910430665e-06, "loss": 3.3076324462890625, "step": 363 }, { "epoch": 0.3279279279279279, "grad_norm": 13.513857277910285, "learning_rate": 9.997527860428108e-06, "loss": 3.220381736755371, "step": 364 }, { "epoch": 0.32882882882882886, "grad_norm": 14.10797346437793, "learning_rate": 9.997360319046234e-06, "loss": 3.3665030002593994, "step": 365 }, { "epoch": 0.32972972972972975, "grad_norm": 26.072002812869076, "learning_rate": 9.997187286469139e-06, "loss": 3.1026194095611572, "step": 366 }, { "epoch": 0.33063063063063064, "grad_norm": 18.001123799541062, "learning_rate": 9.997008762886957e-06, "loss": 3.223040819168091, "step": 367 }, { "epoch": 0.33153153153153153, "grad_norm": 22.442381380443265, "learning_rate": 9.996824748495852e-06, "loss": 3.1964216232299805, "step": 368 }, { "epoch": 0.3324324324324324, "grad_norm": 11.751647444389079, "learning_rate": 9.996635243498023e-06, "loss": 2.9581570625305176, "step": 369 }, { "epoch": 0.3333333333333333, "grad_norm": 8.490228785197973, "learning_rate": 9.9964402481017e-06, "loss": 2.4892563819885254, "step": 370 }, { "epoch": 0.3342342342342342, "grad_norm": 22.120876400061814, "learning_rate": 9.996239762521152e-06, "loss": 2.7092278003692627, "step": 371 }, { "epoch": 0.33513513513513515, "grad_norm": 49.07721434544736, "learning_rate": 9.99603378697667e-06, "loss": 2.9483840465545654, "step": 372 }, { "epoch": 0.33603603603603605, "grad_norm": 10.959565962421676, "learning_rate": 9.99582232169459e-06, "loss": 2.596356153488159, "step": 373 }, { "epoch": 0.33693693693693694, "grad_norm": 10.446306364592443, "learning_rate": 9.995605366907271e-06, "loss": 2.5975446701049805, "step": 374 }, { "epoch": 0.33783783783783783, "grad_norm": 10.807935753392998, "learning_rate": 9.995382922853106e-06, "loss": 2.6463375091552734, "step": 375 }, { "epoch": 0.3387387387387387, "grad_norm": 14.468879138447939, "learning_rate": 9.995154989776523e-06, "loss": 2.805997371673584, "step": 376 }, { "epoch": 0.3396396396396396, "grad_norm": 12.627586198607087, "learning_rate": 9.994921567927979e-06, "loss": 2.336535930633545, "step": 377 }, { "epoch": 0.34054054054054056, "grad_norm": 14.897162080513253, "learning_rate": 9.99468265756396e-06, "loss": 2.5676686763763428, "step": 378 }, { "epoch": 0.34144144144144145, "grad_norm": 29.61898488885318, "learning_rate": 9.99443825894699e-06, "loss": 3.419049024581909, "step": 379 }, { "epoch": 0.34234234234234234, "grad_norm": 10.532926204598702, "learning_rate": 9.994188372345615e-06, "loss": 2.9493656158447266, "step": 380 }, { "epoch": 0.34324324324324323, "grad_norm": 14.91480845391336, "learning_rate": 9.993932998034417e-06, "loss": 3.0369420051574707, "step": 381 }, { "epoch": 0.3441441441441441, "grad_norm": 30.467746807491423, "learning_rate": 9.993672136294004e-06, "loss": 3.0565989017486572, "step": 382 }, { "epoch": 0.345045045045045, "grad_norm": 11.506205536111601, "learning_rate": 9.993405787411017e-06, "loss": 2.8990392684936523, "step": 383 }, { "epoch": 0.34594594594594597, "grad_norm": 19.374470886381832, "learning_rate": 9.993133951678126e-06, "loss": 2.2662172317504883, "step": 384 }, { "epoch": 0.34684684684684686, "grad_norm": 15.790913391354422, "learning_rate": 9.99285662939403e-06, "loss": 3.020148277282715, "step": 385 }, { "epoch": 0.34774774774774775, "grad_norm": 14.007809354711547, "learning_rate": 9.992573820863455e-06, "loss": 3.2606804370880127, "step": 386 }, { "epoch": 0.34864864864864864, "grad_norm": 11.517630489972202, "learning_rate": 9.992285526397156e-06, "loss": 2.9776859283447266, "step": 387 }, { "epoch": 0.34954954954954953, "grad_norm": 14.821112490097487, "learning_rate": 9.991991746311916e-06, "loss": 2.882371425628662, "step": 388 }, { "epoch": 0.3504504504504504, "grad_norm": 8.489287848634739, "learning_rate": 9.991692480930548e-06, "loss": 3.1440539360046387, "step": 389 }, { "epoch": 0.35135135135135137, "grad_norm": 16.962765685771142, "learning_rate": 9.99138773058189e-06, "loss": 3.225944995880127, "step": 390 }, { "epoch": 0.35225225225225226, "grad_norm": 9.881647023432432, "learning_rate": 9.991077495600806e-06, "loss": 2.9112517833709717, "step": 391 }, { "epoch": 0.35315315315315315, "grad_norm": 12.774530733801422, "learning_rate": 9.990761776328188e-06, "loss": 2.7864723205566406, "step": 392 }, { "epoch": 0.35405405405405405, "grad_norm": 11.246619416376717, "learning_rate": 9.990440573110959e-06, "loss": 2.6985116004943848, "step": 393 }, { "epoch": 0.35495495495495494, "grad_norm": 14.885147823780443, "learning_rate": 9.990113886302057e-06, "loss": 3.236449956893921, "step": 394 }, { "epoch": 0.35585585585585583, "grad_norm": 10.90873280846296, "learning_rate": 9.989781716260456e-06, "loss": 2.6477572917938232, "step": 395 }, { "epoch": 0.3567567567567568, "grad_norm": 11.210641614835065, "learning_rate": 9.989444063351148e-06, "loss": 2.5131397247314453, "step": 396 }, { "epoch": 0.35765765765765767, "grad_norm": 7.430746275852679, "learning_rate": 9.989100927945155e-06, "loss": 2.6379480361938477, "step": 397 }, { "epoch": 0.35855855855855856, "grad_norm": 9.221946585921508, "learning_rate": 9.988752310419518e-06, "loss": 2.8938801288604736, "step": 398 }, { "epoch": 0.35945945945945945, "grad_norm": 14.126719322984972, "learning_rate": 9.988398211157308e-06, "loss": 2.8114590644836426, "step": 399 }, { "epoch": 0.36036036036036034, "grad_norm": 20.861023641973205, "learning_rate": 9.988038630547613e-06, "loss": 2.9054737091064453, "step": 400 }, { "epoch": 0.36126126126126124, "grad_norm": 13.948322199293036, "learning_rate": 9.98767356898555e-06, "loss": 3.147998571395874, "step": 401 }, { "epoch": 0.3621621621621622, "grad_norm": 25.70114904388452, "learning_rate": 9.987303026872252e-06, "loss": 2.8002514839172363, "step": 402 }, { "epoch": 0.3630630630630631, "grad_norm": 9.063663563693485, "learning_rate": 9.986927004614881e-06, "loss": 3.302854299545288, "step": 403 }, { "epoch": 0.36396396396396397, "grad_norm": 14.173772835618514, "learning_rate": 9.986545502626616e-06, "loss": 3.1031603813171387, "step": 404 }, { "epoch": 0.36486486486486486, "grad_norm": 17.929471705345165, "learning_rate": 9.986158521326659e-06, "loss": 3.3072774410247803, "step": 405 }, { "epoch": 0.36576576576576575, "grad_norm": 10.510314331851996, "learning_rate": 9.985766061140233e-06, "loss": 3.089600086212158, "step": 406 }, { "epoch": 0.36666666666666664, "grad_norm": 16.161964021767908, "learning_rate": 9.98536812249858e-06, "loss": 3.111833095550537, "step": 407 }, { "epoch": 0.3675675675675676, "grad_norm": 12.299022871195842, "learning_rate": 9.98496470583896e-06, "loss": 2.920365333557129, "step": 408 }, { "epoch": 0.3684684684684685, "grad_norm": 11.919251390163824, "learning_rate": 9.984555811604662e-06, "loss": 2.449800491333008, "step": 409 }, { "epoch": 0.36936936936936937, "grad_norm": 9.051532590643848, "learning_rate": 9.984141440244978e-06, "loss": 2.639411449432373, "step": 410 }, { "epoch": 0.37027027027027026, "grad_norm": 10.664942728836646, "learning_rate": 9.983721592215235e-06, "loss": 2.888113498687744, "step": 411 }, { "epoch": 0.37117117117117115, "grad_norm": 8.057489950991233, "learning_rate": 9.983296267976766e-06, "loss": 2.8491649627685547, "step": 412 }, { "epoch": 0.37207207207207205, "grad_norm": 6.843779683988893, "learning_rate": 9.982865467996925e-06, "loss": 2.389925003051758, "step": 413 }, { "epoch": 0.372972972972973, "grad_norm": 14.58676875082707, "learning_rate": 9.982429192749085e-06, "loss": 2.9418718814849854, "step": 414 }, { "epoch": 0.3738738738738739, "grad_norm": 31.64837532039199, "learning_rate": 9.981987442712634e-06, "loss": 2.4360032081604004, "step": 415 }, { "epoch": 0.3747747747747748, "grad_norm": 11.906904188716325, "learning_rate": 9.981540218372973e-06, "loss": 2.563695192337036, "step": 416 }, { "epoch": 0.37567567567567567, "grad_norm": 11.99370443118536, "learning_rate": 9.981087520221522e-06, "loss": 2.920173406600952, "step": 417 }, { "epoch": 0.37657657657657656, "grad_norm": 8.474036221130298, "learning_rate": 9.980629348755714e-06, "loss": 2.159977912902832, "step": 418 }, { "epoch": 0.37747747747747745, "grad_norm": 8.909386719480173, "learning_rate": 9.980165704478999e-06, "loss": 2.4905076026916504, "step": 419 }, { "epoch": 0.3783783783783784, "grad_norm": 10.712045905675597, "learning_rate": 9.979696587900836e-06, "loss": 3.200435161590576, "step": 420 }, { "epoch": 0.3792792792792793, "grad_norm": 21.35121092434891, "learning_rate": 9.9792219995367e-06, "loss": 3.889805555343628, "step": 421 }, { "epoch": 0.3801801801801802, "grad_norm": 8.791945223524177, "learning_rate": 9.978741939908076e-06, "loss": 2.8661367893218994, "step": 422 }, { "epoch": 0.3810810810810811, "grad_norm": 12.216964876719446, "learning_rate": 9.978256409542463e-06, "loss": 2.6770262718200684, "step": 423 }, { "epoch": 0.38198198198198197, "grad_norm": 10.595853465936564, "learning_rate": 9.977765408973374e-06, "loss": 2.4664907455444336, "step": 424 }, { "epoch": 0.38288288288288286, "grad_norm": 12.598600276222705, "learning_rate": 9.977268938740328e-06, "loss": 2.2553420066833496, "step": 425 }, { "epoch": 0.3837837837837838, "grad_norm": 8.339940756295746, "learning_rate": 9.976766999388854e-06, "loss": 2.898794174194336, "step": 426 }, { "epoch": 0.3846846846846847, "grad_norm": 50.14623659446829, "learning_rate": 9.976259591470496e-06, "loss": 2.727290391921997, "step": 427 }, { "epoch": 0.3855855855855856, "grad_norm": 11.631026669444639, "learning_rate": 9.975746715542803e-06, "loss": 3.2811455726623535, "step": 428 }, { "epoch": 0.3864864864864865, "grad_norm": 11.7664612995829, "learning_rate": 9.97522837216933e-06, "loss": 2.847942590713501, "step": 429 }, { "epoch": 0.38738738738738737, "grad_norm": 14.446151150330858, "learning_rate": 9.974704561919645e-06, "loss": 3.5835790634155273, "step": 430 }, { "epoch": 0.38828828828828826, "grad_norm": 7.375059114002889, "learning_rate": 9.97417528536932e-06, "loss": 2.6707923412323, "step": 431 }, { "epoch": 0.3891891891891892, "grad_norm": 20.214165459908816, "learning_rate": 9.973640543099936e-06, "loss": 2.7764248847961426, "step": 432 }, { "epoch": 0.3900900900900901, "grad_norm": 8.803838609901794, "learning_rate": 9.973100335699075e-06, "loss": 2.204397201538086, "step": 433 }, { "epoch": 0.390990990990991, "grad_norm": 10.738546550577066, "learning_rate": 9.97255466376033e-06, "loss": 2.7971489429473877, "step": 434 }, { "epoch": 0.3918918918918919, "grad_norm": 10.776977738087297, "learning_rate": 9.972003527883295e-06, "loss": 2.568075656890869, "step": 435 }, { "epoch": 0.3927927927927928, "grad_norm": 6.921417468230944, "learning_rate": 9.971446928673566e-06, "loss": 2.9334769248962402, "step": 436 }, { "epoch": 0.39369369369369367, "grad_norm": 12.307022988169349, "learning_rate": 9.970884866742748e-06, "loss": 2.8420519828796387, "step": 437 }, { "epoch": 0.3945945945945946, "grad_norm": 10.81190899170059, "learning_rate": 9.970317342708444e-06, "loss": 2.915928602218628, "step": 438 }, { "epoch": 0.3954954954954955, "grad_norm": 15.502005814041977, "learning_rate": 9.969744357194262e-06, "loss": 2.37294864654541, "step": 439 }, { "epoch": 0.3963963963963964, "grad_norm": 18.933632612278064, "learning_rate": 9.969165910829807e-06, "loss": 2.4669623374938965, "step": 440 }, { "epoch": 0.3972972972972973, "grad_norm": 9.062906848264465, "learning_rate": 9.96858200425069e-06, "loss": 2.9604015350341797, "step": 441 }, { "epoch": 0.3981981981981982, "grad_norm": 11.843111873808983, "learning_rate": 9.967992638098517e-06, "loss": 2.989811658859253, "step": 442 }, { "epoch": 0.3990990990990991, "grad_norm": 7.270834648998459, "learning_rate": 9.967397813020892e-06, "loss": 2.972478151321411, "step": 443 }, { "epoch": 0.4, "grad_norm": 16.076482807125995, "learning_rate": 9.966797529671424e-06, "loss": 2.6020092964172363, "step": 444 }, { "epoch": 0.4009009009009009, "grad_norm": 11.032887388520376, "learning_rate": 9.966191788709716e-06, "loss": 2.7426490783691406, "step": 445 }, { "epoch": 0.4018018018018018, "grad_norm": 10.299569964339257, "learning_rate": 9.965580590801364e-06, "loss": 3.3576645851135254, "step": 446 }, { "epoch": 0.4027027027027027, "grad_norm": 11.75702071005093, "learning_rate": 9.96496393661797e-06, "loss": 2.7325360774993896, "step": 447 }, { "epoch": 0.4036036036036036, "grad_norm": 11.199429295188697, "learning_rate": 9.96434182683712e-06, "loss": 3.0111613273620605, "step": 448 }, { "epoch": 0.4045045045045045, "grad_norm": 12.438250339217415, "learning_rate": 9.963714262142402e-06, "loss": 2.617116689682007, "step": 449 }, { "epoch": 0.40540540540540543, "grad_norm": 6.967396475542938, "learning_rate": 9.963081243223396e-06, "loss": 2.620596408843994, "step": 450 }, { "epoch": 0.4063063063063063, "grad_norm": 11.90892747992436, "learning_rate": 9.962442770775675e-06, "loss": 2.8448939323425293, "step": 451 }, { "epoch": 0.4072072072072072, "grad_norm": 8.872921289600368, "learning_rate": 9.961798845500808e-06, "loss": 2.4782495498657227, "step": 452 }, { "epoch": 0.4081081081081081, "grad_norm": 11.848919914627832, "learning_rate": 9.961149468106346e-06, "loss": 2.8514678478240967, "step": 453 }, { "epoch": 0.409009009009009, "grad_norm": 14.842331093483242, "learning_rate": 9.960494639305843e-06, "loss": 3.000009775161743, "step": 454 }, { "epoch": 0.4099099099099099, "grad_norm": 15.653102268407324, "learning_rate": 9.959834359818836e-06, "loss": 3.2245235443115234, "step": 455 }, { "epoch": 0.41081081081081083, "grad_norm": 8.003740413363506, "learning_rate": 9.95916863037085e-06, "loss": 2.044393539428711, "step": 456 }, { "epoch": 0.4117117117117117, "grad_norm": 15.394222737473838, "learning_rate": 9.958497451693406e-06, "loss": 3.533689022064209, "step": 457 }, { "epoch": 0.4126126126126126, "grad_norm": 9.791143790822147, "learning_rate": 9.957820824524003e-06, "loss": 2.6153087615966797, "step": 458 }, { "epoch": 0.4135135135135135, "grad_norm": 9.589947001463136, "learning_rate": 9.957138749606134e-06, "loss": 2.8632655143737793, "step": 459 }, { "epoch": 0.4144144144144144, "grad_norm": 13.966272316065108, "learning_rate": 9.956451227689278e-06, "loss": 3.260765790939331, "step": 460 }, { "epoch": 0.4153153153153153, "grad_norm": 8.677815370787325, "learning_rate": 9.955758259528895e-06, "loss": 2.7139787673950195, "step": 461 }, { "epoch": 0.41621621621621624, "grad_norm": 7.5992044736880695, "learning_rate": 9.955059845886432e-06, "loss": 2.8179216384887695, "step": 462 }, { "epoch": 0.41711711711711713, "grad_norm": 20.189192906766145, "learning_rate": 9.954355987529319e-06, "loss": 1.8593086004257202, "step": 463 }, { "epoch": 0.418018018018018, "grad_norm": 9.294365372544148, "learning_rate": 9.95364668523097e-06, "loss": 3.129530668258667, "step": 464 }, { "epoch": 0.4189189189189189, "grad_norm": 11.10229210239617, "learning_rate": 9.95293193977078e-06, "loss": 2.4548749923706055, "step": 465 }, { "epoch": 0.4198198198198198, "grad_norm": 12.303290597408346, "learning_rate": 9.952211751934125e-06, "loss": 2.833526611328125, "step": 466 }, { "epoch": 0.4207207207207207, "grad_norm": 18.39720072416737, "learning_rate": 9.951486122512358e-06, "loss": 3.272202253341675, "step": 467 }, { "epoch": 0.42162162162162165, "grad_norm": 10.962511020348291, "learning_rate": 9.950755052302819e-06, "loss": 2.5285682678222656, "step": 468 }, { "epoch": 0.42252252252252254, "grad_norm": 11.343724042754738, "learning_rate": 9.950018542108818e-06, "loss": 3.135453939437866, "step": 469 }, { "epoch": 0.42342342342342343, "grad_norm": 7.162285143051499, "learning_rate": 9.949276592739652e-06, "loss": 2.7263050079345703, "step": 470 }, { "epoch": 0.4243243243243243, "grad_norm": 8.271823810331844, "learning_rate": 9.948529205010583e-06, "loss": 2.757145404815674, "step": 471 }, { "epoch": 0.4252252252252252, "grad_norm": 18.888485463079643, "learning_rate": 9.94777637974286e-06, "loss": 3.6016228199005127, "step": 472 }, { "epoch": 0.4261261261261261, "grad_norm": 13.705038817017533, "learning_rate": 9.947018117763698e-06, "loss": 3.1560139656066895, "step": 473 }, { "epoch": 0.42702702702702705, "grad_norm": 8.131771829742979, "learning_rate": 9.946254419906293e-06, "loss": 2.994487762451172, "step": 474 }, { "epoch": 0.42792792792792794, "grad_norm": 8.192010903051235, "learning_rate": 9.945485287009808e-06, "loss": 2.746253728866577, "step": 475 }, { "epoch": 0.42882882882882883, "grad_norm": 21.46422155759696, "learning_rate": 9.944710719919381e-06, "loss": 3.2966389656066895, "step": 476 }, { "epoch": 0.4297297297297297, "grad_norm": 12.75970618129319, "learning_rate": 9.943930719486123e-06, "loss": 2.8765969276428223, "step": 477 }, { "epoch": 0.4306306306306306, "grad_norm": 11.278838807173917, "learning_rate": 9.943145286567114e-06, "loss": 2.933793067932129, "step": 478 }, { "epoch": 0.4315315315315315, "grad_norm": 17.89762340765894, "learning_rate": 9.942354422025402e-06, "loss": 2.363278865814209, "step": 479 }, { "epoch": 0.43243243243243246, "grad_norm": 13.648964264768017, "learning_rate": 9.94155812673e-06, "loss": 2.65312123298645, "step": 480 }, { "epoch": 0.43333333333333335, "grad_norm": 20.414504599109517, "learning_rate": 9.940756401555899e-06, "loss": 2.994137763977051, "step": 481 }, { "epoch": 0.43423423423423424, "grad_norm": 8.091874246038742, "learning_rate": 9.939949247384046e-06, "loss": 2.7260451316833496, "step": 482 }, { "epoch": 0.43513513513513513, "grad_norm": 11.05681404847883, "learning_rate": 9.939136665101359e-06, "loss": 3.468167781829834, "step": 483 }, { "epoch": 0.436036036036036, "grad_norm": 8.502576465292318, "learning_rate": 9.938318655600716e-06, "loss": 2.5464885234832764, "step": 484 }, { "epoch": 0.4369369369369369, "grad_norm": 29.646600363505314, "learning_rate": 9.937495219780968e-06, "loss": 3.2312614917755127, "step": 485 }, { "epoch": 0.43783783783783786, "grad_norm": 14.764378214629252, "learning_rate": 9.936666358546915e-06, "loss": 2.948831796646118, "step": 486 }, { "epoch": 0.43873873873873875, "grad_norm": 10.897389925985015, "learning_rate": 9.935832072809329e-06, "loss": 2.6337313652038574, "step": 487 }, { "epoch": 0.43963963963963965, "grad_norm": 29.19644238276464, "learning_rate": 9.93499236348494e-06, "loss": 3.6373133659362793, "step": 488 }, { "epoch": 0.44054054054054054, "grad_norm": 14.453672964670863, "learning_rate": 9.934147231496434e-06, "loss": 2.855248212814331, "step": 489 }, { "epoch": 0.44144144144144143, "grad_norm": 27.842608083205935, "learning_rate": 9.933296677772462e-06, "loss": 3.682950019836426, "step": 490 }, { "epoch": 0.4423423423423423, "grad_norm": 11.201091380440992, "learning_rate": 9.932440703247623e-06, "loss": 2.6520917415618896, "step": 491 }, { "epoch": 0.44324324324324327, "grad_norm": 8.675835563073338, "learning_rate": 9.931579308862484e-06, "loss": 2.5899949073791504, "step": 492 }, { "epoch": 0.44414414414414416, "grad_norm": 36.134451593989255, "learning_rate": 9.930712495563559e-06, "loss": 2.4611892700195312, "step": 493 }, { "epoch": 0.44504504504504505, "grad_norm": 17.104554033233082, "learning_rate": 9.929840264303318e-06, "loss": 2.9234981536865234, "step": 494 }, { "epoch": 0.44594594594594594, "grad_norm": 11.425194577937127, "learning_rate": 9.928962616040187e-06, "loss": 3.180088996887207, "step": 495 }, { "epoch": 0.44684684684684683, "grad_norm": 14.72422917409879, "learning_rate": 9.928079551738542e-06, "loss": 3.261711835861206, "step": 496 }, { "epoch": 0.4477477477477477, "grad_norm": 18.04668472832457, "learning_rate": 9.927191072368714e-06, "loss": 2.876741886138916, "step": 497 }, { "epoch": 0.4486486486486487, "grad_norm": 10.242062741938264, "learning_rate": 9.926297178906976e-06, "loss": 3.0803256034851074, "step": 498 }, { "epoch": 0.44954954954954957, "grad_norm": 9.858260384743199, "learning_rate": 9.925397872335558e-06, "loss": 1.8135777711868286, "step": 499 }, { "epoch": 0.45045045045045046, "grad_norm": 16.353432239840764, "learning_rate": 9.924493153642636e-06, "loss": 2.367913246154785, "step": 500 }, { "epoch": 0.45135135135135135, "grad_norm": 12.678071821463053, "learning_rate": 9.92358302382233e-06, "loss": 3.121692180633545, "step": 501 }, { "epoch": 0.45225225225225224, "grad_norm": 11.986141950956682, "learning_rate": 9.92266748387471e-06, "loss": 2.9279327392578125, "step": 502 }, { "epoch": 0.45315315315315313, "grad_norm": 17.26673357479762, "learning_rate": 9.921746534805789e-06, "loss": 2.9483375549316406, "step": 503 }, { "epoch": 0.4540540540540541, "grad_norm": 12.8745885638904, "learning_rate": 9.920820177627522e-06, "loss": 2.940941095352173, "step": 504 }, { "epoch": 0.45495495495495497, "grad_norm": 13.512081445743542, "learning_rate": 9.919888413357808e-06, "loss": 2.897225856781006, "step": 505 }, { "epoch": 0.45585585585585586, "grad_norm": 11.575210720999843, "learning_rate": 9.918951243020489e-06, "loss": 2.8494417667388916, "step": 506 }, { "epoch": 0.45675675675675675, "grad_norm": 27.703000681652096, "learning_rate": 9.918008667645344e-06, "loss": 2.347216844558716, "step": 507 }, { "epoch": 0.45765765765765765, "grad_norm": 8.180610209870624, "learning_rate": 9.917060688268094e-06, "loss": 3.1521518230438232, "step": 508 }, { "epoch": 0.45855855855855854, "grad_norm": 13.417460116546783, "learning_rate": 9.916107305930397e-06, "loss": 2.6805672645568848, "step": 509 }, { "epoch": 0.4594594594594595, "grad_norm": 8.35493484028015, "learning_rate": 9.915148521679848e-06, "loss": 2.8929569721221924, "step": 510 }, { "epoch": 0.4603603603603604, "grad_norm": 18.11039317600781, "learning_rate": 9.914184336569973e-06, "loss": 1.871511459350586, "step": 511 }, { "epoch": 0.46126126126126127, "grad_norm": 16.339316213391253, "learning_rate": 9.913214751660244e-06, "loss": 3.3061211109161377, "step": 512 }, { "epoch": 0.46216216216216216, "grad_norm": 17.68052245730394, "learning_rate": 9.912239768016057e-06, "loss": 2.3791470527648926, "step": 513 }, { "epoch": 0.46306306306306305, "grad_norm": 11.245574736173385, "learning_rate": 9.911259386708742e-06, "loss": 2.556948184967041, "step": 514 }, { "epoch": 0.46396396396396394, "grad_norm": 8.915739531623856, "learning_rate": 9.91027360881556e-06, "loss": 2.971670150756836, "step": 515 }, { "epoch": 0.4648648648648649, "grad_norm": 5.929578462713964, "learning_rate": 9.909282435419703e-06, "loss": 2.971108913421631, "step": 516 }, { "epoch": 0.4657657657657658, "grad_norm": 10.688852817948094, "learning_rate": 9.908285867610292e-06, "loss": 2.6687498092651367, "step": 517 }, { "epoch": 0.4666666666666667, "grad_norm": 18.48653768523157, "learning_rate": 9.907283906482374e-06, "loss": 3.280163526535034, "step": 518 }, { "epoch": 0.46756756756756757, "grad_norm": 20.344949506929094, "learning_rate": 9.906276553136924e-06, "loss": 2.7002642154693604, "step": 519 }, { "epoch": 0.46846846846846846, "grad_norm": 13.383833626515251, "learning_rate": 9.90526380868084e-06, "loss": 2.652203321456909, "step": 520 }, { "epoch": 0.46936936936936935, "grad_norm": 22.95836676576381, "learning_rate": 9.904245674226948e-06, "loss": 3.252615451812744, "step": 521 }, { "epoch": 0.4702702702702703, "grad_norm": 9.730269158871646, "learning_rate": 9.90322215089399e-06, "loss": 2.8116307258605957, "step": 522 }, { "epoch": 0.4711711711711712, "grad_norm": 10.828805525393937, "learning_rate": 9.902193239806634e-06, "loss": 2.7895991802215576, "step": 523 }, { "epoch": 0.4720720720720721, "grad_norm": 12.86168869720143, "learning_rate": 9.901158942095468e-06, "loss": 2.8977463245391846, "step": 524 }, { "epoch": 0.47297297297297297, "grad_norm": 11.771464906214417, "learning_rate": 9.900119258896998e-06, "loss": 2.2192132472991943, "step": 525 }, { "epoch": 0.47387387387387386, "grad_norm": 11.30141430142043, "learning_rate": 9.899074191353649e-06, "loss": 3.1374659538269043, "step": 526 }, { "epoch": 0.47477477477477475, "grad_norm": 13.897590331761034, "learning_rate": 9.898023740613758e-06, "loss": 3.357194423675537, "step": 527 }, { "epoch": 0.4756756756756757, "grad_norm": 7.449387794038159, "learning_rate": 9.896967907831581e-06, "loss": 2.854480266571045, "step": 528 }, { "epoch": 0.4765765765765766, "grad_norm": 8.878874765157995, "learning_rate": 9.895906694167291e-06, "loss": 2.738018035888672, "step": 529 }, { "epoch": 0.4774774774774775, "grad_norm": 38.00148277537286, "learning_rate": 9.894840100786966e-06, "loss": 2.8189690113067627, "step": 530 }, { "epoch": 0.4783783783783784, "grad_norm": 17.3491501566159, "learning_rate": 9.893768128862601e-06, "loss": 2.9495034217834473, "step": 531 }, { "epoch": 0.47927927927927927, "grad_norm": 16.50728344098491, "learning_rate": 9.892690779572098e-06, "loss": 2.541929244995117, "step": 532 }, { "epoch": 0.48018018018018016, "grad_norm": 13.275241172557463, "learning_rate": 9.891608054099271e-06, "loss": 2.7305493354797363, "step": 533 }, { "epoch": 0.4810810810810811, "grad_norm": 10.577891946131855, "learning_rate": 9.89051995363384e-06, "loss": 3.0362510681152344, "step": 534 }, { "epoch": 0.481981981981982, "grad_norm": 9.552120076240959, "learning_rate": 9.889426479371427e-06, "loss": 3.040802001953125, "step": 535 }, { "epoch": 0.4828828828828829, "grad_norm": 23.697875741603628, "learning_rate": 9.888327632513563e-06, "loss": 3.7922098636627197, "step": 536 }, { "epoch": 0.4837837837837838, "grad_norm": 8.507680677535628, "learning_rate": 9.887223414267686e-06, "loss": 2.7465715408325195, "step": 537 }, { "epoch": 0.4846846846846847, "grad_norm": 15.26647905177071, "learning_rate": 9.88611382584713e-06, "loss": 2.9226627349853516, "step": 538 }, { "epoch": 0.48558558558558557, "grad_norm": 16.897882002125424, "learning_rate": 9.88499886847113e-06, "loss": 2.804058074951172, "step": 539 }, { "epoch": 0.4864864864864865, "grad_norm": 7.317461306084804, "learning_rate": 9.883878543364824e-06, "loss": 2.7797727584838867, "step": 540 }, { "epoch": 0.4873873873873874, "grad_norm": 14.644555104597616, "learning_rate": 9.882752851759247e-06, "loss": 3.074551820755005, "step": 541 }, { "epoch": 0.4882882882882883, "grad_norm": 10.955052355379598, "learning_rate": 9.881621794891332e-06, "loss": 3.0798258781433105, "step": 542 }, { "epoch": 0.4891891891891892, "grad_norm": 7.009829826155904, "learning_rate": 9.880485374003902e-06, "loss": 2.5292856693267822, "step": 543 }, { "epoch": 0.4900900900900901, "grad_norm": 8.328587975820678, "learning_rate": 9.879343590345682e-06, "loss": 2.8059566020965576, "step": 544 }, { "epoch": 0.49099099099099097, "grad_norm": 8.910851525333204, "learning_rate": 9.878196445171281e-06, "loss": 2.900643825531006, "step": 545 }, { "epoch": 0.4918918918918919, "grad_norm": 8.920847529814719, "learning_rate": 9.877043939741211e-06, "loss": 2.661362648010254, "step": 546 }, { "epoch": 0.4927927927927928, "grad_norm": 19.722150751686048, "learning_rate": 9.87588607532186e-06, "loss": 2.3552534580230713, "step": 547 }, { "epoch": 0.4936936936936937, "grad_norm": 11.697970053360999, "learning_rate": 9.874722853185519e-06, "loss": 2.4047014713287354, "step": 548 }, { "epoch": 0.4945945945945946, "grad_norm": 22.169788506007976, "learning_rate": 9.87355427461035e-06, "loss": 2.575777530670166, "step": 549 }, { "epoch": 0.4954954954954955, "grad_norm": 12.80578817852263, "learning_rate": 9.872380340880416e-06, "loss": 2.420854330062866, "step": 550 }, { "epoch": 0.4963963963963964, "grad_norm": 11.369342456286857, "learning_rate": 9.871201053285658e-06, "loss": 2.5156991481781006, "step": 551 }, { "epoch": 0.4972972972972973, "grad_norm": 12.316050038110454, "learning_rate": 9.870016413121894e-06, "loss": 3.14607310295105, "step": 552 }, { "epoch": 0.4981981981981982, "grad_norm": 16.239571696929776, "learning_rate": 9.868826421690835e-06, "loss": 3.397555351257324, "step": 553 }, { "epoch": 0.4990990990990991, "grad_norm": 8.854516430326528, "learning_rate": 9.867631080300063e-06, "loss": 2.690509796142578, "step": 554 }, { "epoch": 0.5, "grad_norm": 9.376344141476533, "learning_rate": 9.866430390263044e-06, "loss": 2.900862216949463, "step": 555 }, { "epoch": 0.5009009009009009, "grad_norm": 9.423648915581877, "learning_rate": 9.86522435289912e-06, "loss": 2.855050563812256, "step": 556 }, { "epoch": 0.5018018018018018, "grad_norm": 8.101093503468741, "learning_rate": 9.864012969533505e-06, "loss": 3.2957873344421387, "step": 557 }, { "epoch": 0.5027027027027027, "grad_norm": 10.100404830767033, "learning_rate": 9.862796241497291e-06, "loss": 2.6753034591674805, "step": 558 }, { "epoch": 0.5036036036036036, "grad_norm": 6.802573845910635, "learning_rate": 9.861574170127446e-06, "loss": 2.527581214904785, "step": 559 }, { "epoch": 0.5045045045045045, "grad_norm": 7.109905831565619, "learning_rate": 9.8603467567668e-06, "loss": 2.5860238075256348, "step": 560 }, { "epoch": 0.5054054054054054, "grad_norm": 77.48281332985208, "learning_rate": 9.85911400276406e-06, "loss": 2.9344871044158936, "step": 561 }, { "epoch": 0.5063063063063064, "grad_norm": 17.204696265849268, "learning_rate": 9.857875909473801e-06, "loss": 2.4911441802978516, "step": 562 }, { "epoch": 0.5072072072072072, "grad_norm": 33.333109411986456, "learning_rate": 9.856632478256465e-06, "loss": 3.2600035667419434, "step": 563 }, { "epoch": 0.5081081081081081, "grad_norm": 18.927800261993358, "learning_rate": 9.855383710478353e-06, "loss": 3.0671844482421875, "step": 564 }, { "epoch": 0.509009009009009, "grad_norm": 11.659851933898024, "learning_rate": 9.85412960751164e-06, "loss": 2.565697193145752, "step": 565 }, { "epoch": 0.5099099099099099, "grad_norm": 17.64516165490412, "learning_rate": 9.852870170734354e-06, "loss": 3.133984088897705, "step": 566 }, { "epoch": 0.5108108108108108, "grad_norm": 12.590469859202411, "learning_rate": 9.851605401530391e-06, "loss": 2.9112048149108887, "step": 567 }, { "epoch": 0.5117117117117117, "grad_norm": 9.161002149446931, "learning_rate": 9.850335301289504e-06, "loss": 2.987330913543701, "step": 568 }, { "epoch": 0.5126126126126126, "grad_norm": 13.323533234558495, "learning_rate": 9.849059871407303e-06, "loss": 2.5533223152160645, "step": 569 }, { "epoch": 0.5135135135135135, "grad_norm": 6.344853206795556, "learning_rate": 9.847779113285254e-06, "loss": 2.8108911514282227, "step": 570 }, { "epoch": 0.5144144144144144, "grad_norm": 16.14781390884599, "learning_rate": 9.846493028330678e-06, "loss": 3.010362148284912, "step": 571 }, { "epoch": 0.5153153153153153, "grad_norm": 13.724464009158758, "learning_rate": 9.845201617956752e-06, "loss": 2.64815354347229, "step": 572 }, { "epoch": 0.5162162162162162, "grad_norm": 13.755497363882526, "learning_rate": 9.8439048835825e-06, "loss": 2.952627658843994, "step": 573 }, { "epoch": 0.5171171171171172, "grad_norm": 12.747606100104184, "learning_rate": 9.842602826632799e-06, "loss": 2.591431140899658, "step": 574 }, { "epoch": 0.5180180180180181, "grad_norm": 21.745555288655588, "learning_rate": 9.841295448538377e-06, "loss": 3.343477487564087, "step": 575 }, { "epoch": 0.518918918918919, "grad_norm": 9.598784757276308, "learning_rate": 9.839982750735804e-06, "loss": 2.577162742614746, "step": 576 }, { "epoch": 0.5198198198198198, "grad_norm": 9.254856439318155, "learning_rate": 9.838664734667496e-06, "loss": 2.9017882347106934, "step": 577 }, { "epoch": 0.5207207207207207, "grad_norm": 8.530699856357325, "learning_rate": 9.837341401781715e-06, "loss": 2.5971288681030273, "step": 578 }, { "epoch": 0.5216216216216216, "grad_norm": 6.5891797705212625, "learning_rate": 9.836012753532566e-06, "loss": 2.6294662952423096, "step": 579 }, { "epoch": 0.5225225225225225, "grad_norm": 12.609165447431351, "learning_rate": 9.834678791379992e-06, "loss": 2.7069082260131836, "step": 580 }, { "epoch": 0.5234234234234234, "grad_norm": 12.163112172436126, "learning_rate": 9.833339516789778e-06, "loss": 2.796908378601074, "step": 581 }, { "epoch": 0.5243243243243243, "grad_norm": 12.871569991902243, "learning_rate": 9.831994931233542e-06, "loss": 3.122313976287842, "step": 582 }, { "epoch": 0.5252252252252252, "grad_norm": 14.596195641635775, "learning_rate": 9.83064503618874e-06, "loss": 3.2716140747070312, "step": 583 }, { "epoch": 0.5261261261261261, "grad_norm": 12.543798988950169, "learning_rate": 9.829289833138667e-06, "loss": 3.1897201538085938, "step": 584 }, { "epoch": 0.527027027027027, "grad_norm": 13.507227668169904, "learning_rate": 9.827929323572441e-06, "loss": 4.371722221374512, "step": 585 }, { "epoch": 0.527927927927928, "grad_norm": 6.877900168304943, "learning_rate": 9.826563508985017e-06, "loss": 2.0487256050109863, "step": 586 }, { "epoch": 0.5288288288288289, "grad_norm": 19.668259971076097, "learning_rate": 9.82519239087718e-06, "loss": 3.2878916263580322, "step": 587 }, { "epoch": 0.5297297297297298, "grad_norm": 11.170925627790286, "learning_rate": 9.823815970755542e-06, "loss": 3.1439385414123535, "step": 588 }, { "epoch": 0.5306306306306307, "grad_norm": 10.390653138517692, "learning_rate": 9.822434250132535e-06, "loss": 2.6878585815429688, "step": 589 }, { "epoch": 0.5315315315315315, "grad_norm": 7.073174266005421, "learning_rate": 9.821047230526425e-06, "loss": 2.1680750846862793, "step": 590 }, { "epoch": 0.5324324324324324, "grad_norm": 9.093168029084392, "learning_rate": 9.819654913461292e-06, "loss": 2.9089152812957764, "step": 591 }, { "epoch": 0.5333333333333333, "grad_norm": 12.850144871048814, "learning_rate": 9.818257300467045e-06, "loss": 2.6065568923950195, "step": 592 }, { "epoch": 0.5342342342342342, "grad_norm": 8.32330763975957, "learning_rate": 9.816854393079402e-06, "loss": 3.0583200454711914, "step": 593 }, { "epoch": 0.5351351351351351, "grad_norm": 12.069825241338958, "learning_rate": 9.815446192839908e-06, "loss": 3.030487060546875, "step": 594 }, { "epoch": 0.536036036036036, "grad_norm": 10.949716889539568, "learning_rate": 9.814032701295923e-06, "loss": 2.8246138095855713, "step": 595 }, { "epoch": 0.5369369369369369, "grad_norm": 26.699912146577983, "learning_rate": 9.812613920000613e-06, "loss": 3.470756769180298, "step": 596 }, { "epoch": 0.5378378378378378, "grad_norm": 16.392118624059524, "learning_rate": 9.811189850512965e-06, "loss": 3.1673455238342285, "step": 597 }, { "epoch": 0.5387387387387388, "grad_norm": 9.259366588758454, "learning_rate": 9.809760494397776e-06, "loss": 2.5202136039733887, "step": 598 }, { "epoch": 0.5396396396396397, "grad_norm": 9.007590254502421, "learning_rate": 9.808325853225645e-06, "loss": 3.080294132232666, "step": 599 }, { "epoch": 0.5405405405405406, "grad_norm": 13.32109150521752, "learning_rate": 9.806885928572984e-06, "loss": 2.2382287979125977, "step": 600 }, { "epoch": 0.5414414414414415, "grad_norm": 7.902638433971261, "learning_rate": 9.805440722022015e-06, "loss": 2.7696096897125244, "step": 601 }, { "epoch": 0.5423423423423424, "grad_norm": 9.29351832414914, "learning_rate": 9.803990235160753e-06, "loss": 3.026676654815674, "step": 602 }, { "epoch": 0.5432432432432432, "grad_norm": 15.78925805226816, "learning_rate": 9.802534469583022e-06, "loss": 3.3322205543518066, "step": 603 }, { "epoch": 0.5441441441441441, "grad_norm": 17.1472967796494, "learning_rate": 9.801073426888447e-06, "loss": 2.838545799255371, "step": 604 }, { "epoch": 0.545045045045045, "grad_norm": 13.725380880291375, "learning_rate": 9.79960710868245e-06, "loss": 2.4575085639953613, "step": 605 }, { "epoch": 0.5459459459459459, "grad_norm": 11.343607830029514, "learning_rate": 9.798135516576246e-06, "loss": 2.8615031242370605, "step": 606 }, { "epoch": 0.5468468468468468, "grad_norm": 8.149097053351092, "learning_rate": 9.796658652186852e-06, "loss": 2.528751850128174, "step": 607 }, { "epoch": 0.5477477477477477, "grad_norm": 10.722467414146747, "learning_rate": 9.795176517137072e-06, "loss": 2.9100944995880127, "step": 608 }, { "epoch": 0.5486486486486486, "grad_norm": 13.849500897141981, "learning_rate": 9.793689113055507e-06, "loss": 3.3990378379821777, "step": 609 }, { "epoch": 0.5495495495495496, "grad_norm": 44.885020827823546, "learning_rate": 9.792196441576544e-06, "loss": 2.527492046356201, "step": 610 }, { "epoch": 0.5504504504504505, "grad_norm": 9.276202543493147, "learning_rate": 9.79069850434036e-06, "loss": 2.643711805343628, "step": 611 }, { "epoch": 0.5513513513513514, "grad_norm": 9.985398691316842, "learning_rate": 9.789195302992914e-06, "loss": 2.638700485229492, "step": 612 }, { "epoch": 0.5522522522522523, "grad_norm": 8.124092970725826, "learning_rate": 9.787686839185954e-06, "loss": 3.1427161693573, "step": 613 }, { "epoch": 0.5531531531531532, "grad_norm": 11.9797011769303, "learning_rate": 9.786173114577012e-06, "loss": 1.6746983528137207, "step": 614 }, { "epoch": 0.5540540540540541, "grad_norm": 10.760629779017707, "learning_rate": 9.784654130829394e-06, "loss": 2.682283639907837, "step": 615 }, { "epoch": 0.554954954954955, "grad_norm": 8.134445546653057, "learning_rate": 9.78312988961219e-06, "loss": 2.845862865447998, "step": 616 }, { "epoch": 0.5558558558558558, "grad_norm": 13.050361292942267, "learning_rate": 9.781600392600264e-06, "loss": 3.153568744659424, "step": 617 }, { "epoch": 0.5567567567567567, "grad_norm": 31.881308227440655, "learning_rate": 9.780065641474257e-06, "loss": 2.7752227783203125, "step": 618 }, { "epoch": 0.5576576576576576, "grad_norm": 17.67219029389856, "learning_rate": 9.778525637920587e-06, "loss": 2.7249202728271484, "step": 619 }, { "epoch": 0.5585585585585585, "grad_norm": 12.22272888684173, "learning_rate": 9.776980383631432e-06, "loss": 2.489539623260498, "step": 620 }, { "epoch": 0.5594594594594594, "grad_norm": 8.729293975204598, "learning_rate": 9.775429880304753e-06, "loss": 2.6470212936401367, "step": 621 }, { "epoch": 0.5603603603603604, "grad_norm": 12.057004192073059, "learning_rate": 9.773874129644268e-06, "loss": 2.4962947368621826, "step": 622 }, { "epoch": 0.5612612612612613, "grad_norm": 15.658988410504156, "learning_rate": 9.77231313335947e-06, "loss": 2.730250120162964, "step": 623 }, { "epoch": 0.5621621621621622, "grad_norm": 9.968299463426975, "learning_rate": 9.77074689316561e-06, "loss": 2.25225830078125, "step": 624 }, { "epoch": 0.5630630630630631, "grad_norm": 9.657069192556015, "learning_rate": 9.769175410783703e-06, "loss": 3.269899845123291, "step": 625 }, { "epoch": 0.563963963963964, "grad_norm": 19.251856009582813, "learning_rate": 9.767598687940523e-06, "loss": 2.8722891807556152, "step": 626 }, { "epoch": 0.5648648648648649, "grad_norm": 14.428545606993785, "learning_rate": 9.766016726368604e-06, "loss": 3.03021502494812, "step": 627 }, { "epoch": 0.5657657657657658, "grad_norm": 14.16030624645081, "learning_rate": 9.764429527806233e-06, "loss": 2.89723539352417, "step": 628 }, { "epoch": 0.5666666666666667, "grad_norm": 8.32130764827128, "learning_rate": 9.76283709399746e-06, "loss": 2.671215772628784, "step": 629 }, { "epoch": 0.5675675675675675, "grad_norm": 11.716176911854502, "learning_rate": 9.761239426692077e-06, "loss": 2.804887056350708, "step": 630 }, { "epoch": 0.5684684684684684, "grad_norm": 11.769213920438558, "learning_rate": 9.759636527645633e-06, "loss": 3.2259230613708496, "step": 631 }, { "epoch": 0.5693693693693693, "grad_norm": 12.718068774896336, "learning_rate": 9.758028398619423e-06, "loss": 2.9922332763671875, "step": 632 }, { "epoch": 0.5702702702702702, "grad_norm": 24.023232094252386, "learning_rate": 9.756415041380493e-06, "loss": 2.8729424476623535, "step": 633 }, { "epoch": 0.5711711711711712, "grad_norm": 12.166461400696965, "learning_rate": 9.754796457701628e-06, "loss": 2.605339527130127, "step": 634 }, { "epoch": 0.5720720720720721, "grad_norm": 10.402685081844133, "learning_rate": 9.753172649361358e-06, "loss": 2.934504270553589, "step": 635 }, { "epoch": 0.572972972972973, "grad_norm": 10.272109745887343, "learning_rate": 9.751543618143958e-06, "loss": 2.570463180541992, "step": 636 }, { "epoch": 0.5738738738738739, "grad_norm": 13.711719588706229, "learning_rate": 9.749909365839436e-06, "loss": 3.169706344604492, "step": 637 }, { "epoch": 0.5747747747747748, "grad_norm": 33.59789891103926, "learning_rate": 9.748269894243541e-06, "loss": 2.3556222915649414, "step": 638 }, { "epoch": 0.5756756756756757, "grad_norm": 7.975103574740825, "learning_rate": 9.746625205157755e-06, "loss": 1.1413840055465698, "step": 639 }, { "epoch": 0.5765765765765766, "grad_norm": 22.735513423510085, "learning_rate": 9.744975300389295e-06, "loss": 2.070692300796509, "step": 640 }, { "epoch": 0.5774774774774775, "grad_norm": 16.36291204510473, "learning_rate": 9.743320181751105e-06, "loss": 3.360299825668335, "step": 641 }, { "epoch": 0.5783783783783784, "grad_norm": 13.992378484017673, "learning_rate": 9.741659851061866e-06, "loss": 2.6247687339782715, "step": 642 }, { "epoch": 0.5792792792792792, "grad_norm": 10.452818390696052, "learning_rate": 9.739994310145977e-06, "loss": 2.7434468269348145, "step": 643 }, { "epoch": 0.5801801801801801, "grad_norm": 14.842864249759991, "learning_rate": 9.73832356083357e-06, "loss": 2.6327593326568604, "step": 644 }, { "epoch": 0.581081081081081, "grad_norm": 11.334382859755081, "learning_rate": 9.736647604960492e-06, "loss": 2.6178431510925293, "step": 645 }, { "epoch": 0.581981981981982, "grad_norm": 10.990080398430045, "learning_rate": 9.734966444368317e-06, "loss": 2.822669744491577, "step": 646 }, { "epoch": 0.5828828828828829, "grad_norm": 11.880254008410231, "learning_rate": 9.733280080904337e-06, "loss": 2.7857983112335205, "step": 647 }, { "epoch": 0.5837837837837838, "grad_norm": 11.920915789197798, "learning_rate": 9.731588516421562e-06, "loss": 3.316427230834961, "step": 648 }, { "epoch": 0.5846846846846847, "grad_norm": 14.414450806441018, "learning_rate": 9.729891752778712e-06, "loss": 3.260469436645508, "step": 649 }, { "epoch": 0.5855855855855856, "grad_norm": 11.80034688226506, "learning_rate": 9.728189791840227e-06, "loss": 2.9232394695281982, "step": 650 }, { "epoch": 0.5864864864864865, "grad_norm": 15.725196900408367, "learning_rate": 9.726482635476252e-06, "loss": 2.745842456817627, "step": 651 }, { "epoch": 0.5873873873873874, "grad_norm": 8.0300708831056, "learning_rate": 9.724770285562642e-06, "loss": 2.5746424198150635, "step": 652 }, { "epoch": 0.5882882882882883, "grad_norm": 17.331180975895887, "learning_rate": 9.723052743980963e-06, "loss": 2.8071985244750977, "step": 653 }, { "epoch": 0.5891891891891892, "grad_norm": 13.071760646959074, "learning_rate": 9.72133001261848e-06, "loss": 2.8666656017303467, "step": 654 }, { "epoch": 0.5900900900900901, "grad_norm": 8.582467250156132, "learning_rate": 9.719602093368165e-06, "loss": 2.601950168609619, "step": 655 }, { "epoch": 0.590990990990991, "grad_norm": 9.076812716735017, "learning_rate": 9.717868988128688e-06, "loss": 3.026968479156494, "step": 656 }, { "epoch": 0.5918918918918918, "grad_norm": 10.619332033928004, "learning_rate": 9.716130698804418e-06, "loss": 1.9140194654464722, "step": 657 }, { "epoch": 0.5927927927927928, "grad_norm": 28.80427013899261, "learning_rate": 9.714387227305422e-06, "loss": 2.499068021774292, "step": 658 }, { "epoch": 0.5936936936936937, "grad_norm": 9.552137001045008, "learning_rate": 9.712638575547458e-06, "loss": 3.0285205841064453, "step": 659 }, { "epoch": 0.5945945945945946, "grad_norm": 44.90766293089647, "learning_rate": 9.710884745451979e-06, "loss": 3.3245625495910645, "step": 660 }, { "epoch": 0.5954954954954955, "grad_norm": 9.12125793416734, "learning_rate": 9.709125738946126e-06, "loss": 2.6970860958099365, "step": 661 }, { "epoch": 0.5963963963963964, "grad_norm": 19.76964852034373, "learning_rate": 9.707361557962728e-06, "loss": 2.7420012950897217, "step": 662 }, { "epoch": 0.5972972972972973, "grad_norm": 28.95380432258598, "learning_rate": 9.705592204440306e-06, "loss": 3.1635239124298096, "step": 663 }, { "epoch": 0.5981981981981982, "grad_norm": 12.148321113338309, "learning_rate": 9.703817680323055e-06, "loss": 2.6524462699890137, "step": 664 }, { "epoch": 0.5990990990990991, "grad_norm": 22.42931971897512, "learning_rate": 9.702037987560859e-06, "loss": 1.996198058128357, "step": 665 }, { "epoch": 0.6, "grad_norm": 9.760729582324164, "learning_rate": 9.700253128109275e-06, "loss": 2.959993839263916, "step": 666 }, { "epoch": 0.6009009009009009, "grad_norm": 11.039657436693238, "learning_rate": 9.698463103929542e-06, "loss": 2.4604763984680176, "step": 667 }, { "epoch": 0.6018018018018018, "grad_norm": 12.591538232890576, "learning_rate": 9.696667916988576e-06, "loss": 2.6671557426452637, "step": 668 }, { "epoch": 0.6027027027027027, "grad_norm": 13.018778973089516, "learning_rate": 9.694867569258957e-06, "loss": 2.5060935020446777, "step": 669 }, { "epoch": 0.6036036036036037, "grad_norm": 24.812787978360838, "learning_rate": 9.693062062718947e-06, "loss": 2.6539478302001953, "step": 670 }, { "epoch": 0.6045045045045045, "grad_norm": 9.28865529697274, "learning_rate": 9.691251399352468e-06, "loss": 3.0585227012634277, "step": 671 }, { "epoch": 0.6054054054054054, "grad_norm": 12.489042706136434, "learning_rate": 9.689435581149114e-06, "loss": 2.2748842239379883, "step": 672 }, { "epoch": 0.6063063063063063, "grad_norm": 12.788764883935377, "learning_rate": 9.687614610104137e-06, "loss": 2.742896795272827, "step": 673 }, { "epoch": 0.6072072072072072, "grad_norm": 10.400479488548925, "learning_rate": 9.68578848821846e-06, "loss": 2.913947582244873, "step": 674 }, { "epoch": 0.6081081081081081, "grad_norm": 10.356969659317713, "learning_rate": 9.683957217498657e-06, "loss": 3.195772647857666, "step": 675 }, { "epoch": 0.609009009009009, "grad_norm": 11.177217351847228, "learning_rate": 9.682120799956961e-06, "loss": 2.561089515686035, "step": 676 }, { "epoch": 0.6099099099099099, "grad_norm": 11.776725641662114, "learning_rate": 9.68027923761127e-06, "loss": 2.9913177490234375, "step": 677 }, { "epoch": 0.6108108108108108, "grad_norm": 20.340971853249492, "learning_rate": 9.678432532485122e-06, "loss": 2.9433279037475586, "step": 678 }, { "epoch": 0.6117117117117117, "grad_norm": 13.062691511533304, "learning_rate": 9.676580686607714e-06, "loss": 2.7461581230163574, "step": 679 }, { "epoch": 0.6126126126126126, "grad_norm": 10.550831289023261, "learning_rate": 9.67472370201389e-06, "loss": 2.2804391384124756, "step": 680 }, { "epoch": 0.6135135135135135, "grad_norm": 14.04652689177321, "learning_rate": 9.672861580744142e-06, "loss": 2.3337719440460205, "step": 681 }, { "epoch": 0.6144144144144145, "grad_norm": 11.726406448121304, "learning_rate": 9.6709943248446e-06, "loss": 3.0193614959716797, "step": 682 }, { "epoch": 0.6153153153153154, "grad_norm": 7.698304365102145, "learning_rate": 9.669121936367043e-06, "loss": 2.7147281169891357, "step": 683 }, { "epoch": 0.6162162162162163, "grad_norm": 10.794221131633092, "learning_rate": 9.667244417368888e-06, "loss": 2.859605073928833, "step": 684 }, { "epoch": 0.6171171171171171, "grad_norm": 10.03370727731534, "learning_rate": 9.665361769913187e-06, "loss": 2.639362096786499, "step": 685 }, { "epoch": 0.618018018018018, "grad_norm": 15.312131543900419, "learning_rate": 9.663473996068631e-06, "loss": 2.8004603385925293, "step": 686 }, { "epoch": 0.6189189189189189, "grad_norm": 12.006983237628706, "learning_rate": 9.661581097909542e-06, "loss": 2.2756056785583496, "step": 687 }, { "epoch": 0.6198198198198198, "grad_norm": 7.178982852760417, "learning_rate": 9.659683077515871e-06, "loss": 2.5561704635620117, "step": 688 }, { "epoch": 0.6207207207207207, "grad_norm": 17.558612337023416, "learning_rate": 9.6577799369732e-06, "loss": 2.2039928436279297, "step": 689 }, { "epoch": 0.6216216216216216, "grad_norm": 12.664437522074346, "learning_rate": 9.655871678372735e-06, "loss": 2.7276079654693604, "step": 690 }, { "epoch": 0.6225225225225225, "grad_norm": 9.598029782608371, "learning_rate": 9.65395830381131e-06, "loss": 3.5972723960876465, "step": 691 }, { "epoch": 0.6234234234234234, "grad_norm": 10.084799153198551, "learning_rate": 9.652039815391376e-06, "loss": 2.7443714141845703, "step": 692 }, { "epoch": 0.6243243243243243, "grad_norm": 11.19197717861924, "learning_rate": 9.650116215221006e-06, "loss": 3.679194688796997, "step": 693 }, { "epoch": 0.6252252252252253, "grad_norm": 13.429386304946776, "learning_rate": 9.648187505413887e-06, "loss": 2.4218192100524902, "step": 694 }, { "epoch": 0.6261261261261262, "grad_norm": 9.445783315531548, "learning_rate": 9.646253688089321e-06, "loss": 2.255427122116089, "step": 695 }, { "epoch": 0.6270270270270271, "grad_norm": 13.604349677903247, "learning_rate": 9.644314765372227e-06, "loss": 2.967381000518799, "step": 696 }, { "epoch": 0.627927927927928, "grad_norm": 13.267927862733156, "learning_rate": 9.64237073939313e-06, "loss": 2.0716745853424072, "step": 697 }, { "epoch": 0.6288288288288288, "grad_norm": 20.78254766537362, "learning_rate": 9.64042161228816e-06, "loss": 2.4729197025299072, "step": 698 }, { "epoch": 0.6297297297297297, "grad_norm": 10.735150542974305, "learning_rate": 9.638467386199057e-06, "loss": 2.4997925758361816, "step": 699 }, { "epoch": 0.6306306306306306, "grad_norm": 9.654671058509456, "learning_rate": 9.636508063273161e-06, "loss": 2.097930908203125, "step": 700 }, { "epoch": 0.6315315315315315, "grad_norm": 8.12843638998296, "learning_rate": 9.634543645663417e-06, "loss": 3.2768378257751465, "step": 701 }, { "epoch": 0.6324324324324324, "grad_norm": 9.035173235571136, "learning_rate": 9.63257413552836e-06, "loss": 2.5695743560791016, "step": 702 }, { "epoch": 0.6333333333333333, "grad_norm": 10.465715380961658, "learning_rate": 9.63059953503213e-06, "loss": 2.83211088180542, "step": 703 }, { "epoch": 0.6342342342342342, "grad_norm": 8.531194596728515, "learning_rate": 9.628619846344453e-06, "loss": 3.39132022857666, "step": 704 }, { "epoch": 0.6351351351351351, "grad_norm": 10.723427942794773, "learning_rate": 9.626635071640648e-06, "loss": 2.4200494289398193, "step": 705 }, { "epoch": 0.6360360360360361, "grad_norm": 9.667355089191147, "learning_rate": 9.624645213101627e-06, "loss": 2.2413747310638428, "step": 706 }, { "epoch": 0.636936936936937, "grad_norm": 7.524620829749232, "learning_rate": 9.62265027291388e-06, "loss": 2.371335029602051, "step": 707 }, { "epoch": 0.6378378378378379, "grad_norm": 10.477549393486395, "learning_rate": 9.62065025326949e-06, "loss": 3.217474937438965, "step": 708 }, { "epoch": 0.6387387387387388, "grad_norm": 11.236334668871198, "learning_rate": 9.618645156366113e-06, "loss": 2.601595640182495, "step": 709 }, { "epoch": 0.6396396396396397, "grad_norm": 12.085562514268764, "learning_rate": 9.616634984406992e-06, "loss": 3.323373794555664, "step": 710 }, { "epoch": 0.6405405405405405, "grad_norm": 7.864682656572068, "learning_rate": 9.61461973960094e-06, "loss": 2.608567476272583, "step": 711 }, { "epoch": 0.6414414414414414, "grad_norm": 11.02629949211748, "learning_rate": 9.612599424162344e-06, "loss": 3.1566262245178223, "step": 712 }, { "epoch": 0.6423423423423423, "grad_norm": 7.927060068649259, "learning_rate": 9.61057404031117e-06, "loss": 2.9061496257781982, "step": 713 }, { "epoch": 0.6432432432432432, "grad_norm": 16.4660133303867, "learning_rate": 9.608543590272947e-06, "loss": 2.9211766719818115, "step": 714 }, { "epoch": 0.6441441441441441, "grad_norm": 10.311485765880613, "learning_rate": 9.606508076278772e-06, "loss": 2.5923843383789062, "step": 715 }, { "epoch": 0.645045045045045, "grad_norm": 11.027862149795704, "learning_rate": 9.604467500565305e-06, "loss": 2.2774062156677246, "step": 716 }, { "epoch": 0.6459459459459459, "grad_norm": 10.9047541264129, "learning_rate": 9.602421865374774e-06, "loss": 2.7341184616088867, "step": 717 }, { "epoch": 0.6468468468468469, "grad_norm": 26.06312075565098, "learning_rate": 9.600371172954957e-06, "loss": 2.2477073669433594, "step": 718 }, { "epoch": 0.6477477477477478, "grad_norm": 9.903233360954676, "learning_rate": 9.598315425559199e-06, "loss": 2.342525005340576, "step": 719 }, { "epoch": 0.6486486486486487, "grad_norm": 18.65596381212552, "learning_rate": 9.596254625446391e-06, "loss": 2.6345980167388916, "step": 720 }, { "epoch": 0.6495495495495496, "grad_norm": 20.153467171902868, "learning_rate": 9.594188774880981e-06, "loss": 2.5606279373168945, "step": 721 }, { "epoch": 0.6504504504504505, "grad_norm": 6.441711489089664, "learning_rate": 9.592117876132965e-06, "loss": 2.866764783859253, "step": 722 }, { "epoch": 0.6513513513513514, "grad_norm": 9.81198891053071, "learning_rate": 9.590041931477887e-06, "loss": 3.1564278602600098, "step": 723 }, { "epoch": 0.6522522522522523, "grad_norm": 17.684208371078892, "learning_rate": 9.587960943196834e-06, "loss": 2.7963242530822754, "step": 724 }, { "epoch": 0.6531531531531531, "grad_norm": 15.342811240268174, "learning_rate": 9.585874913576435e-06, "loss": 3.0183424949645996, "step": 725 }, { "epoch": 0.654054054054054, "grad_norm": 14.623353023945262, "learning_rate": 9.583783844908861e-06, "loss": 3.3839101791381836, "step": 726 }, { "epoch": 0.6549549549549549, "grad_norm": 8.398692776443163, "learning_rate": 9.581687739491816e-06, "loss": 2.6206607818603516, "step": 727 }, { "epoch": 0.6558558558558558, "grad_norm": 12.37961380763781, "learning_rate": 9.579586599628542e-06, "loss": 2.7061638832092285, "step": 728 }, { "epoch": 0.6567567567567567, "grad_norm": 10.678438728195196, "learning_rate": 9.577480427627814e-06, "loss": 2.508704900741577, "step": 729 }, { "epoch": 0.6576576576576577, "grad_norm": 25.932935898496996, "learning_rate": 9.57536922580393e-06, "loss": 1.9583587646484375, "step": 730 }, { "epoch": 0.6585585585585586, "grad_norm": 27.66965432336236, "learning_rate": 9.573252996476722e-06, "loss": 3.129885196685791, "step": 731 }, { "epoch": 0.6594594594594595, "grad_norm": 9.735259990519928, "learning_rate": 9.571131741971543e-06, "loss": 2.7763378620147705, "step": 732 }, { "epoch": 0.6603603603603604, "grad_norm": 8.032203064987039, "learning_rate": 9.569005464619267e-06, "loss": 2.2813687324523926, "step": 733 }, { "epoch": 0.6612612612612613, "grad_norm": 8.041170678642661, "learning_rate": 9.566874166756288e-06, "loss": 2.840353012084961, "step": 734 }, { "epoch": 0.6621621621621622, "grad_norm": 28.158206672083676, "learning_rate": 9.564737850724518e-06, "loss": 2.8474009037017822, "step": 735 }, { "epoch": 0.6630630630630631, "grad_norm": 5.815000573446517, "learning_rate": 9.562596518871382e-06, "loss": 2.8207173347473145, "step": 736 }, { "epoch": 0.663963963963964, "grad_norm": 34.14413849056543, "learning_rate": 9.560450173549816e-06, "loss": 3.46616792678833, "step": 737 }, { "epoch": 0.6648648648648648, "grad_norm": 9.09455757919389, "learning_rate": 9.558298817118263e-06, "loss": 3.2472267150878906, "step": 738 }, { "epoch": 0.6657657657657657, "grad_norm": 11.745734417740652, "learning_rate": 9.55614245194068e-06, "loss": 2.990772247314453, "step": 739 }, { "epoch": 0.6666666666666666, "grad_norm": 10.126106707154532, "learning_rate": 9.553981080386517e-06, "loss": 2.621983766555786, "step": 740 }, { "epoch": 0.6675675675675675, "grad_norm": 10.013984136877077, "learning_rate": 9.551814704830734e-06, "loss": 2.8704495429992676, "step": 741 }, { "epoch": 0.6684684684684684, "grad_norm": 17.553893999524107, "learning_rate": 9.549643327653784e-06, "loss": 2.6300485134124756, "step": 742 }, { "epoch": 0.6693693693693694, "grad_norm": 8.905039694665708, "learning_rate": 9.54746695124162e-06, "loss": 2.985562801361084, "step": 743 }, { "epoch": 0.6702702702702703, "grad_norm": 19.481838616003518, "learning_rate": 9.545285577985683e-06, "loss": 3.3478264808654785, "step": 744 }, { "epoch": 0.6711711711711712, "grad_norm": 10.728554308127086, "learning_rate": 9.543099210282911e-06, "loss": 2.5533034801483154, "step": 745 }, { "epoch": 0.6720720720720721, "grad_norm": 6.0652069579138645, "learning_rate": 9.540907850535723e-06, "loss": 2.541428565979004, "step": 746 }, { "epoch": 0.672972972972973, "grad_norm": 17.782409649378508, "learning_rate": 9.53871150115203e-06, "loss": 3.083484172821045, "step": 747 }, { "epoch": 0.6738738738738739, "grad_norm": 28.609892675952906, "learning_rate": 9.536510164545223e-06, "loss": 2.3928310871124268, "step": 748 }, { "epoch": 0.6747747747747748, "grad_norm": 16.4195531284808, "learning_rate": 9.534303843134171e-06, "loss": 2.2515366077423096, "step": 749 }, { "epoch": 0.6756756756756757, "grad_norm": 7.548044911736927, "learning_rate": 9.532092539343221e-06, "loss": 2.632566452026367, "step": 750 }, { "epoch": 0.6765765765765765, "grad_norm": 14.289205489681361, "learning_rate": 9.5298762556022e-06, "loss": 2.8126754760742188, "step": 751 }, { "epoch": 0.6774774774774774, "grad_norm": 16.292231519879333, "learning_rate": 9.5276549943464e-06, "loss": 2.814695358276367, "step": 752 }, { "epoch": 0.6783783783783783, "grad_norm": 25.04265360040195, "learning_rate": 9.525428758016586e-06, "loss": 2.972036123275757, "step": 753 }, { "epoch": 0.6792792792792792, "grad_norm": 23.661749713598404, "learning_rate": 9.523197549058992e-06, "loss": 2.619868755340576, "step": 754 }, { "epoch": 0.6801801801801802, "grad_norm": 17.407043836170345, "learning_rate": 9.520961369925308e-06, "loss": 4.805351257324219, "step": 755 }, { "epoch": 0.6810810810810811, "grad_norm": 10.468744127830053, "learning_rate": 9.518720223072693e-06, "loss": 3.144011974334717, "step": 756 }, { "epoch": 0.681981981981982, "grad_norm": 9.704633564788162, "learning_rate": 9.516474110963762e-06, "loss": 2.9169135093688965, "step": 757 }, { "epoch": 0.6828828828828829, "grad_norm": 7.024733080156754, "learning_rate": 9.514223036066587e-06, "loss": 2.531320571899414, "step": 758 }, { "epoch": 0.6837837837837838, "grad_norm": 9.295575693758463, "learning_rate": 9.511967000854691e-06, "loss": 2.128255844116211, "step": 759 }, { "epoch": 0.6846846846846847, "grad_norm": 11.08585967285018, "learning_rate": 9.50970600780705e-06, "loss": 2.904832363128662, "step": 760 }, { "epoch": 0.6855855855855856, "grad_norm": 8.611078781100211, "learning_rate": 9.507440059408081e-06, "loss": 3.224320888519287, "step": 761 }, { "epoch": 0.6864864864864865, "grad_norm": 19.933373813383568, "learning_rate": 9.50516915814766e-06, "loss": 2.992894172668457, "step": 762 }, { "epoch": 0.6873873873873874, "grad_norm": 17.629980801907795, "learning_rate": 9.502893306521092e-06, "loss": 2.5204880237579346, "step": 763 }, { "epoch": 0.6882882882882883, "grad_norm": 8.426783809290349, "learning_rate": 9.500612507029128e-06, "loss": 2.420421838760376, "step": 764 }, { "epoch": 0.6891891891891891, "grad_norm": 21.42446866639724, "learning_rate": 9.498326762177952e-06, "loss": 2.415316581726074, "step": 765 }, { "epoch": 0.69009009009009, "grad_norm": 11.706220780072188, "learning_rate": 9.496036074479184e-06, "loss": 2.944035053253174, "step": 766 }, { "epoch": 0.690990990990991, "grad_norm": 9.802219173432976, "learning_rate": 9.49374044644988e-06, "loss": 2.377190589904785, "step": 767 }, { "epoch": 0.6918918918918919, "grad_norm": 4.558892645831176, "learning_rate": 9.491439880612513e-06, "loss": 1.1236885786056519, "step": 768 }, { "epoch": 0.6927927927927928, "grad_norm": 7.80491444951165, "learning_rate": 9.489134379494996e-06, "loss": 3.0552773475646973, "step": 769 }, { "epoch": 0.6936936936936937, "grad_norm": 18.374039901625448, "learning_rate": 9.486823945630654e-06, "loss": 3.0332956314086914, "step": 770 }, { "epoch": 0.6945945945945946, "grad_norm": 31.234558943180797, "learning_rate": 9.484508581558236e-06, "loss": 2.4991304874420166, "step": 771 }, { "epoch": 0.6954954954954955, "grad_norm": 13.190032949087422, "learning_rate": 9.48218828982191e-06, "loss": 3.1494534015655518, "step": 772 }, { "epoch": 0.6963963963963964, "grad_norm": 5.571744569472321, "learning_rate": 9.479863072971254e-06, "loss": 2.624263048171997, "step": 773 }, { "epoch": 0.6972972972972973, "grad_norm": 15.149735769109464, "learning_rate": 9.477532933561264e-06, "loss": 2.8947343826293945, "step": 774 }, { "epoch": 0.6981981981981982, "grad_norm": 13.33064496622505, "learning_rate": 9.47519787415234e-06, "loss": 2.5602176189422607, "step": 775 }, { "epoch": 0.6990990990990991, "grad_norm": 15.786645382596577, "learning_rate": 9.47285789731029e-06, "loss": 2.399599552154541, "step": 776 }, { "epoch": 0.7, "grad_norm": 8.671795375113534, "learning_rate": 9.470513005606327e-06, "loss": 3.5607237815856934, "step": 777 }, { "epoch": 0.7009009009009008, "grad_norm": 9.359263464644386, "learning_rate": 9.468163201617063e-06, "loss": 2.7475228309631348, "step": 778 }, { "epoch": 0.7018018018018019, "grad_norm": 10.50780876599671, "learning_rate": 9.465808487924503e-06, "loss": 3.2652931213378906, "step": 779 }, { "epoch": 0.7027027027027027, "grad_norm": 9.381052152519086, "learning_rate": 9.463448867116057e-06, "loss": 2.7883598804473877, "step": 780 }, { "epoch": 0.7036036036036036, "grad_norm": 10.272838500948234, "learning_rate": 9.461084341784519e-06, "loss": 2.684918165206909, "step": 781 }, { "epoch": 0.7045045045045045, "grad_norm": 21.94060348072659, "learning_rate": 9.458714914528076e-06, "loss": 2.7922565937042236, "step": 782 }, { "epoch": 0.7054054054054054, "grad_norm": 16.382327778672522, "learning_rate": 9.4563405879503e-06, "loss": 2.7418885231018066, "step": 783 }, { "epoch": 0.7063063063063063, "grad_norm": 11.490590318903909, "learning_rate": 9.453961364660143e-06, "loss": 2.6633377075195312, "step": 784 }, { "epoch": 0.7072072072072072, "grad_norm": 12.885767656647163, "learning_rate": 9.451577247271945e-06, "loss": 2.512943983078003, "step": 785 }, { "epoch": 0.7081081081081081, "grad_norm": 12.034839864833085, "learning_rate": 9.449188238405417e-06, "loss": 2.8916306495666504, "step": 786 }, { "epoch": 0.709009009009009, "grad_norm": 10.52612701188939, "learning_rate": 9.446794340685653e-06, "loss": 2.993307590484619, "step": 787 }, { "epoch": 0.7099099099099099, "grad_norm": 6.9540578958384165, "learning_rate": 9.444395556743106e-06, "loss": 2.479743242263794, "step": 788 }, { "epoch": 0.7108108108108108, "grad_norm": 10.290522905456008, "learning_rate": 9.441991889213613e-06, "loss": 2.46384596824646, "step": 789 }, { "epoch": 0.7117117117117117, "grad_norm": 6.698040329706862, "learning_rate": 9.439583340738365e-06, "loss": 2.5311758518218994, "step": 790 }, { "epoch": 0.7126126126126127, "grad_norm": 7.906102550506939, "learning_rate": 9.437169913963924e-06, "loss": 2.1800713539123535, "step": 791 }, { "epoch": 0.7135135135135136, "grad_norm": 15.987084764349607, "learning_rate": 9.434751611542208e-06, "loss": 2.688724994659424, "step": 792 }, { "epoch": 0.7144144144144144, "grad_norm": 8.655721817445965, "learning_rate": 9.432328436130493e-06, "loss": 2.7366106510162354, "step": 793 }, { "epoch": 0.7153153153153153, "grad_norm": 7.824353996578687, "learning_rate": 9.429900390391415e-06, "loss": 2.5832180976867676, "step": 794 }, { "epoch": 0.7162162162162162, "grad_norm": 19.995905128135163, "learning_rate": 9.42746747699295e-06, "loss": 2.926548957824707, "step": 795 }, { "epoch": 0.7171171171171171, "grad_norm": 14.474967706949847, "learning_rate": 9.425029698608438e-06, "loss": 2.584516763687134, "step": 796 }, { "epoch": 0.718018018018018, "grad_norm": 9.194292118045457, "learning_rate": 9.42258705791655e-06, "loss": 2.5635998249053955, "step": 797 }, { "epoch": 0.7189189189189189, "grad_norm": 22.858682720554423, "learning_rate": 9.42013955760131e-06, "loss": 2.6104507446289062, "step": 798 }, { "epoch": 0.7198198198198198, "grad_norm": 10.425797078712982, "learning_rate": 9.417687200352077e-06, "loss": 2.803596019744873, "step": 799 }, { "epoch": 0.7207207207207207, "grad_norm": 8.694257724524336, "learning_rate": 9.415229988863548e-06, "loss": 2.6353211402893066, "step": 800 }, { "epoch": 0.7216216216216216, "grad_norm": 9.41087506713879, "learning_rate": 9.412767925835753e-06, "loss": 3.070380210876465, "step": 801 }, { "epoch": 0.7225225225225225, "grad_norm": 12.144455674929114, "learning_rate": 9.410301013974056e-06, "loss": 2.652477741241455, "step": 802 }, { "epoch": 0.7234234234234235, "grad_norm": 26.120427483925482, "learning_rate": 9.40782925598915e-06, "loss": 2.8056678771972656, "step": 803 }, { "epoch": 0.7243243243243244, "grad_norm": 9.277478281120864, "learning_rate": 9.405352654597042e-06, "loss": 3.2002205848693848, "step": 804 }, { "epoch": 0.7252252252252253, "grad_norm": 20.25434885403409, "learning_rate": 9.402871212519074e-06, "loss": 2.261554718017578, "step": 805 }, { "epoch": 0.7261261261261261, "grad_norm": 20.2554986876722, "learning_rate": 9.400384932481902e-06, "loss": 2.0507774353027344, "step": 806 }, { "epoch": 0.727027027027027, "grad_norm": 16.07366356988039, "learning_rate": 9.397893817217497e-06, "loss": 2.8768179416656494, "step": 807 }, { "epoch": 0.7279279279279279, "grad_norm": 48.89208536633896, "learning_rate": 9.395397869463145e-06, "loss": 2.4264047145843506, "step": 808 }, { "epoch": 0.7288288288288288, "grad_norm": 17.83944897537209, "learning_rate": 9.392897091961442e-06, "loss": 3.395022392272949, "step": 809 }, { "epoch": 0.7297297297297297, "grad_norm": 15.946872351454127, "learning_rate": 9.390391487460286e-06, "loss": 3.188384532928467, "step": 810 }, { "epoch": 0.7306306306306306, "grad_norm": 8.178557885088305, "learning_rate": 9.387881058712888e-06, "loss": 3.1175644397735596, "step": 811 }, { "epoch": 0.7315315315315315, "grad_norm": 16.724591664532905, "learning_rate": 9.385365808477755e-06, "loss": 2.8816864490509033, "step": 812 }, { "epoch": 0.7324324324324324, "grad_norm": 14.780641087159573, "learning_rate": 9.382845739518688e-06, "loss": 2.7165169715881348, "step": 813 }, { "epoch": 0.7333333333333333, "grad_norm": 20.646142007780085, "learning_rate": 9.380320854604792e-06, "loss": 2.3554203510284424, "step": 814 }, { "epoch": 0.7342342342342343, "grad_norm": 18.385887494793863, "learning_rate": 9.377791156510456e-06, "loss": 3.113914966583252, "step": 815 }, { "epoch": 0.7351351351351352, "grad_norm": 28.416780525776396, "learning_rate": 9.37525664801536e-06, "loss": 2.461071491241455, "step": 816 }, { "epoch": 0.7360360360360361, "grad_norm": 8.120471090329623, "learning_rate": 9.372717331904472e-06, "loss": 2.44002103805542, "step": 817 }, { "epoch": 0.736936936936937, "grad_norm": 9.178800276935084, "learning_rate": 9.370173210968041e-06, "loss": 2.982046127319336, "step": 818 }, { "epoch": 0.7378378378378379, "grad_norm": 9.291849128138498, "learning_rate": 9.367624288001596e-06, "loss": 2.580765962600708, "step": 819 }, { "epoch": 0.7387387387387387, "grad_norm": 14.215598673679969, "learning_rate": 9.365070565805941e-06, "loss": 2.9165632724761963, "step": 820 }, { "epoch": 0.7396396396396396, "grad_norm": 20.65767647017504, "learning_rate": 9.362512047187159e-06, "loss": 2.551032304763794, "step": 821 }, { "epoch": 0.7405405405405405, "grad_norm": 9.285553299169587, "learning_rate": 9.359948734956591e-06, "loss": 2.451357841491699, "step": 822 }, { "epoch": 0.7414414414414414, "grad_norm": 10.736946004417806, "learning_rate": 9.357380631930863e-06, "loss": 2.8412632942199707, "step": 823 }, { "epoch": 0.7423423423423423, "grad_norm": 13.588522754081062, "learning_rate": 9.35480774093185e-06, "loss": 2.455108165740967, "step": 824 }, { "epoch": 0.7432432432432432, "grad_norm": 8.066711806396757, "learning_rate": 9.352230064786696e-06, "loss": 2.8943047523498535, "step": 825 }, { "epoch": 0.7441441441441441, "grad_norm": 15.193609099494148, "learning_rate": 9.349647606327798e-06, "loss": 2.916165351867676, "step": 826 }, { "epoch": 0.7450450450450451, "grad_norm": 14.04662684891242, "learning_rate": 9.347060368392816e-06, "loss": 2.5018086433410645, "step": 827 }, { "epoch": 0.745945945945946, "grad_norm": 7.39177413129495, "learning_rate": 9.344468353824653e-06, "loss": 2.697756052017212, "step": 828 }, { "epoch": 0.7468468468468469, "grad_norm": 17.813385065952076, "learning_rate": 9.341871565471464e-06, "loss": 2.811149835586548, "step": 829 }, { "epoch": 0.7477477477477478, "grad_norm": 15.113644784678222, "learning_rate": 9.33927000618665e-06, "loss": 2.9072134494781494, "step": 830 }, { "epoch": 0.7486486486486487, "grad_norm": 10.464234570824074, "learning_rate": 9.336663678828859e-06, "loss": 2.5747392177581787, "step": 831 }, { "epoch": 0.7495495495495496, "grad_norm": 10.25650921301874, "learning_rate": 9.334052586261965e-06, "loss": 2.636551856994629, "step": 832 }, { "epoch": 0.7504504504504504, "grad_norm": 16.38634375299398, "learning_rate": 9.331436731355093e-06, "loss": 2.807694911956787, "step": 833 }, { "epoch": 0.7513513513513513, "grad_norm": 11.603882134317555, "learning_rate": 9.32881611698259e-06, "loss": 2.8333516120910645, "step": 834 }, { "epoch": 0.7522522522522522, "grad_norm": 9.144476288348503, "learning_rate": 9.326190746024041e-06, "loss": 2.7606048583984375, "step": 835 }, { "epoch": 0.7531531531531531, "grad_norm": 8.565934481185979, "learning_rate": 9.323560621364253e-06, "loss": 2.768097400665283, "step": 836 }, { "epoch": 0.754054054054054, "grad_norm": 10.316465266491164, "learning_rate": 9.320925745893257e-06, "loss": 2.4527711868286133, "step": 837 }, { "epoch": 0.7549549549549549, "grad_norm": 9.976588781572577, "learning_rate": 9.318286122506304e-06, "loss": 3.151270627975464, "step": 838 }, { "epoch": 0.7558558558558559, "grad_norm": 15.224079481612058, "learning_rate": 9.315641754103863e-06, "loss": 2.5614888668060303, "step": 839 }, { "epoch": 0.7567567567567568, "grad_norm": 11.589239774949226, "learning_rate": 9.312992643591617e-06, "loss": 2.7102653980255127, "step": 840 }, { "epoch": 0.7576576576576577, "grad_norm": 7.516970212020474, "learning_rate": 9.310338793880458e-06, "loss": 2.6227128505706787, "step": 841 }, { "epoch": 0.7585585585585586, "grad_norm": 8.043716387922379, "learning_rate": 9.30768020788649e-06, "loss": 2.1668949127197266, "step": 842 }, { "epoch": 0.7594594594594595, "grad_norm": 11.933802574887105, "learning_rate": 9.305016888531013e-06, "loss": 2.6135597229003906, "step": 843 }, { "epoch": 0.7603603603603604, "grad_norm": 7.370709474714002, "learning_rate": 9.302348838740539e-06, "loss": 2.7681655883789062, "step": 844 }, { "epoch": 0.7612612612612613, "grad_norm": 16.425951235185856, "learning_rate": 9.29967606144677e-06, "loss": 2.987060546875, "step": 845 }, { "epoch": 0.7621621621621621, "grad_norm": 9.549678592141358, "learning_rate": 9.296998559586603e-06, "loss": 2.931814670562744, "step": 846 }, { "epoch": 0.763063063063063, "grad_norm": 18.44660477260879, "learning_rate": 9.294316336102132e-06, "loss": 2.7123594284057617, "step": 847 }, { "epoch": 0.7639639639639639, "grad_norm": 19.372543945414222, "learning_rate": 9.291629393940631e-06, "loss": 2.546339988708496, "step": 848 }, { "epoch": 0.7648648648648648, "grad_norm": 9.501187725957108, "learning_rate": 9.288937736054568e-06, "loss": 2.3622288703918457, "step": 849 }, { "epoch": 0.7657657657657657, "grad_norm": 19.46303995758465, "learning_rate": 9.286241365401585e-06, "loss": 2.243159294128418, "step": 850 }, { "epoch": 0.7666666666666667, "grad_norm": 8.549177641261387, "learning_rate": 9.283540284944507e-06, "loss": 2.2970545291900635, "step": 851 }, { "epoch": 0.7675675675675676, "grad_norm": 10.303935337104333, "learning_rate": 9.280834497651334e-06, "loss": 2.5987138748168945, "step": 852 }, { "epoch": 0.7684684684684685, "grad_norm": 9.753998036349868, "learning_rate": 9.278124006495234e-06, "loss": 2.220139980316162, "step": 853 }, { "epoch": 0.7693693693693694, "grad_norm": 9.188191929831634, "learning_rate": 9.27540881445455e-06, "loss": 2.614650011062622, "step": 854 }, { "epoch": 0.7702702702702703, "grad_norm": 11.225502603819619, "learning_rate": 9.272688924512783e-06, "loss": 2.9019663333892822, "step": 855 }, { "epoch": 0.7711711711711712, "grad_norm": 14.050951696540693, "learning_rate": 9.269964339658605e-06, "loss": 2.776390790939331, "step": 856 }, { "epoch": 0.7720720720720721, "grad_norm": 34.83073169335098, "learning_rate": 9.267235062885838e-06, "loss": 4.928389072418213, "step": 857 }, { "epoch": 0.772972972972973, "grad_norm": 7.4228290420989085, "learning_rate": 9.264501097193465e-06, "loss": 2.3369925022125244, "step": 858 }, { "epoch": 0.7738738738738739, "grad_norm": 23.330497182576813, "learning_rate": 9.26176244558562e-06, "loss": 2.757167339324951, "step": 859 }, { "epoch": 0.7747747747747747, "grad_norm": 8.526306477591158, "learning_rate": 9.259019111071587e-06, "loss": 3.1688270568847656, "step": 860 }, { "epoch": 0.7756756756756756, "grad_norm": 23.494552058234884, "learning_rate": 9.256271096665792e-06, "loss": 2.630497694015503, "step": 861 }, { "epoch": 0.7765765765765765, "grad_norm": 10.565389505665538, "learning_rate": 9.253518405387808e-06, "loss": 2.744032382965088, "step": 862 }, { "epoch": 0.7774774774774775, "grad_norm": 14.766385774718497, "learning_rate": 9.250761040262344e-06, "loss": 2.5588674545288086, "step": 863 }, { "epoch": 0.7783783783783784, "grad_norm": 9.207452155362795, "learning_rate": 9.247999004319245e-06, "loss": 2.6305861473083496, "step": 864 }, { "epoch": 0.7792792792792793, "grad_norm": 8.75815825583094, "learning_rate": 9.24523230059349e-06, "loss": 3.0089213848114014, "step": 865 }, { "epoch": 0.7801801801801802, "grad_norm": 21.26373014356904, "learning_rate": 9.242460932125185e-06, "loss": 3.4457340240478516, "step": 866 }, { "epoch": 0.7810810810810811, "grad_norm": 14.92483027731681, "learning_rate": 9.239684901959565e-06, "loss": 2.5607290267944336, "step": 867 }, { "epoch": 0.781981981981982, "grad_norm": 6.90303732761023, "learning_rate": 9.236904213146982e-06, "loss": 2.2297580242156982, "step": 868 }, { "epoch": 0.7828828828828829, "grad_norm": 13.062000921870082, "learning_rate": 9.234118868742911e-06, "loss": 3.182875871658325, "step": 869 }, { "epoch": 0.7837837837837838, "grad_norm": 8.967717184430734, "learning_rate": 9.231328871807943e-06, "loss": 2.0097410678863525, "step": 870 }, { "epoch": 0.7846846846846847, "grad_norm": 13.639231514475798, "learning_rate": 9.228534225407781e-06, "loss": 3.289792060852051, "step": 871 }, { "epoch": 0.7855855855855856, "grad_norm": 8.795213936439309, "learning_rate": 9.225734932613233e-06, "loss": 1.7139835357666016, "step": 872 }, { "epoch": 0.7864864864864864, "grad_norm": 29.48172548724543, "learning_rate": 9.222930996500218e-06, "loss": 2.708808183670044, "step": 873 }, { "epoch": 0.7873873873873873, "grad_norm": 12.608981891953999, "learning_rate": 9.220122420149753e-06, "loss": 2.9573237895965576, "step": 874 }, { "epoch": 0.7882882882882883, "grad_norm": 10.05828507973409, "learning_rate": 9.217309206647955e-06, "loss": 2.8175604343414307, "step": 875 }, { "epoch": 0.7891891891891892, "grad_norm": 18.025305074516382, "learning_rate": 9.21449135908604e-06, "loss": 2.6104860305786133, "step": 876 }, { "epoch": 0.7900900900900901, "grad_norm": 10.529882797070544, "learning_rate": 9.21166888056031e-06, "loss": 2.855435609817505, "step": 877 }, { "epoch": 0.790990990990991, "grad_norm": 9.608056253818662, "learning_rate": 9.208841774172159e-06, "loss": 2.761814832687378, "step": 878 }, { "epoch": 0.7918918918918919, "grad_norm": 9.47187691572592, "learning_rate": 9.206010043028066e-06, "loss": 2.776505470275879, "step": 879 }, { "epoch": 0.7927927927927928, "grad_norm": 14.241497175937738, "learning_rate": 9.203173690239591e-06, "loss": 2.668419361114502, "step": 880 }, { "epoch": 0.7936936936936937, "grad_norm": 14.832385901867733, "learning_rate": 9.200332718923374e-06, "loss": 2.8139710426330566, "step": 881 }, { "epoch": 0.7945945945945946, "grad_norm": 23.079974992143335, "learning_rate": 9.197487132201129e-06, "loss": 3.2440996170043945, "step": 882 }, { "epoch": 0.7954954954954955, "grad_norm": 16.23973883783376, "learning_rate": 9.194636933199637e-06, "loss": 2.396348476409912, "step": 883 }, { "epoch": 0.7963963963963964, "grad_norm": 11.451286076420798, "learning_rate": 9.191782125050757e-06, "loss": 3.1295599937438965, "step": 884 }, { "epoch": 0.7972972972972973, "grad_norm": 9.549326162841712, "learning_rate": 9.188922710891401e-06, "loss": 3.130983829498291, "step": 885 }, { "epoch": 0.7981981981981981, "grad_norm": 9.54142098168785, "learning_rate": 9.186058693863554e-06, "loss": 2.2931318283081055, "step": 886 }, { "epoch": 0.7990990990990992, "grad_norm": 37.205104515681505, "learning_rate": 9.18319007711425e-06, "loss": 3.0152671337127686, "step": 887 }, { "epoch": 0.8, "grad_norm": 11.628006866071045, "learning_rate": 9.180316863795578e-06, "loss": 2.771811008453369, "step": 888 }, { "epoch": 0.8009009009009009, "grad_norm": 12.902006492396236, "learning_rate": 9.177439057064684e-06, "loss": 2.614129066467285, "step": 889 }, { "epoch": 0.8018018018018018, "grad_norm": 19.478417774026575, "learning_rate": 9.174556660083751e-06, "loss": 2.6148383617401123, "step": 890 }, { "epoch": 0.8027027027027027, "grad_norm": 12.43984899089034, "learning_rate": 9.17166967602002e-06, "loss": 2.7329466342926025, "step": 891 }, { "epoch": 0.8036036036036036, "grad_norm": 12.761357469110417, "learning_rate": 9.16877810804576e-06, "loss": 2.4950637817382812, "step": 892 }, { "epoch": 0.8045045045045045, "grad_norm": 11.275645132613704, "learning_rate": 9.165881959338279e-06, "loss": 2.7276556491851807, "step": 893 }, { "epoch": 0.8054054054054054, "grad_norm": 11.39602365398208, "learning_rate": 9.162981233079925e-06, "loss": 2.9622392654418945, "step": 894 }, { "epoch": 0.8063063063063063, "grad_norm": 11.83232194085143, "learning_rate": 9.160075932458069e-06, "loss": 2.2887325286865234, "step": 895 }, { "epoch": 0.8072072072072072, "grad_norm": 16.802448060782268, "learning_rate": 9.157166060665113e-06, "loss": 2.6609508991241455, "step": 896 }, { "epoch": 0.8081081081081081, "grad_norm": 10.679228399556099, "learning_rate": 9.154251620898475e-06, "loss": 2.6985628604888916, "step": 897 }, { "epoch": 0.809009009009009, "grad_norm": 7.695041715720657, "learning_rate": 9.151332616360604e-06, "loss": 2.8990566730499268, "step": 898 }, { "epoch": 0.80990990990991, "grad_norm": 9.526552739953779, "learning_rate": 9.148409050258956e-06, "loss": 2.71006441116333, "step": 899 }, { "epoch": 0.8108108108108109, "grad_norm": 12.888284126466246, "learning_rate": 9.145480925805998e-06, "loss": 2.5472047328948975, "step": 900 }, { "epoch": 0.8117117117117117, "grad_norm": 11.468867576029266, "learning_rate": 9.142548246219212e-06, "loss": 2.5543947219848633, "step": 901 }, { "epoch": 0.8126126126126126, "grad_norm": 14.2108048341138, "learning_rate": 9.139611014721082e-06, "loss": 2.2650206089019775, "step": 902 }, { "epoch": 0.8135135135135135, "grad_norm": 10.510519818486246, "learning_rate": 9.136669234539093e-06, "loss": 2.8357656002044678, "step": 903 }, { "epoch": 0.8144144144144144, "grad_norm": 8.430176591688056, "learning_rate": 9.133722908905733e-06, "loss": 2.9496519565582275, "step": 904 }, { "epoch": 0.8153153153153153, "grad_norm": 14.294982303531594, "learning_rate": 9.130772041058478e-06, "loss": 2.9449565410614014, "step": 905 }, { "epoch": 0.8162162162162162, "grad_norm": 13.848470272783437, "learning_rate": 9.127816634239798e-06, "loss": 2.7125496864318848, "step": 906 }, { "epoch": 0.8171171171171171, "grad_norm": 9.712082839910767, "learning_rate": 9.124856691697152e-06, "loss": 2.622316360473633, "step": 907 }, { "epoch": 0.818018018018018, "grad_norm": 8.282921934671824, "learning_rate": 9.121892216682981e-06, "loss": 2.7912187576293945, "step": 908 }, { "epoch": 0.8189189189189189, "grad_norm": 8.898346246381115, "learning_rate": 9.118923212454706e-06, "loss": 3.250605583190918, "step": 909 }, { "epoch": 0.8198198198198198, "grad_norm": 24.150519660685216, "learning_rate": 9.115949682274727e-06, "loss": 2.956425666809082, "step": 910 }, { "epoch": 0.8207207207207208, "grad_norm": 10.87116408089112, "learning_rate": 9.112971629410416e-06, "loss": 3.0385324954986572, "step": 911 }, { "epoch": 0.8216216216216217, "grad_norm": 17.483675464713595, "learning_rate": 9.109989057134113e-06, "loss": 2.362560749053955, "step": 912 }, { "epoch": 0.8225225225225226, "grad_norm": 15.918245368373386, "learning_rate": 9.107001968723127e-06, "loss": 2.489495038986206, "step": 913 }, { "epoch": 0.8234234234234235, "grad_norm": 15.174666429652001, "learning_rate": 9.104010367459728e-06, "loss": 2.519327163696289, "step": 914 }, { "epoch": 0.8243243243243243, "grad_norm": 11.008930408753894, "learning_rate": 9.101014256631144e-06, "loss": 2.9351871013641357, "step": 915 }, { "epoch": 0.8252252252252252, "grad_norm": 5.441422734794709, "learning_rate": 9.098013639529557e-06, "loss": 2.472674608230591, "step": 916 }, { "epoch": 0.8261261261261261, "grad_norm": 15.615971571768284, "learning_rate": 9.095008519452108e-06, "loss": 3.1283674240112305, "step": 917 }, { "epoch": 0.827027027027027, "grad_norm": 9.72553659169402, "learning_rate": 9.091998899700876e-06, "loss": 2.636320114135742, "step": 918 }, { "epoch": 0.8279279279279279, "grad_norm": 22.068351344641943, "learning_rate": 9.08898478358289e-06, "loss": 2.253181219100952, "step": 919 }, { "epoch": 0.8288288288288288, "grad_norm": 11.65588326241802, "learning_rate": 9.085966174410118e-06, "loss": 2.6036620140075684, "step": 920 }, { "epoch": 0.8297297297297297, "grad_norm": 7.70849813769218, "learning_rate": 9.082943075499467e-06, "loss": 2.9057793617248535, "step": 921 }, { "epoch": 0.8306306306306306, "grad_norm": 15.145793198315555, "learning_rate": 9.079915490172775e-06, "loss": 3.194100856781006, "step": 922 }, { "epoch": 0.8315315315315316, "grad_norm": 20.301754986462093, "learning_rate": 9.07688342175681e-06, "loss": 2.951569080352783, "step": 923 }, { "epoch": 0.8324324324324325, "grad_norm": 8.06496478388907, "learning_rate": 9.073846873583268e-06, "loss": 2.6843085289001465, "step": 924 }, { "epoch": 0.8333333333333334, "grad_norm": 34.72774833106996, "learning_rate": 9.070805848988763e-06, "loss": 3.4267823696136475, "step": 925 }, { "epoch": 0.8342342342342343, "grad_norm": 18.05664547289118, "learning_rate": 9.067760351314838e-06, "loss": 2.6139276027679443, "step": 926 }, { "epoch": 0.8351351351351352, "grad_norm": 15.825387603740007, "learning_rate": 9.06471038390794e-06, "loss": 2.70221209526062, "step": 927 }, { "epoch": 0.836036036036036, "grad_norm": 12.785079120226515, "learning_rate": 9.06165595011943e-06, "loss": 2.9434709548950195, "step": 928 }, { "epoch": 0.8369369369369369, "grad_norm": 11.366277573937527, "learning_rate": 9.058597053305581e-06, "loss": 2.491422176361084, "step": 929 }, { "epoch": 0.8378378378378378, "grad_norm": 9.676716238112755, "learning_rate": 9.055533696827567e-06, "loss": 2.3882524967193604, "step": 930 }, { "epoch": 0.8387387387387387, "grad_norm": 22.644503500032666, "learning_rate": 9.05246588405146e-06, "loss": 3.4896740913391113, "step": 931 }, { "epoch": 0.8396396396396396, "grad_norm": 10.849687971098547, "learning_rate": 9.049393618348237e-06, "loss": 2.70531964302063, "step": 932 }, { "epoch": 0.8405405405405405, "grad_norm": 12.569047526859508, "learning_rate": 9.046316903093757e-06, "loss": 2.8259475231170654, "step": 933 }, { "epoch": 0.8414414414414414, "grad_norm": 19.438957664171042, "learning_rate": 9.043235741668775e-06, "loss": 2.8464393615722656, "step": 934 }, { "epoch": 0.8423423423423423, "grad_norm": 19.936953235312018, "learning_rate": 9.040150137458931e-06, "loss": 3.2039737701416016, "step": 935 }, { "epoch": 0.8432432432432433, "grad_norm": 8.613752918501891, "learning_rate": 9.037060093854748e-06, "loss": 2.366600275039673, "step": 936 }, { "epoch": 0.8441441441441442, "grad_norm": 11.072376563504072, "learning_rate": 9.033965614251623e-06, "loss": 2.9386978149414062, "step": 937 }, { "epoch": 0.8450450450450451, "grad_norm": 11.053325721807497, "learning_rate": 9.030866702049828e-06, "loss": 2.8608415126800537, "step": 938 }, { "epoch": 0.845945945945946, "grad_norm": 13.210636717606068, "learning_rate": 9.027763360654509e-06, "loss": 2.7908477783203125, "step": 939 }, { "epoch": 0.8468468468468469, "grad_norm": 12.165099599643389, "learning_rate": 9.024655593475675e-06, "loss": 2.92722487449646, "step": 940 }, { "epoch": 0.8477477477477477, "grad_norm": 20.884372936174117, "learning_rate": 9.021543403928202e-06, "loss": 2.5187618732452393, "step": 941 }, { "epoch": 0.8486486486486486, "grad_norm": 7.200474379907157, "learning_rate": 9.018426795431825e-06, "loss": 1.8800894021987915, "step": 942 }, { "epoch": 0.8495495495495495, "grad_norm": 10.238654769472504, "learning_rate": 9.015305771411128e-06, "loss": 2.726329803466797, "step": 943 }, { "epoch": 0.8504504504504504, "grad_norm": 8.044861147047099, "learning_rate": 9.012180335295558e-06, "loss": 2.646120548248291, "step": 944 }, { "epoch": 0.8513513513513513, "grad_norm": 6.750949554243864, "learning_rate": 9.0090504905194e-06, "loss": 2.664979934692383, "step": 945 }, { "epoch": 0.8522522522522522, "grad_norm": 10.468759833010632, "learning_rate": 9.005916240521788e-06, "loss": 2.7094297409057617, "step": 946 }, { "epoch": 0.8531531531531531, "grad_norm": 10.091214984327587, "learning_rate": 9.002777588746698e-06, "loss": 2.7719886302948, "step": 947 }, { "epoch": 0.8540540540540541, "grad_norm": 10.820966698368546, "learning_rate": 8.999634538642938e-06, "loss": 2.6184051036834717, "step": 948 }, { "epoch": 0.854954954954955, "grad_norm": 14.412149092715156, "learning_rate": 8.996487093664152e-06, "loss": 1.899958848953247, "step": 949 }, { "epoch": 0.8558558558558559, "grad_norm": 8.781129108710493, "learning_rate": 8.993335257268814e-06, "loss": 2.7084133625030518, "step": 950 }, { "epoch": 0.8567567567567568, "grad_norm": 11.727531617720302, "learning_rate": 8.990179032920222e-06, "loss": 2.895404100418091, "step": 951 }, { "epoch": 0.8576576576576577, "grad_norm": 11.556277303840965, "learning_rate": 8.987018424086496e-06, "loss": 2.6565675735473633, "step": 952 }, { "epoch": 0.8585585585585586, "grad_norm": 10.473320014625909, "learning_rate": 8.983853434240573e-06, "loss": 2.8310439586639404, "step": 953 }, { "epoch": 0.8594594594594595, "grad_norm": 13.148854857824386, "learning_rate": 8.980684066860203e-06, "loss": 3.1059255599975586, "step": 954 }, { "epoch": 0.8603603603603603, "grad_norm": 17.525522015414296, "learning_rate": 8.97751032542795e-06, "loss": 4.3065009117126465, "step": 955 }, { "epoch": 0.8612612612612612, "grad_norm": 27.81509585595586, "learning_rate": 8.974332213431182e-06, "loss": 2.922642230987549, "step": 956 }, { "epoch": 0.8621621621621621, "grad_norm": 30.277550053473007, "learning_rate": 8.971149734362067e-06, "loss": 3.379426956176758, "step": 957 }, { "epoch": 0.863063063063063, "grad_norm": 13.70645291398646, "learning_rate": 8.967962891717575e-06, "loss": 2.701871871948242, "step": 958 }, { "epoch": 0.8639639639639639, "grad_norm": 13.008491648469082, "learning_rate": 8.96477168899947e-06, "loss": 3.1602060794830322, "step": 959 }, { "epoch": 0.8648648648648649, "grad_norm": 12.744955511737194, "learning_rate": 8.961576129714307e-06, "loss": 2.9068946838378906, "step": 960 }, { "epoch": 0.8657657657657658, "grad_norm": 8.665898985016119, "learning_rate": 8.958376217373428e-06, "loss": 2.813460350036621, "step": 961 }, { "epoch": 0.8666666666666667, "grad_norm": 16.13879321331645, "learning_rate": 8.955171955492956e-06, "loss": 3.479156494140625, "step": 962 }, { "epoch": 0.8675675675675676, "grad_norm": 14.285750763371981, "learning_rate": 8.951963347593797e-06, "loss": 3.6346793174743652, "step": 963 }, { "epoch": 0.8684684684684685, "grad_norm": 24.895785145623265, "learning_rate": 8.948750397201631e-06, "loss": 3.2489395141601562, "step": 964 }, { "epoch": 0.8693693693693694, "grad_norm": 12.19008178226641, "learning_rate": 8.94553310784691e-06, "loss": 2.697892665863037, "step": 965 }, { "epoch": 0.8702702702702703, "grad_norm": 7.760892193309789, "learning_rate": 8.942311483064849e-06, "loss": 3.010653495788574, "step": 966 }, { "epoch": 0.8711711711711712, "grad_norm": 13.600426235180727, "learning_rate": 8.939085526395435e-06, "loss": 2.576807975769043, "step": 967 }, { "epoch": 0.872072072072072, "grad_norm": 9.293504375965979, "learning_rate": 8.93585524138341e-06, "loss": 1.9543118476867676, "step": 968 }, { "epoch": 0.8729729729729729, "grad_norm": 14.268656864628506, "learning_rate": 8.932620631578273e-06, "loss": 2.6119332313537598, "step": 969 }, { "epoch": 0.8738738738738738, "grad_norm": 6.2148919940604515, "learning_rate": 8.929381700534275e-06, "loss": 2.7489614486694336, "step": 970 }, { "epoch": 0.8747747747747747, "grad_norm": 12.017346068841979, "learning_rate": 8.926138451810415e-06, "loss": 2.812819004058838, "step": 971 }, { "epoch": 0.8756756756756757, "grad_norm": 9.31122675418436, "learning_rate": 8.92289088897044e-06, "loss": 2.209747314453125, "step": 972 }, { "epoch": 0.8765765765765766, "grad_norm": 6.994540232509598, "learning_rate": 8.91963901558283e-06, "loss": 2.4728050231933594, "step": 973 }, { "epoch": 0.8774774774774775, "grad_norm": 8.989192809882589, "learning_rate": 8.916382835220807e-06, "loss": 2.2611050605773926, "step": 974 }, { "epoch": 0.8783783783783784, "grad_norm": 10.16692604773728, "learning_rate": 8.913122351462325e-06, "loss": 3.0471978187561035, "step": 975 }, { "epoch": 0.8792792792792793, "grad_norm": 9.936796580361097, "learning_rate": 8.909857567890066e-06, "loss": 2.8904948234558105, "step": 976 }, { "epoch": 0.8801801801801802, "grad_norm": 11.190809672301613, "learning_rate": 8.906588488091437e-06, "loss": 2.7144124507904053, "step": 977 }, { "epoch": 0.8810810810810811, "grad_norm": 12.623352491069394, "learning_rate": 8.903315115658564e-06, "loss": 2.7676889896392822, "step": 978 }, { "epoch": 0.881981981981982, "grad_norm": 17.349761940508067, "learning_rate": 8.900037454188293e-06, "loss": 2.701625347137451, "step": 979 }, { "epoch": 0.8828828828828829, "grad_norm": 12.414156999805867, "learning_rate": 8.89675550728218e-06, "loss": 2.732914447784424, "step": 980 }, { "epoch": 0.8837837837837837, "grad_norm": 16.099666044563033, "learning_rate": 8.893469278546492e-06, "loss": 2.825678825378418, "step": 981 }, { "epoch": 0.8846846846846846, "grad_norm": 13.838046661247263, "learning_rate": 8.890178771592198e-06, "loss": 2.8472774028778076, "step": 982 }, { "epoch": 0.8855855855855855, "grad_norm": 12.563819633459376, "learning_rate": 8.886883990034973e-06, "loss": 3.426520824432373, "step": 983 }, { "epoch": 0.8864864864864865, "grad_norm": 9.893840320317794, "learning_rate": 8.883584937495185e-06, "loss": 2.547525405883789, "step": 984 }, { "epoch": 0.8873873873873874, "grad_norm": 27.712967334706622, "learning_rate": 8.880281617597895e-06, "loss": 2.8002498149871826, "step": 985 }, { "epoch": 0.8882882882882883, "grad_norm": 8.552225351097242, "learning_rate": 8.876974033972855e-06, "loss": 2.2080230712890625, "step": 986 }, { "epoch": 0.8891891891891892, "grad_norm": 12.181276388042427, "learning_rate": 8.873662190254503e-06, "loss": 3.1568124294281006, "step": 987 }, { "epoch": 0.8900900900900901, "grad_norm": 7.284003438251827, "learning_rate": 8.870346090081954e-06, "loss": 3.1568026542663574, "step": 988 }, { "epoch": 0.890990990990991, "grad_norm": 8.030153735651504, "learning_rate": 8.867025737099003e-06, "loss": 2.9712777137756348, "step": 989 }, { "epoch": 0.8918918918918919, "grad_norm": 13.56543096571292, "learning_rate": 8.863701134954116e-06, "loss": 2.6984872817993164, "step": 990 }, { "epoch": 0.8927927927927928, "grad_norm": 9.699493597923782, "learning_rate": 8.860372287300432e-06, "loss": 2.580648422241211, "step": 991 }, { "epoch": 0.8936936936936937, "grad_norm": 11.566024306750842, "learning_rate": 8.857039197795751e-06, "loss": 3.0338239669799805, "step": 992 }, { "epoch": 0.8945945945945946, "grad_norm": 15.284229479177787, "learning_rate": 8.853701870102536e-06, "loss": 2.9295122623443604, "step": 993 }, { "epoch": 0.8954954954954955, "grad_norm": 22.242804170201218, "learning_rate": 8.850360307887906e-06, "loss": 3.161966323852539, "step": 994 }, { "epoch": 0.8963963963963963, "grad_norm": 18.63466452079311, "learning_rate": 8.847014514823635e-06, "loss": 2.4487550258636475, "step": 995 }, { "epoch": 0.8972972972972973, "grad_norm": 7.759208826565421, "learning_rate": 8.843664494586144e-06, "loss": 2.9561946392059326, "step": 996 }, { "epoch": 0.8981981981981982, "grad_norm": 12.32861462876588, "learning_rate": 8.840310250856498e-06, "loss": 2.5366580486297607, "step": 997 }, { "epoch": 0.8990990990990991, "grad_norm": 12.50596301253408, "learning_rate": 8.836951787320407e-06, "loss": 2.808290481567383, "step": 998 }, { "epoch": 0.9, "grad_norm": 7.0421358171911095, "learning_rate": 8.833589107668212e-06, "loss": 2.9124622344970703, "step": 999 }, { "epoch": 0.9009009009009009, "grad_norm": 18.95726436434203, "learning_rate": 8.83022221559489e-06, "loss": 2.8594493865966797, "step": 1000 }, { "epoch": 0.9018018018018018, "grad_norm": 14.360861662237461, "learning_rate": 8.82685111480005e-06, "loss": 3.033987283706665, "step": 1001 }, { "epoch": 0.9027027027027027, "grad_norm": 12.725727994537058, "learning_rate": 8.823475808987918e-06, "loss": 2.891810894012451, "step": 1002 }, { "epoch": 0.9036036036036036, "grad_norm": 13.572670302653405, "learning_rate": 8.820096301867346e-06, "loss": 2.401615619659424, "step": 1003 }, { "epoch": 0.9045045045045045, "grad_norm": 8.41678694622558, "learning_rate": 8.816712597151805e-06, "loss": 2.6334965229034424, "step": 1004 }, { "epoch": 0.9054054054054054, "grad_norm": 8.854776168215475, "learning_rate": 8.813324698559367e-06, "loss": 2.9537599086761475, "step": 1005 }, { "epoch": 0.9063063063063063, "grad_norm": 11.12285578151776, "learning_rate": 8.809932609812727e-06, "loss": 2.487921714782715, "step": 1006 }, { "epoch": 0.9072072072072072, "grad_norm": 17.024347913585913, "learning_rate": 8.806536334639171e-06, "loss": 3.0856282711029053, "step": 1007 }, { "epoch": 0.9081081081081082, "grad_norm": 10.816465698621194, "learning_rate": 8.803135876770596e-06, "loss": 3.0241355895996094, "step": 1008 }, { "epoch": 0.909009009009009, "grad_norm": 9.68705192478706, "learning_rate": 8.799731239943488e-06, "loss": 2.661813974380493, "step": 1009 }, { "epoch": 0.9099099099099099, "grad_norm": 9.619989580445472, "learning_rate": 8.796322427898928e-06, "loss": 2.7854785919189453, "step": 1010 }, { "epoch": 0.9108108108108108, "grad_norm": 10.915941939454688, "learning_rate": 8.792909444382583e-06, "loss": 2.263568878173828, "step": 1011 }, { "epoch": 0.9117117117117117, "grad_norm": 10.503261281888763, "learning_rate": 8.789492293144706e-06, "loss": 2.716181993484497, "step": 1012 }, { "epoch": 0.9126126126126126, "grad_norm": 9.257983129719602, "learning_rate": 8.786070977940126e-06, "loss": 2.7381417751312256, "step": 1013 }, { "epoch": 0.9135135135135135, "grad_norm": 8.31630031526566, "learning_rate": 8.782645502528252e-06, "loss": 3.162320137023926, "step": 1014 }, { "epoch": 0.9144144144144144, "grad_norm": 16.43338595527805, "learning_rate": 8.77921587067306e-06, "loss": 2.9138591289520264, "step": 1015 }, { "epoch": 0.9153153153153153, "grad_norm": 14.235643594684289, "learning_rate": 8.775782086143099e-06, "loss": 2.0848324298858643, "step": 1016 }, { "epoch": 0.9162162162162162, "grad_norm": 7.891020172544132, "learning_rate": 8.772344152711471e-06, "loss": 2.5524768829345703, "step": 1017 }, { "epoch": 0.9171171171171171, "grad_norm": 16.24503078546184, "learning_rate": 8.768902074155848e-06, "loss": 2.9793877601623535, "step": 1018 }, { "epoch": 0.918018018018018, "grad_norm": 8.785763725472759, "learning_rate": 8.765455854258451e-06, "loss": 2.7830615043640137, "step": 1019 }, { "epoch": 0.918918918918919, "grad_norm": 11.653157311412453, "learning_rate": 8.762005496806049e-06, "loss": 3.0042476654052734, "step": 1020 }, { "epoch": 0.9198198198198199, "grad_norm": 16.60196923530908, "learning_rate": 8.758551005589967e-06, "loss": 2.7907586097717285, "step": 1021 }, { "epoch": 0.9207207207207208, "grad_norm": 12.108493842364739, "learning_rate": 8.75509238440606e-06, "loss": 2.2653555870056152, "step": 1022 }, { "epoch": 0.9216216216216216, "grad_norm": 10.496920522396609, "learning_rate": 8.751629637054732e-06, "loss": 2.5076041221618652, "step": 1023 }, { "epoch": 0.9225225225225225, "grad_norm": 11.443885242245704, "learning_rate": 8.748162767340913e-06, "loss": 2.913900375366211, "step": 1024 }, { "epoch": 0.9234234234234234, "grad_norm": 22.94493743951495, "learning_rate": 8.744691779074067e-06, "loss": 3.189591407775879, "step": 1025 }, { "epoch": 0.9243243243243243, "grad_norm": 15.007894138964957, "learning_rate": 8.741216676068182e-06, "loss": 3.1167805194854736, "step": 1026 }, { "epoch": 0.9252252252252252, "grad_norm": 10.701832402692443, "learning_rate": 8.73773746214177e-06, "loss": 3.0631814002990723, "step": 1027 }, { "epoch": 0.9261261261261261, "grad_norm": 22.839782379389632, "learning_rate": 8.734254141117854e-06, "loss": 2.710697889328003, "step": 1028 }, { "epoch": 0.927027027027027, "grad_norm": 13.701631951913418, "learning_rate": 8.730766716823974e-06, "loss": 3.417142868041992, "step": 1029 }, { "epoch": 0.9279279279279279, "grad_norm": 14.065120927128005, "learning_rate": 8.727275193092182e-06, "loss": 2.857334852218628, "step": 1030 }, { "epoch": 0.9288288288288288, "grad_norm": 10.303680600969198, "learning_rate": 8.723779573759028e-06, "loss": 2.1903722286224365, "step": 1031 }, { "epoch": 0.9297297297297298, "grad_norm": 10.694191816249965, "learning_rate": 8.720279862665568e-06, "loss": 2.833894729614258, "step": 1032 }, { "epoch": 0.9306306306306307, "grad_norm": 10.646006782741036, "learning_rate": 8.71677606365735e-06, "loss": 2.849029541015625, "step": 1033 }, { "epoch": 0.9315315315315316, "grad_norm": 7.103070319164133, "learning_rate": 8.713268180584418e-06, "loss": 2.9669673442840576, "step": 1034 }, { "epoch": 0.9324324324324325, "grad_norm": 61.3378003575111, "learning_rate": 8.709756217301297e-06, "loss": 2.606581687927246, "step": 1035 }, { "epoch": 0.9333333333333333, "grad_norm": 7.857691032867668, "learning_rate": 8.706240177667003e-06, "loss": 2.770477294921875, "step": 1036 }, { "epoch": 0.9342342342342342, "grad_norm": 11.728458658229151, "learning_rate": 8.702720065545024e-06, "loss": 2.768040418624878, "step": 1037 }, { "epoch": 0.9351351351351351, "grad_norm": 22.532803809334503, "learning_rate": 8.69919588480333e-06, "loss": 3.213409423828125, "step": 1038 }, { "epoch": 0.936036036036036, "grad_norm": 13.299290384074183, "learning_rate": 8.695667639314356e-06, "loss": 2.9722280502319336, "step": 1039 }, { "epoch": 0.9369369369369369, "grad_norm": 10.179605937395092, "learning_rate": 8.692135332955008e-06, "loss": 2.879065752029419, "step": 1040 }, { "epoch": 0.9378378378378378, "grad_norm": 10.469521987091323, "learning_rate": 8.68859896960665e-06, "loss": 2.9380648136138916, "step": 1041 }, { "epoch": 0.9387387387387387, "grad_norm": 19.18919294251035, "learning_rate": 8.685058553155108e-06, "loss": 2.389150619506836, "step": 1042 }, { "epoch": 0.9396396396396396, "grad_norm": 9.85030798714381, "learning_rate": 8.681514087490656e-06, "loss": 2.6265854835510254, "step": 1043 }, { "epoch": 0.9405405405405406, "grad_norm": 15.751685941479064, "learning_rate": 8.677965576508023e-06, "loss": 3.252495527267456, "step": 1044 }, { "epoch": 0.9414414414414415, "grad_norm": 17.470155615456466, "learning_rate": 8.67441302410638e-06, "loss": 2.970489978790283, "step": 1045 }, { "epoch": 0.9423423423423424, "grad_norm": 10.515437271818497, "learning_rate": 8.670856434189341e-06, "loss": 2.4407780170440674, "step": 1046 }, { "epoch": 0.9432432432432433, "grad_norm": 14.247174716756376, "learning_rate": 8.667295810664953e-06, "loss": 2.4543590545654297, "step": 1047 }, { "epoch": 0.9441441441441442, "grad_norm": 12.75533365867589, "learning_rate": 8.663731157445701e-06, "loss": 3.617422580718994, "step": 1048 }, { "epoch": 0.945045045045045, "grad_norm": 16.220959393701147, "learning_rate": 8.660162478448488e-06, "loss": 1.659257173538208, "step": 1049 }, { "epoch": 0.9459459459459459, "grad_norm": 13.986671940370561, "learning_rate": 8.656589777594653e-06, "loss": 2.4097299575805664, "step": 1050 }, { "epoch": 0.9468468468468468, "grad_norm": 8.65526211507018, "learning_rate": 8.653013058809945e-06, "loss": 3.0357894897460938, "step": 1051 }, { "epoch": 0.9477477477477477, "grad_norm": 9.449523178663169, "learning_rate": 8.649432326024531e-06, "loss": 2.2048110961914062, "step": 1052 }, { "epoch": 0.9486486486486486, "grad_norm": 18.198976515034804, "learning_rate": 8.64584758317299e-06, "loss": 2.9834446907043457, "step": 1053 }, { "epoch": 0.9495495495495495, "grad_norm": 14.176661243192957, "learning_rate": 8.642258834194307e-06, "loss": 2.716215133666992, "step": 1054 }, { "epoch": 0.9504504504504504, "grad_norm": 16.24849823374468, "learning_rate": 8.638666083031864e-06, "loss": 2.4051785469055176, "step": 1055 }, { "epoch": 0.9513513513513514, "grad_norm": 10.323688529598419, "learning_rate": 8.635069333633449e-06, "loss": 2.5208535194396973, "step": 1056 }, { "epoch": 0.9522522522522523, "grad_norm": 11.942409294214032, "learning_rate": 8.631468589951236e-06, "loss": 2.975149393081665, "step": 1057 }, { "epoch": 0.9531531531531532, "grad_norm": 24.540734605676096, "learning_rate": 8.627863855941794e-06, "loss": 2.916090965270996, "step": 1058 }, { "epoch": 0.9540540540540541, "grad_norm": 8.728220424100492, "learning_rate": 8.624255135566071e-06, "loss": 2.4554600715637207, "step": 1059 }, { "epoch": 0.954954954954955, "grad_norm": 13.06212302080104, "learning_rate": 8.6206424327894e-06, "loss": 2.824169158935547, "step": 1060 }, { "epoch": 0.9558558558558559, "grad_norm": 20.581661868346774, "learning_rate": 8.61702575158149e-06, "loss": 3.1394472122192383, "step": 1061 }, { "epoch": 0.9567567567567568, "grad_norm": 13.824699726387482, "learning_rate": 8.613405095916415e-06, "loss": 2.6784205436706543, "step": 1062 }, { "epoch": 0.9576576576576576, "grad_norm": 7.226778164286236, "learning_rate": 8.609780469772623e-06, "loss": 2.159748077392578, "step": 1063 }, { "epoch": 0.9585585585585585, "grad_norm": 17.74386991419334, "learning_rate": 8.606151877132922e-06, "loss": 2.42501163482666, "step": 1064 }, { "epoch": 0.9594594594594594, "grad_norm": 11.786811385744368, "learning_rate": 8.60251932198448e-06, "loss": 2.690016508102417, "step": 1065 }, { "epoch": 0.9603603603603603, "grad_norm": 12.530584920708565, "learning_rate": 8.598882808318818e-06, "loss": 2.0007379055023193, "step": 1066 }, { "epoch": 0.9612612612612612, "grad_norm": 12.032977450254815, "learning_rate": 8.595242340131806e-06, "loss": 2.8771276473999023, "step": 1067 }, { "epoch": 0.9621621621621622, "grad_norm": 12.990670627995613, "learning_rate": 8.591597921423661e-06, "loss": 2.881491184234619, "step": 1068 }, { "epoch": 0.9630630630630631, "grad_norm": 8.478639408597052, "learning_rate": 8.58794955619894e-06, "loss": 2.4792816638946533, "step": 1069 }, { "epoch": 0.963963963963964, "grad_norm": 10.808242878186473, "learning_rate": 8.584297248466536e-06, "loss": 2.679600715637207, "step": 1070 }, { "epoch": 0.9648648648648649, "grad_norm": 10.666407154007661, "learning_rate": 8.580641002239676e-06, "loss": 2.258665084838867, "step": 1071 }, { "epoch": 0.9657657657657658, "grad_norm": 13.089016586138516, "learning_rate": 8.57698082153591e-06, "loss": 2.685615062713623, "step": 1072 }, { "epoch": 0.9666666666666667, "grad_norm": 11.897872127993535, "learning_rate": 8.573316710377119e-06, "loss": 2.5039196014404297, "step": 1073 }, { "epoch": 0.9675675675675676, "grad_norm": 7.790603844140429, "learning_rate": 8.569648672789496e-06, "loss": 2.5071516036987305, "step": 1074 }, { "epoch": 0.9684684684684685, "grad_norm": 8.517147853409444, "learning_rate": 8.565976712803551e-06, "loss": 2.6650633811950684, "step": 1075 }, { "epoch": 0.9693693693693693, "grad_norm": 14.218988165724692, "learning_rate": 8.562300834454106e-06, "loss": 2.593008279800415, "step": 1076 }, { "epoch": 0.9702702702702702, "grad_norm": 13.720113145665367, "learning_rate": 8.558621041780283e-06, "loss": 2.5849051475524902, "step": 1077 }, { "epoch": 0.9711711711711711, "grad_norm": 34.20313502324281, "learning_rate": 8.554937338825511e-06, "loss": 2.3197808265686035, "step": 1078 }, { "epoch": 0.972072072072072, "grad_norm": 10.755292958039405, "learning_rate": 8.551249729637514e-06, "loss": 2.836639165878296, "step": 1079 }, { "epoch": 0.972972972972973, "grad_norm": 9.542291164874905, "learning_rate": 8.547558218268308e-06, "loss": 2.4726526737213135, "step": 1080 }, { "epoch": 0.9738738738738739, "grad_norm": 9.101503649495474, "learning_rate": 8.543862808774193e-06, "loss": 2.8820559978485107, "step": 1081 }, { "epoch": 0.9747747747747748, "grad_norm": 27.756006165054163, "learning_rate": 8.540163505215758e-06, "loss": 2.6131694316864014, "step": 1082 }, { "epoch": 0.9756756756756757, "grad_norm": 11.900513829471146, "learning_rate": 8.536460311657868e-06, "loss": 3.004784107208252, "step": 1083 }, { "epoch": 0.9765765765765766, "grad_norm": 12.282451059049643, "learning_rate": 8.532753232169663e-06, "loss": 3.2131333351135254, "step": 1084 }, { "epoch": 0.9774774774774775, "grad_norm": 6.207133910941661, "learning_rate": 8.529042270824552e-06, "loss": 2.8020894527435303, "step": 1085 }, { "epoch": 0.9783783783783784, "grad_norm": 8.98261289615078, "learning_rate": 8.525327431700215e-06, "loss": 2.8358824253082275, "step": 1086 }, { "epoch": 0.9792792792792793, "grad_norm": 5.879296428796706, "learning_rate": 8.521608718878582e-06, "loss": 2.6441617012023926, "step": 1087 }, { "epoch": 0.9801801801801802, "grad_norm": 14.704560095815832, "learning_rate": 8.517886136445851e-06, "loss": 3.0834951400756836, "step": 1088 }, { "epoch": 0.981081081081081, "grad_norm": 8.908562992873398, "learning_rate": 8.514159688492464e-06, "loss": 2.323145866394043, "step": 1089 }, { "epoch": 0.9819819819819819, "grad_norm": 9.355692462449275, "learning_rate": 8.510429379113114e-06, "loss": 2.854309558868408, "step": 1090 }, { "epoch": 0.9828828828828828, "grad_norm": 14.242696861776297, "learning_rate": 8.506695212406734e-06, "loss": 2.8959994316101074, "step": 1091 }, { "epoch": 0.9837837837837838, "grad_norm": 9.655187837242087, "learning_rate": 8.502957192476505e-06, "loss": 2.3906760215759277, "step": 1092 }, { "epoch": 0.9846846846846847, "grad_norm": 16.686220459430942, "learning_rate": 8.499215323429828e-06, "loss": 3.729126453399658, "step": 1093 }, { "epoch": 0.9855855855855856, "grad_norm": 10.688427431513956, "learning_rate": 8.495469609378342e-06, "loss": 2.0590858459472656, "step": 1094 }, { "epoch": 0.9864864864864865, "grad_norm": 12.418419957345947, "learning_rate": 8.491720054437911e-06, "loss": 2.6640067100524902, "step": 1095 }, { "epoch": 0.9873873873873874, "grad_norm": 11.15755443159756, "learning_rate": 8.487966662728615e-06, "loss": 2.7714591026306152, "step": 1096 }, { "epoch": 0.9882882882882883, "grad_norm": 8.003587800163373, "learning_rate": 8.484209438374755e-06, "loss": 3.144829511642456, "step": 1097 }, { "epoch": 0.9891891891891892, "grad_norm": 11.562203253058653, "learning_rate": 8.480448385504842e-06, "loss": 2.6076714992523193, "step": 1098 }, { "epoch": 0.9900900900900901, "grad_norm": 12.795449782864218, "learning_rate": 8.476683508251591e-06, "loss": 2.9385275840759277, "step": 1099 }, { "epoch": 0.990990990990991, "grad_norm": 8.34869623402233, "learning_rate": 8.47291481075192e-06, "loss": 2.556445598602295, "step": 1100 }, { "epoch": 0.9918918918918919, "grad_norm": 41.06221827535907, "learning_rate": 8.469142297146949e-06, "loss": 3.650862216949463, "step": 1101 }, { "epoch": 0.9927927927927928, "grad_norm": 8.429716966478185, "learning_rate": 8.465365971581988e-06, "loss": 2.6668899059295654, "step": 1102 }, { "epoch": 0.9936936936936936, "grad_norm": 8.58753720175306, "learning_rate": 8.461585838206531e-06, "loss": 2.888392925262451, "step": 1103 }, { "epoch": 0.9945945945945946, "grad_norm": 10.625126330975723, "learning_rate": 8.457801901174267e-06, "loss": 2.9486002922058105, "step": 1104 }, { "epoch": 0.9954954954954955, "grad_norm": 9.686365195239674, "learning_rate": 8.454014164643056e-06, "loss": 2.6803948879241943, "step": 1105 }, { "epoch": 0.9963963963963964, "grad_norm": 20.539892083201337, "learning_rate": 8.450222632774934e-06, "loss": 2.840132236480713, "step": 1106 }, { "epoch": 0.9972972972972973, "grad_norm": 10.384738292731727, "learning_rate": 8.446427309736111e-06, "loss": 2.4560065269470215, "step": 1107 }, { "epoch": 0.9981981981981982, "grad_norm": 11.742221200846808, "learning_rate": 8.442628199696961e-06, "loss": 2.985318660736084, "step": 1108 }, { "epoch": 0.9990990990990991, "grad_norm": 11.539013810526916, "learning_rate": 8.438825306832016e-06, "loss": 2.8573508262634277, "step": 1109 }, { "epoch": 1.0, "grad_norm": 9.764784001874446, "learning_rate": 8.435018635319971e-06, "loss": 2.8428144454956055, "step": 1110 }, { "epoch": 1.000900900900901, "grad_norm": 11.020427019859978, "learning_rate": 8.43120818934367e-06, "loss": 1.6871743202209473, "step": 1111 }, { "epoch": 1.0018018018018018, "grad_norm": 16.195969495168665, "learning_rate": 8.427393973090099e-06, "loss": 1.8083250522613525, "step": 1112 }, { "epoch": 1.0027027027027027, "grad_norm": 10.095894005649189, "learning_rate": 8.423575990750395e-06, "loss": 1.6103458404541016, "step": 1113 }, { "epoch": 1.0036036036036036, "grad_norm": 21.321239667766413, "learning_rate": 8.41975424651983e-06, "loss": 1.87693452835083, "step": 1114 }, { "epoch": 1.0045045045045045, "grad_norm": 13.705097911530245, "learning_rate": 8.415928744597809e-06, "loss": 2.0128674507141113, "step": 1115 }, { "epoch": 1.0054054054054054, "grad_norm": 11.458298437524217, "learning_rate": 8.412099489187869e-06, "loss": 1.6997014284133911, "step": 1116 }, { "epoch": 1.0063063063063062, "grad_norm": 8.195963817611196, "learning_rate": 8.408266484497664e-06, "loss": 1.5195136070251465, "step": 1117 }, { "epoch": 1.0072072072072071, "grad_norm": 14.923918708563708, "learning_rate": 8.40442973473898e-06, "loss": 1.493704915046692, "step": 1118 }, { "epoch": 1.008108108108108, "grad_norm": 11.105529141527436, "learning_rate": 8.400589244127706e-06, "loss": 1.435099720954895, "step": 1119 }, { "epoch": 1.009009009009009, "grad_norm": 14.94678429310199, "learning_rate": 8.396745016883849e-06, "loss": 1.0782595872879028, "step": 1120 }, { "epoch": 1.0099099099099098, "grad_norm": 18.55316224056508, "learning_rate": 8.39289705723152e-06, "loss": 1.224260687828064, "step": 1121 }, { "epoch": 1.0108108108108107, "grad_norm": 13.383219077438845, "learning_rate": 8.389045369398927e-06, "loss": 2.4057981967926025, "step": 1122 }, { "epoch": 1.0117117117117118, "grad_norm": 13.853889947376945, "learning_rate": 8.385189957618383e-06, "loss": 1.8272724151611328, "step": 1123 }, { "epoch": 1.0126126126126127, "grad_norm": 14.417095255534402, "learning_rate": 8.381330826126284e-06, "loss": 1.3936996459960938, "step": 1124 }, { "epoch": 1.0135135135135136, "grad_norm": 9.362334521585238, "learning_rate": 8.377467979163121e-06, "loss": 1.6082737445831299, "step": 1125 }, { "epoch": 1.0144144144144145, "grad_norm": 19.61330240989115, "learning_rate": 8.373601420973464e-06, "loss": 1.5716181993484497, "step": 1126 }, { "epoch": 1.0153153153153154, "grad_norm": 16.02149253836458, "learning_rate": 8.36973115580596e-06, "loss": 2.0253872871398926, "step": 1127 }, { "epoch": 1.0162162162162163, "grad_norm": 8.868636627253291, "learning_rate": 8.365857187913329e-06, "loss": 1.5251405239105225, "step": 1128 }, { "epoch": 1.0171171171171172, "grad_norm": 9.224258602542248, "learning_rate": 8.361979521552363e-06, "loss": 1.3686174154281616, "step": 1129 }, { "epoch": 1.018018018018018, "grad_norm": 9.932368168789786, "learning_rate": 8.358098160983916e-06, "loss": 1.1146045923233032, "step": 1130 }, { "epoch": 1.018918918918919, "grad_norm": 13.307457528809199, "learning_rate": 8.354213110472903e-06, "loss": 2.0996854305267334, "step": 1131 }, { "epoch": 1.0198198198198198, "grad_norm": 14.850252436116673, "learning_rate": 8.350324374288289e-06, "loss": 2.0421838760375977, "step": 1132 }, { "epoch": 1.0207207207207207, "grad_norm": 10.125209745260454, "learning_rate": 8.34643195670309e-06, "loss": 1.5646802186965942, "step": 1133 }, { "epoch": 1.0216216216216216, "grad_norm": 11.54911553947335, "learning_rate": 8.342535861994374e-06, "loss": 1.8787198066711426, "step": 1134 }, { "epoch": 1.0225225225225225, "grad_norm": 18.523592449785106, "learning_rate": 8.338636094443242e-06, "loss": 1.1811740398406982, "step": 1135 }, { "epoch": 1.0234234234234234, "grad_norm": 15.057537445556772, "learning_rate": 8.334732658334834e-06, "loss": 2.080415725708008, "step": 1136 }, { "epoch": 1.0243243243243243, "grad_norm": 10.837715905790452, "learning_rate": 8.33082555795832e-06, "loss": 0.9543228149414062, "step": 1137 }, { "epoch": 1.0252252252252252, "grad_norm": 9.028361152261766, "learning_rate": 8.326914797606897e-06, "loss": 1.548874855041504, "step": 1138 }, { "epoch": 1.026126126126126, "grad_norm": 11.198019677870143, "learning_rate": 8.323000381577783e-06, "loss": 2.275304079055786, "step": 1139 }, { "epoch": 1.027027027027027, "grad_norm": 10.149551619289737, "learning_rate": 8.319082314172213e-06, "loss": 1.595092535018921, "step": 1140 }, { "epoch": 1.0279279279279279, "grad_norm": 7.274664038664192, "learning_rate": 8.315160599695434e-06, "loss": 1.22855544090271, "step": 1141 }, { "epoch": 1.0288288288288288, "grad_norm": 8.944842389439918, "learning_rate": 8.311235242456703e-06, "loss": 1.3381730318069458, "step": 1142 }, { "epoch": 1.0297297297297296, "grad_norm": 13.677904292942957, "learning_rate": 8.307306246769275e-06, "loss": 1.7892537117004395, "step": 1143 }, { "epoch": 1.0306306306306305, "grad_norm": 16.091411976470066, "learning_rate": 8.303373616950408e-06, "loss": 2.1872668266296387, "step": 1144 }, { "epoch": 1.0315315315315314, "grad_norm": 18.49183870526625, "learning_rate": 8.299437357321349e-06, "loss": 1.2070658206939697, "step": 1145 }, { "epoch": 1.0324324324324325, "grad_norm": 14.845403652544013, "learning_rate": 8.295497472207338e-06, "loss": 2.032404661178589, "step": 1146 }, { "epoch": 1.0333333333333334, "grad_norm": 14.313150837493948, "learning_rate": 8.291553965937596e-06, "loss": 1.3609435558319092, "step": 1147 }, { "epoch": 1.0342342342342343, "grad_norm": 11.387225351747333, "learning_rate": 8.28760684284532e-06, "loss": 1.6761469841003418, "step": 1148 }, { "epoch": 1.0351351351351352, "grad_norm": 13.154008351902426, "learning_rate": 8.283656107267686e-06, "loss": 1.7343151569366455, "step": 1149 }, { "epoch": 1.0360360360360361, "grad_norm": 9.847030852367354, "learning_rate": 8.279701763545838e-06, "loss": 2.2546045780181885, "step": 1150 }, { "epoch": 1.036936936936937, "grad_norm": 8.091196662552067, "learning_rate": 8.275743816024886e-06, "loss": 1.8532781600952148, "step": 1151 }, { "epoch": 1.037837837837838, "grad_norm": 8.382867243646281, "learning_rate": 8.271782269053899e-06, "loss": 1.4469783306121826, "step": 1152 }, { "epoch": 1.0387387387387388, "grad_norm": 11.461998428693212, "learning_rate": 8.267817126985898e-06, "loss": 1.674446940422058, "step": 1153 }, { "epoch": 1.0396396396396397, "grad_norm": 10.267039583398452, "learning_rate": 8.263848394177856e-06, "loss": 1.6920478343963623, "step": 1154 }, { "epoch": 1.0405405405405406, "grad_norm": 16.231667928814414, "learning_rate": 8.259876074990698e-06, "loss": 1.8150936365127563, "step": 1155 }, { "epoch": 1.0414414414414415, "grad_norm": 13.39039416726503, "learning_rate": 8.25590017378928e-06, "loss": 1.7284342050552368, "step": 1156 }, { "epoch": 1.0423423423423424, "grad_norm": 16.86825951513754, "learning_rate": 8.251920694942399e-06, "loss": 1.5762327909469604, "step": 1157 }, { "epoch": 1.0432432432432432, "grad_norm": 8.302781692845457, "learning_rate": 8.247937642822783e-06, "loss": 1.9574871063232422, "step": 1158 }, { "epoch": 1.0441441441441441, "grad_norm": 14.050467961204983, "learning_rate": 8.243951021807085e-06, "loss": 1.2647265195846558, "step": 1159 }, { "epoch": 1.045045045045045, "grad_norm": 17.840809821088865, "learning_rate": 8.239960836275886e-06, "loss": 1.3962913751602173, "step": 1160 }, { "epoch": 1.045945945945946, "grad_norm": 10.101731759748537, "learning_rate": 8.23596709061367e-06, "loss": 1.5938246250152588, "step": 1161 }, { "epoch": 1.0468468468468468, "grad_norm": 10.759880870958964, "learning_rate": 8.231969789208848e-06, "loss": 1.6824748516082764, "step": 1162 }, { "epoch": 1.0477477477477477, "grad_norm": 9.610035985087961, "learning_rate": 8.227968936453725e-06, "loss": 1.4494415521621704, "step": 1163 }, { "epoch": 1.0486486486486486, "grad_norm": 10.436591920309185, "learning_rate": 8.22396453674452e-06, "loss": 1.639620065689087, "step": 1164 }, { "epoch": 1.0495495495495495, "grad_norm": 10.421869292017382, "learning_rate": 8.219956594481342e-06, "loss": 1.3855957984924316, "step": 1165 }, { "epoch": 1.0504504504504504, "grad_norm": 13.09735512969942, "learning_rate": 8.215945114068196e-06, "loss": 1.7121726274490356, "step": 1166 }, { "epoch": 1.0513513513513513, "grad_norm": 10.621685505079233, "learning_rate": 8.21193009991297e-06, "loss": 1.5693837404251099, "step": 1167 }, { "epoch": 1.0522522522522522, "grad_norm": 8.661909260403378, "learning_rate": 8.207911556427442e-06, "loss": 1.9569733142852783, "step": 1168 }, { "epoch": 1.053153153153153, "grad_norm": 10.961758764007032, "learning_rate": 8.203889488027263e-06, "loss": 2.3044838905334473, "step": 1169 }, { "epoch": 1.054054054054054, "grad_norm": 10.705919878277495, "learning_rate": 8.19986389913196e-06, "loss": 1.9391883611679077, "step": 1170 }, { "epoch": 1.054954954954955, "grad_norm": 17.11264626842883, "learning_rate": 8.195834794164925e-06, "loss": 2.016961097717285, "step": 1171 }, { "epoch": 1.055855855855856, "grad_norm": 9.28350029324163, "learning_rate": 8.191802177553419e-06, "loss": 1.9359326362609863, "step": 1172 }, { "epoch": 1.0567567567567568, "grad_norm": 29.005868757379027, "learning_rate": 8.187766053728554e-06, "loss": 2.1905364990234375, "step": 1173 }, { "epoch": 1.0576576576576577, "grad_norm": 17.452604277084465, "learning_rate": 8.183726427125302e-06, "loss": 1.7908490896224976, "step": 1174 }, { "epoch": 1.0585585585585586, "grad_norm": 8.187973593415926, "learning_rate": 8.179683302182486e-06, "loss": 1.3162143230438232, "step": 1175 }, { "epoch": 1.0594594594594595, "grad_norm": 14.952205890351422, "learning_rate": 8.175636683342763e-06, "loss": 1.6705520153045654, "step": 1176 }, { "epoch": 1.0603603603603604, "grad_norm": 12.44885358568132, "learning_rate": 8.17158657505264e-06, "loss": 1.8957538604736328, "step": 1177 }, { "epoch": 1.0612612612612613, "grad_norm": 9.069270431390361, "learning_rate": 8.16753298176245e-06, "loss": 0.9111911654472351, "step": 1178 }, { "epoch": 1.0621621621621622, "grad_norm": 10.191977733913982, "learning_rate": 8.16347590792636e-06, "loss": 1.4045283794403076, "step": 1179 }, { "epoch": 1.063063063063063, "grad_norm": 13.00032871093983, "learning_rate": 8.159415358002361e-06, "loss": 1.5373706817626953, "step": 1180 }, { "epoch": 1.063963963963964, "grad_norm": 14.124441412264169, "learning_rate": 8.155351336452263e-06, "loss": 1.0758987665176392, "step": 1181 }, { "epoch": 1.0648648648648649, "grad_norm": 12.441316798050691, "learning_rate": 8.151283847741691e-06, "loss": 1.4989533424377441, "step": 1182 }, { "epoch": 1.0657657657657658, "grad_norm": 10.51543842939082, "learning_rate": 8.14721289634008e-06, "loss": 1.4637765884399414, "step": 1183 }, { "epoch": 1.0666666666666667, "grad_norm": 13.602448520620147, "learning_rate": 8.143138486720667e-06, "loss": 1.3450312614440918, "step": 1184 }, { "epoch": 1.0675675675675675, "grad_norm": 8.280529775846368, "learning_rate": 8.139060623360494e-06, "loss": 1.489940881729126, "step": 1185 }, { "epoch": 1.0684684684684684, "grad_norm": 10.46426651432547, "learning_rate": 8.134979310740395e-06, "loss": 1.5905389785766602, "step": 1186 }, { "epoch": 1.0693693693693693, "grad_norm": 13.358266883594808, "learning_rate": 8.13089455334499e-06, "loss": 1.2072575092315674, "step": 1187 }, { "epoch": 1.0702702702702702, "grad_norm": 16.330825739721572, "learning_rate": 8.126806355662693e-06, "loss": 1.7562267780303955, "step": 1188 }, { "epoch": 1.071171171171171, "grad_norm": 12.195167613218787, "learning_rate": 8.122714722185696e-06, "loss": 1.0812071561813354, "step": 1189 }, { "epoch": 1.072072072072072, "grad_norm": 15.670715731924602, "learning_rate": 8.118619657409959e-06, "loss": 1.0835292339324951, "step": 1190 }, { "epoch": 1.072972972972973, "grad_norm": 9.573100215659544, "learning_rate": 8.114521165835221e-06, "loss": 1.1943262815475464, "step": 1191 }, { "epoch": 1.0738738738738738, "grad_norm": 7.5920072132170375, "learning_rate": 8.11041925196498e-06, "loss": 1.1824040412902832, "step": 1192 }, { "epoch": 1.0747747747747747, "grad_norm": 14.720889174028226, "learning_rate": 8.106313920306503e-06, "loss": 1.6713793277740479, "step": 1193 }, { "epoch": 1.0756756756756758, "grad_norm": 14.203867713126108, "learning_rate": 8.102205175370801e-06, "loss": 2.0247316360473633, "step": 1194 }, { "epoch": 1.0765765765765765, "grad_norm": 8.406718147766572, "learning_rate": 8.098093021672645e-06, "loss": 1.42876398563385, "step": 1195 }, { "epoch": 1.0774774774774776, "grad_norm": 9.914578355617206, "learning_rate": 8.093977463730546e-06, "loss": 1.242950201034546, "step": 1196 }, { "epoch": 1.0783783783783785, "grad_norm": 12.82859235072137, "learning_rate": 8.089858506066762e-06, "loss": 2.044468402862549, "step": 1197 }, { "epoch": 1.0792792792792794, "grad_norm": 15.21115282821868, "learning_rate": 8.085736153207277e-06, "loss": 1.8516826629638672, "step": 1198 }, { "epoch": 1.0801801801801802, "grad_norm": 10.97731431398425, "learning_rate": 8.081610409681815e-06, "loss": 1.2195425033569336, "step": 1199 }, { "epoch": 1.0810810810810811, "grad_norm": 10.051010569598152, "learning_rate": 8.077481280023822e-06, "loss": 1.627333402633667, "step": 1200 }, { "epoch": 1.081981981981982, "grad_norm": 12.896898338302256, "learning_rate": 8.073348768770463e-06, "loss": 1.999898910522461, "step": 1201 }, { "epoch": 1.082882882882883, "grad_norm": 12.658839336316985, "learning_rate": 8.06921288046262e-06, "loss": 1.5643589496612549, "step": 1202 }, { "epoch": 1.0837837837837838, "grad_norm": 10.956247301531295, "learning_rate": 8.06507361964489e-06, "loss": 1.6658892631530762, "step": 1203 }, { "epoch": 1.0846846846846847, "grad_norm": 12.298519860263355, "learning_rate": 8.060930990865569e-06, "loss": 1.4011785984039307, "step": 1204 }, { "epoch": 1.0855855855855856, "grad_norm": 9.72709077172854, "learning_rate": 8.056784998676656e-06, "loss": 2.0497689247131348, "step": 1205 }, { "epoch": 1.0864864864864865, "grad_norm": 7.466528196092187, "learning_rate": 8.05263564763385e-06, "loss": 2.2291030883789062, "step": 1206 }, { "epoch": 1.0873873873873874, "grad_norm": 12.515884561780549, "learning_rate": 8.048482942296535e-06, "loss": 1.490678071975708, "step": 1207 }, { "epoch": 1.0882882882882883, "grad_norm": 10.468667146864716, "learning_rate": 8.044326887227784e-06, "loss": 1.6769261360168457, "step": 1208 }, { "epoch": 1.0891891891891892, "grad_norm": 14.319507357911029, "learning_rate": 8.040167486994349e-06, "loss": 1.0405046939849854, "step": 1209 }, { "epoch": 1.09009009009009, "grad_norm": 7.171034569727635, "learning_rate": 8.03600474616666e-06, "loss": 1.6920535564422607, "step": 1210 }, { "epoch": 1.090990990990991, "grad_norm": 9.878017463906483, "learning_rate": 8.031838669318815e-06, "loss": 1.6081115007400513, "step": 1211 }, { "epoch": 1.0918918918918918, "grad_norm": 14.076104282243213, "learning_rate": 8.02766926102858e-06, "loss": 1.0227457284927368, "step": 1212 }, { "epoch": 1.0927927927927927, "grad_norm": 9.853681408844322, "learning_rate": 8.023496525877377e-06, "loss": 1.634497880935669, "step": 1213 }, { "epoch": 1.0936936936936936, "grad_norm": 15.624806973123826, "learning_rate": 8.019320468450293e-06, "loss": 2.02766752243042, "step": 1214 }, { "epoch": 1.0945945945945945, "grad_norm": 11.477751200975193, "learning_rate": 8.015141093336059e-06, "loss": 1.8287404775619507, "step": 1215 }, { "epoch": 1.0954954954954954, "grad_norm": 57.919064943020636, "learning_rate": 8.010958405127048e-06, "loss": 2.305783271789551, "step": 1216 }, { "epoch": 1.0963963963963963, "grad_norm": 11.707293625051278, "learning_rate": 8.006772408419281e-06, "loss": 1.1302621364593506, "step": 1217 }, { "epoch": 1.0972972972972972, "grad_norm": 9.560840226906693, "learning_rate": 8.002583107812414e-06, "loss": 1.7443042993545532, "step": 1218 }, { "epoch": 1.0981981981981983, "grad_norm": 8.637141874105088, "learning_rate": 7.998390507909724e-06, "loss": 1.2046616077423096, "step": 1219 }, { "epoch": 1.0990990990990992, "grad_norm": 13.253483776227233, "learning_rate": 7.994194613318126e-06, "loss": 1.3849682807922363, "step": 1220 }, { "epoch": 1.1, "grad_norm": 12.414811146075584, "learning_rate": 7.989995428648148e-06, "loss": 1.4627125263214111, "step": 1221 }, { "epoch": 1.100900900900901, "grad_norm": 11.292520077449975, "learning_rate": 7.985792958513932e-06, "loss": 2.078278064727783, "step": 1222 }, { "epoch": 1.1018018018018019, "grad_norm": 9.253922726492476, "learning_rate": 7.981587207533234e-06, "loss": 1.5167481899261475, "step": 1223 }, { "epoch": 1.1027027027027028, "grad_norm": 21.46976466629096, "learning_rate": 7.977378180327415e-06, "loss": 2.4628076553344727, "step": 1224 }, { "epoch": 1.1036036036036037, "grad_norm": 9.756905499452317, "learning_rate": 7.973165881521435e-06, "loss": 1.5625048875808716, "step": 1225 }, { "epoch": 1.1045045045045045, "grad_norm": 12.77253396194263, "learning_rate": 7.968950315743845e-06, "loss": 1.2804774045944214, "step": 1226 }, { "epoch": 1.1054054054054054, "grad_norm": 13.17580429811146, "learning_rate": 7.964731487626793e-06, "loss": 1.7434399127960205, "step": 1227 }, { "epoch": 1.1063063063063063, "grad_norm": 8.8860333522632, "learning_rate": 7.960509401806007e-06, "loss": 1.3496845960617065, "step": 1228 }, { "epoch": 1.1072072072072072, "grad_norm": 9.689757737587119, "learning_rate": 7.956284062920795e-06, "loss": 1.2241935729980469, "step": 1229 }, { "epoch": 1.1081081081081081, "grad_norm": 9.189552358817172, "learning_rate": 7.952055475614041e-06, "loss": 1.6663897037506104, "step": 1230 }, { "epoch": 1.109009009009009, "grad_norm": 8.79050806300742, "learning_rate": 7.947823644532198e-06, "loss": 1.8955342769622803, "step": 1231 }, { "epoch": 1.10990990990991, "grad_norm": 11.433091564916111, "learning_rate": 7.943588574325283e-06, "loss": 2.5440666675567627, "step": 1232 }, { "epoch": 1.1108108108108108, "grad_norm": 9.45792054858781, "learning_rate": 7.939350269646871e-06, "loss": 1.8544093370437622, "step": 1233 }, { "epoch": 1.1117117117117117, "grad_norm": 15.295838121016162, "learning_rate": 7.935108735154093e-06, "loss": 1.7091851234436035, "step": 1234 }, { "epoch": 1.1126126126126126, "grad_norm": 9.185738743837135, "learning_rate": 7.93086397550763e-06, "loss": 1.4154603481292725, "step": 1235 }, { "epoch": 1.1135135135135135, "grad_norm": 19.610794074775608, "learning_rate": 7.926615995371704e-06, "loss": 0.9731454849243164, "step": 1236 }, { "epoch": 1.1144144144144144, "grad_norm": 35.18486509243029, "learning_rate": 7.922364799414075e-06, "loss": 1.8865573406219482, "step": 1237 }, { "epoch": 1.1153153153153152, "grad_norm": 10.786773415690652, "learning_rate": 7.918110392306042e-06, "loss": 1.138608694076538, "step": 1238 }, { "epoch": 1.1162162162162161, "grad_norm": 8.28684544934627, "learning_rate": 7.913852778722426e-06, "loss": 1.8452454805374146, "step": 1239 }, { "epoch": 1.117117117117117, "grad_norm": 7.506354201537919, "learning_rate": 7.909591963341576e-06, "loss": 1.302584171295166, "step": 1240 }, { "epoch": 1.118018018018018, "grad_norm": 13.755149715721302, "learning_rate": 7.905327950845357e-06, "loss": 1.8079793453216553, "step": 1241 }, { "epoch": 1.118918918918919, "grad_norm": 13.330989665217649, "learning_rate": 7.901060745919148e-06, "loss": 1.2016383409500122, "step": 1242 }, { "epoch": 1.1198198198198197, "grad_norm": 9.829354825068858, "learning_rate": 7.896790353251836e-06, "loss": 1.247478723526001, "step": 1243 }, { "epoch": 1.1207207207207208, "grad_norm": 16.22052628865822, "learning_rate": 7.892516777535808e-06, "loss": 2.252026081085205, "step": 1244 }, { "epoch": 1.1216216216216217, "grad_norm": 10.917816174993641, "learning_rate": 7.888240023466952e-06, "loss": 1.7726813554763794, "step": 1245 }, { "epoch": 1.1225225225225226, "grad_norm": 9.743573351476616, "learning_rate": 7.883960095744649e-06, "loss": 1.7089396715164185, "step": 1246 }, { "epoch": 1.1234234234234235, "grad_norm": 10.385925486216097, "learning_rate": 7.879676999071764e-06, "loss": 1.327872633934021, "step": 1247 }, { "epoch": 1.1243243243243244, "grad_norm": 14.640452121020735, "learning_rate": 7.875390738154645e-06, "loss": 1.3367271423339844, "step": 1248 }, { "epoch": 1.1252252252252253, "grad_norm": 14.336590579499099, "learning_rate": 7.871101317703118e-06, "loss": 1.280564546585083, "step": 1249 }, { "epoch": 1.1261261261261262, "grad_norm": 10.789650982585174, "learning_rate": 7.866808742430481e-06, "loss": 1.9063369035720825, "step": 1250 }, { "epoch": 1.127027027027027, "grad_norm": 15.38404039592624, "learning_rate": 7.8625130170535e-06, "loss": 1.8157051801681519, "step": 1251 }, { "epoch": 1.127927927927928, "grad_norm": 15.262532261695428, "learning_rate": 7.858214146292394e-06, "loss": 1.309761643409729, "step": 1252 }, { "epoch": 1.1288288288288288, "grad_norm": 17.302922232288402, "learning_rate": 7.853912134870851e-06, "loss": 1.5196702480316162, "step": 1253 }, { "epoch": 1.1297297297297297, "grad_norm": 14.852904225740026, "learning_rate": 7.849606987516e-06, "loss": 1.4707289934158325, "step": 1254 }, { "epoch": 1.1306306306306306, "grad_norm": 20.72461967356017, "learning_rate": 7.84529870895842e-06, "loss": 1.5276254415512085, "step": 1255 }, { "epoch": 1.1315315315315315, "grad_norm": 13.611472477748729, "learning_rate": 7.840987303932131e-06, "loss": 2.18794584274292, "step": 1256 }, { "epoch": 1.1324324324324324, "grad_norm": 8.085616722923268, "learning_rate": 7.836672777174585e-06, "loss": 1.352491855621338, "step": 1257 }, { "epoch": 1.1333333333333333, "grad_norm": 16.132324263932222, "learning_rate": 7.83235513342667e-06, "loss": 1.7365800142288208, "step": 1258 }, { "epoch": 1.1342342342342342, "grad_norm": 11.86482454877073, "learning_rate": 7.828034377432694e-06, "loss": 1.753848910331726, "step": 1259 }, { "epoch": 1.135135135135135, "grad_norm": 7.956056959211617, "learning_rate": 7.823710513940385e-06, "loss": 1.3149350881576538, "step": 1260 }, { "epoch": 1.136036036036036, "grad_norm": 25.654039284216676, "learning_rate": 7.819383547700889e-06, "loss": 1.264143705368042, "step": 1261 }, { "epoch": 1.1369369369369369, "grad_norm": 6.991141902419393, "learning_rate": 7.81505348346876e-06, "loss": 1.4038636684417725, "step": 1262 }, { "epoch": 1.1378378378378378, "grad_norm": 8.25379690380056, "learning_rate": 7.810720326001954e-06, "loss": 1.7839691638946533, "step": 1263 }, { "epoch": 1.1387387387387387, "grad_norm": 9.646297674995921, "learning_rate": 7.806384080061827e-06, "loss": 1.5281214714050293, "step": 1264 }, { "epoch": 1.1396396396396395, "grad_norm": 8.434120353270666, "learning_rate": 7.802044750413128e-06, "loss": 1.055334448814392, "step": 1265 }, { "epoch": 1.1405405405405404, "grad_norm": 13.563465933944338, "learning_rate": 7.797702341824e-06, "loss": 1.307537317276001, "step": 1266 }, { "epoch": 1.1414414414414416, "grad_norm": 13.948444094045099, "learning_rate": 7.793356859065962e-06, "loss": 1.3452039957046509, "step": 1267 }, { "epoch": 1.1423423423423422, "grad_norm": 13.969006681460863, "learning_rate": 7.789008306913911e-06, "loss": 1.434963583946228, "step": 1268 }, { "epoch": 1.1432432432432433, "grad_norm": 11.92492065608128, "learning_rate": 7.784656690146125e-06, "loss": 1.6941349506378174, "step": 1269 }, { "epoch": 1.1441441441441442, "grad_norm": 9.786806235171635, "learning_rate": 7.78030201354424e-06, "loss": 1.5654044151306152, "step": 1270 }, { "epoch": 1.1450450450450451, "grad_norm": 16.528398727541727, "learning_rate": 7.775944281893258e-06, "loss": 1.3948932886123657, "step": 1271 }, { "epoch": 1.145945945945946, "grad_norm": 21.470940029536713, "learning_rate": 7.771583499981538e-06, "loss": 2.152101516723633, "step": 1272 }, { "epoch": 1.146846846846847, "grad_norm": 13.235891849465588, "learning_rate": 7.767219672600794e-06, "loss": 1.002134084701538, "step": 1273 }, { "epoch": 1.1477477477477478, "grad_norm": 11.354407284936569, "learning_rate": 7.76285280454608e-06, "loss": 1.4057799577713013, "step": 1274 }, { "epoch": 1.1486486486486487, "grad_norm": 11.048595701802194, "learning_rate": 7.758482900615794e-06, "loss": 1.1977487802505493, "step": 1275 }, { "epoch": 1.1495495495495496, "grad_norm": 12.227212627101522, "learning_rate": 7.75410996561167e-06, "loss": 1.527909517288208, "step": 1276 }, { "epoch": 1.1504504504504505, "grad_norm": 15.173327014778602, "learning_rate": 7.749734004338777e-06, "loss": 1.143022060394287, "step": 1277 }, { "epoch": 1.1513513513513514, "grad_norm": 9.146107789902002, "learning_rate": 7.745355021605499e-06, "loss": 1.2634553909301758, "step": 1278 }, { "epoch": 1.1522522522522523, "grad_norm": 12.249337106978563, "learning_rate": 7.74097302222355e-06, "loss": 2.0868749618530273, "step": 1279 }, { "epoch": 1.1531531531531531, "grad_norm": 10.95442874403436, "learning_rate": 7.736588011007952e-06, "loss": 1.8694771528244019, "step": 1280 }, { "epoch": 1.154054054054054, "grad_norm": 9.634759373981339, "learning_rate": 7.732199992777045e-06, "loss": 1.4333633184432983, "step": 1281 }, { "epoch": 1.154954954954955, "grad_norm": 14.418215297755733, "learning_rate": 7.72780897235246e-06, "loss": 1.240595817565918, "step": 1282 }, { "epoch": 1.1558558558558558, "grad_norm": 17.359398926703346, "learning_rate": 7.72341495455914e-06, "loss": 1.9707403182983398, "step": 1283 }, { "epoch": 1.1567567567567567, "grad_norm": 17.892415832475763, "learning_rate": 7.71901794422531e-06, "loss": 1.1688909530639648, "step": 1284 }, { "epoch": 1.1576576576576576, "grad_norm": 26.730124348758526, "learning_rate": 7.714617946182498e-06, "loss": 1.3510494232177734, "step": 1285 }, { "epoch": 1.1585585585585585, "grad_norm": 15.56918791780566, "learning_rate": 7.710214965265499e-06, "loss": 1.6069289445877075, "step": 1286 }, { "epoch": 1.1594594594594594, "grad_norm": 8.474185793071808, "learning_rate": 7.705809006312394e-06, "loss": 1.445816993713379, "step": 1287 }, { "epoch": 1.1603603603603603, "grad_norm": 16.54501389107988, "learning_rate": 7.701400074164535e-06, "loss": 1.384445309638977, "step": 1288 }, { "epoch": 1.1612612612612612, "grad_norm": 13.98831684862548, "learning_rate": 7.696988173666545e-06, "loss": 1.0784920454025269, "step": 1289 }, { "epoch": 1.1621621621621623, "grad_norm": 12.039360431390216, "learning_rate": 7.692573309666298e-06, "loss": 1.702589988708496, "step": 1290 }, { "epoch": 1.163063063063063, "grad_norm": 13.378061008808606, "learning_rate": 7.688155487014936e-06, "loss": 1.2335466146469116, "step": 1291 }, { "epoch": 1.163963963963964, "grad_norm": 11.01659306250313, "learning_rate": 7.683734710566848e-06, "loss": 1.6434353590011597, "step": 1292 }, { "epoch": 1.164864864864865, "grad_norm": 12.310872183382594, "learning_rate": 7.679310985179664e-06, "loss": 1.2276870012283325, "step": 1293 }, { "epoch": 1.1657657657657658, "grad_norm": 11.84406149723134, "learning_rate": 7.67488431571426e-06, "loss": 1.0436772108078003, "step": 1294 }, { "epoch": 1.1666666666666667, "grad_norm": 11.693643942171297, "learning_rate": 7.670454707034745e-06, "loss": 1.523186206817627, "step": 1295 }, { "epoch": 1.1675675675675676, "grad_norm": 8.07150386900118, "learning_rate": 7.666022164008458e-06, "loss": 1.8224475383758545, "step": 1296 }, { "epoch": 1.1684684684684685, "grad_norm": 11.398184522360536, "learning_rate": 7.661586691505961e-06, "loss": 2.4738755226135254, "step": 1297 }, { "epoch": 1.1693693693693694, "grad_norm": 8.745404391664705, "learning_rate": 7.657148294401037e-06, "loss": 1.1814496517181396, "step": 1298 }, { "epoch": 1.1702702702702703, "grad_norm": 9.302437223698567, "learning_rate": 7.652706977570682e-06, "loss": 1.6256625652313232, "step": 1299 }, { "epoch": 1.1711711711711712, "grad_norm": 10.333906575252291, "learning_rate": 7.648262745895103e-06, "loss": 2.1055374145507812, "step": 1300 }, { "epoch": 1.172072072072072, "grad_norm": 11.315946775683122, "learning_rate": 7.643815604257703e-06, "loss": 1.6647669076919556, "step": 1301 }, { "epoch": 1.172972972972973, "grad_norm": 19.541790861158102, "learning_rate": 7.639365557545085e-06, "loss": 2.452302932739258, "step": 1302 }, { "epoch": 1.1738738738738739, "grad_norm": 9.99620076555469, "learning_rate": 7.63491261064705e-06, "loss": 1.2176405191421509, "step": 1303 }, { "epoch": 1.1747747747747748, "grad_norm": 20.37284495024169, "learning_rate": 7.630456768456578e-06, "loss": 1.5443081855773926, "step": 1304 }, { "epoch": 1.1756756756756757, "grad_norm": 9.514029041847198, "learning_rate": 7.625998035869833e-06, "loss": 1.2930316925048828, "step": 1305 }, { "epoch": 1.1765765765765765, "grad_norm": 12.776789687380294, "learning_rate": 7.621536417786159e-06, "loss": 1.2133829593658447, "step": 1306 }, { "epoch": 1.1774774774774774, "grad_norm": 14.137936897976031, "learning_rate": 7.617071919108066e-06, "loss": 1.6285301446914673, "step": 1307 }, { "epoch": 1.1783783783783783, "grad_norm": 9.750814868572935, "learning_rate": 7.612604544741231e-06, "loss": 1.6151678562164307, "step": 1308 }, { "epoch": 1.1792792792792792, "grad_norm": 11.124079331189733, "learning_rate": 7.608134299594489e-06, "loss": 0.9893984198570251, "step": 1309 }, { "epoch": 1.1801801801801801, "grad_norm": 14.341680894143462, "learning_rate": 7.603661188579834e-06, "loss": 1.4867775440216064, "step": 1310 }, { "epoch": 1.181081081081081, "grad_norm": 10.389613013976772, "learning_rate": 7.599185216612404e-06, "loss": 2.4986953735351562, "step": 1311 }, { "epoch": 1.181981981981982, "grad_norm": 15.031980879964772, "learning_rate": 7.59470638861048e-06, "loss": 1.6477925777435303, "step": 1312 }, { "epoch": 1.1828828828828828, "grad_norm": 12.672810633719594, "learning_rate": 7.590224709495488e-06, "loss": 1.228175401687622, "step": 1313 }, { "epoch": 1.1837837837837837, "grad_norm": 8.678480242174444, "learning_rate": 7.585740184191983e-06, "loss": 1.1139668226242065, "step": 1314 }, { "epoch": 1.1846846846846848, "grad_norm": 11.693803772702594, "learning_rate": 7.581252817627645e-06, "loss": 1.8523716926574707, "step": 1315 }, { "epoch": 1.1855855855855855, "grad_norm": 15.84584428770457, "learning_rate": 7.576762614733278e-06, "loss": 1.498417854309082, "step": 1316 }, { "epoch": 1.1864864864864866, "grad_norm": 8.044145709378215, "learning_rate": 7.572269580442806e-06, "loss": 1.3055856227874756, "step": 1317 }, { "epoch": 1.1873873873873875, "grad_norm": 11.3776637027518, "learning_rate": 7.567773719693259e-06, "loss": 1.3935675621032715, "step": 1318 }, { "epoch": 1.1882882882882884, "grad_norm": 11.392851172846493, "learning_rate": 7.563275037424775e-06, "loss": 1.1069931983947754, "step": 1319 }, { "epoch": 1.1891891891891893, "grad_norm": 8.397596012134446, "learning_rate": 7.558773538580593e-06, "loss": 1.5372304916381836, "step": 1320 }, { "epoch": 1.1900900900900901, "grad_norm": 14.191071446322592, "learning_rate": 7.554269228107044e-06, "loss": 1.5797395706176758, "step": 1321 }, { "epoch": 1.190990990990991, "grad_norm": 13.358608945649758, "learning_rate": 7.549762110953553e-06, "loss": 1.3508579730987549, "step": 1322 }, { "epoch": 1.191891891891892, "grad_norm": 16.107248339446517, "learning_rate": 7.545252192072625e-06, "loss": 1.4219568967819214, "step": 1323 }, { "epoch": 1.1927927927927928, "grad_norm": 15.949458931646804, "learning_rate": 7.540739476419847e-06, "loss": 1.5413322448730469, "step": 1324 }, { "epoch": 1.1936936936936937, "grad_norm": 7.25570395373709, "learning_rate": 7.5362239689538765e-06, "loss": 1.5617890357971191, "step": 1325 }, { "epoch": 1.1945945945945946, "grad_norm": 5.507534271918394, "learning_rate": 7.531705674636439e-06, "loss": 0.9302718639373779, "step": 1326 }, { "epoch": 1.1954954954954955, "grad_norm": 11.704751357096594, "learning_rate": 7.527184598432322e-06, "loss": 1.9931082725524902, "step": 1327 }, { "epoch": 1.1963963963963964, "grad_norm": 9.933944417942092, "learning_rate": 7.522660745309375e-06, "loss": 1.3197085857391357, "step": 1328 }, { "epoch": 1.1972972972972973, "grad_norm": 7.720230920801659, "learning_rate": 7.518134120238489e-06, "loss": 1.283144474029541, "step": 1329 }, { "epoch": 1.1981981981981982, "grad_norm": 13.160022270225245, "learning_rate": 7.5136047281936094e-06, "loss": 1.2439384460449219, "step": 1330 }, { "epoch": 1.199099099099099, "grad_norm": 9.588431958829952, "learning_rate": 7.509072574151719e-06, "loss": 0.9278205633163452, "step": 1331 }, { "epoch": 1.2, "grad_norm": 10.694717205816724, "learning_rate": 7.504537663092834e-06, "loss": 1.0412869453430176, "step": 1332 }, { "epoch": 1.2009009009009008, "grad_norm": 12.597064505650591, "learning_rate": 7.500000000000001e-06, "loss": 1.7085771560668945, "step": 1333 }, { "epoch": 1.2018018018018017, "grad_norm": 9.980346507290404, "learning_rate": 7.495459589859293e-06, "loss": 0.9375530481338501, "step": 1334 }, { "epoch": 1.2027027027027026, "grad_norm": 25.49173748694432, "learning_rate": 7.490916437659799e-06, "loss": 1.4595293998718262, "step": 1335 }, { "epoch": 1.2036036036036035, "grad_norm": 10.509646074519262, "learning_rate": 7.486370548393621e-06, "loss": 1.883044719696045, "step": 1336 }, { "epoch": 1.2045045045045044, "grad_norm": 10.135246780041918, "learning_rate": 7.48182192705587e-06, "loss": 1.252532958984375, "step": 1337 }, { "epoch": 1.2054054054054055, "grad_norm": 14.784925489162314, "learning_rate": 7.47727057864466e-06, "loss": 1.6863490343093872, "step": 1338 }, { "epoch": 1.2063063063063062, "grad_norm": 9.426808553510034, "learning_rate": 7.472716508161095e-06, "loss": 1.2537802457809448, "step": 1339 }, { "epoch": 1.2072072072072073, "grad_norm": 11.36536859739626, "learning_rate": 7.468159720609278e-06, "loss": 1.567642331123352, "step": 1340 }, { "epoch": 1.2081081081081082, "grad_norm": 13.907149635693166, "learning_rate": 7.463600220996294e-06, "loss": 1.0958294868469238, "step": 1341 }, { "epoch": 1.209009009009009, "grad_norm": 14.923315595776977, "learning_rate": 7.45903801433221e-06, "loss": 1.4967621564865112, "step": 1342 }, { "epoch": 1.20990990990991, "grad_norm": 12.484070207598085, "learning_rate": 7.454473105630063e-06, "loss": 0.9661339521408081, "step": 1343 }, { "epoch": 1.2108108108108109, "grad_norm": 13.667218559643285, "learning_rate": 7.449905499905863e-06, "loss": 1.088149905204773, "step": 1344 }, { "epoch": 1.2117117117117118, "grad_norm": 10.615603407838263, "learning_rate": 7.445335202178585e-06, "loss": 1.7580902576446533, "step": 1345 }, { "epoch": 1.2126126126126127, "grad_norm": 9.87829841243034, "learning_rate": 7.440762217470156e-06, "loss": 1.0903618335723877, "step": 1346 }, { "epoch": 1.2135135135135136, "grad_norm": 13.23634609532938, "learning_rate": 7.4361865508054646e-06, "loss": 1.5855700969696045, "step": 1347 }, { "epoch": 1.2144144144144144, "grad_norm": 15.17742491666185, "learning_rate": 7.431608207212335e-06, "loss": 1.5372284650802612, "step": 1348 }, { "epoch": 1.2153153153153153, "grad_norm": 11.916694769113313, "learning_rate": 7.427027191721541e-06, "loss": 1.5875383615493774, "step": 1349 }, { "epoch": 1.2162162162162162, "grad_norm": 100.75380099015622, "learning_rate": 7.422443509366791e-06, "loss": 2.580488681793213, "step": 1350 }, { "epoch": 1.2171171171171171, "grad_norm": 18.528713273542575, "learning_rate": 7.417857165184723e-06, "loss": 1.1066110134124756, "step": 1351 }, { "epoch": 1.218018018018018, "grad_norm": 8.831968227387177, "learning_rate": 7.413268164214898e-06, "loss": 1.5199337005615234, "step": 1352 }, { "epoch": 1.218918918918919, "grad_norm": 9.685783038261286, "learning_rate": 7.4086765114998e-06, "loss": 1.211181640625, "step": 1353 }, { "epoch": 1.2198198198198198, "grad_norm": 14.46493643753685, "learning_rate": 7.404082212084825e-06, "loss": 1.3475542068481445, "step": 1354 }, { "epoch": 1.2207207207207207, "grad_norm": 7.827895772435491, "learning_rate": 7.399485271018275e-06, "loss": 1.2011393308639526, "step": 1355 }, { "epoch": 1.2216216216216216, "grad_norm": 12.112899033982343, "learning_rate": 7.394885693351359e-06, "loss": 1.5029023885726929, "step": 1356 }, { "epoch": 1.2225225225225225, "grad_norm": 19.16374374542783, "learning_rate": 7.390283484138181e-06, "loss": 1.1819264888763428, "step": 1357 }, { "epoch": 1.2234234234234234, "grad_norm": 15.837163637802899, "learning_rate": 7.385678648435736e-06, "loss": 1.7538678646087646, "step": 1358 }, { "epoch": 1.2243243243243243, "grad_norm": 8.816568931447835, "learning_rate": 7.381071191303905e-06, "loss": 1.7851725816726685, "step": 1359 }, { "epoch": 1.2252252252252251, "grad_norm": 12.425085747807193, "learning_rate": 7.37646111780545e-06, "loss": 1.6314969062805176, "step": 1360 }, { "epoch": 1.226126126126126, "grad_norm": 11.622292873388071, "learning_rate": 7.371848433006011e-06, "loss": 1.8315403461456299, "step": 1361 }, { "epoch": 1.227027027027027, "grad_norm": 11.700004162311084, "learning_rate": 7.367233141974091e-06, "loss": 1.2369434833526611, "step": 1362 }, { "epoch": 1.227927927927928, "grad_norm": 7.913797704821526, "learning_rate": 7.36261524978106e-06, "loss": 1.6064811944961548, "step": 1363 }, { "epoch": 1.2288288288288287, "grad_norm": 9.524045204852513, "learning_rate": 7.357994761501148e-06, "loss": 0.9757938981056213, "step": 1364 }, { "epoch": 1.2297297297297298, "grad_norm": 7.515508698675772, "learning_rate": 7.353371682211439e-06, "loss": 1.7672373056411743, "step": 1365 }, { "epoch": 1.2306306306306307, "grad_norm": 11.170188493390835, "learning_rate": 7.348746016991855e-06, "loss": 1.468174934387207, "step": 1366 }, { "epoch": 1.2315315315315316, "grad_norm": 11.147007367575577, "learning_rate": 7.344117770925169e-06, "loss": 1.455219030380249, "step": 1367 }, { "epoch": 1.2324324324324325, "grad_norm": 12.241165739692947, "learning_rate": 7.339486949096986e-06, "loss": 1.9719338417053223, "step": 1368 }, { "epoch": 1.2333333333333334, "grad_norm": 8.88542917374653, "learning_rate": 7.3348535565957415e-06, "loss": 1.3688957691192627, "step": 1369 }, { "epoch": 1.2342342342342343, "grad_norm": 13.03109391063374, "learning_rate": 7.330217598512696e-06, "loss": 1.9144933223724365, "step": 1370 }, { "epoch": 1.2351351351351352, "grad_norm": 11.356009632577601, "learning_rate": 7.3255790799419276e-06, "loss": 2.976274251937866, "step": 1371 }, { "epoch": 1.236036036036036, "grad_norm": 16.20171708391758, "learning_rate": 7.320938005980331e-06, "loss": 2.230113983154297, "step": 1372 }, { "epoch": 1.236936936936937, "grad_norm": 17.420836190283396, "learning_rate": 7.316294381727607e-06, "loss": 2.2218849658966064, "step": 1373 }, { "epoch": 1.2378378378378379, "grad_norm": 21.68831769625887, "learning_rate": 7.311648212286259e-06, "loss": 1.731313943862915, "step": 1374 }, { "epoch": 1.2387387387387387, "grad_norm": 9.670390727557972, "learning_rate": 7.3069995027615866e-06, "loss": 1.6031734943389893, "step": 1375 }, { "epoch": 1.2396396396396396, "grad_norm": 17.7257502941489, "learning_rate": 7.302348258261681e-06, "loss": 1.090880274772644, "step": 1376 }, { "epoch": 1.2405405405405405, "grad_norm": 12.158862132711823, "learning_rate": 7.297694483897419e-06, "loss": 1.7922217845916748, "step": 1377 }, { "epoch": 1.2414414414414414, "grad_norm": 10.343322201737845, "learning_rate": 7.293038184782455e-06, "loss": 1.5444504022598267, "step": 1378 }, { "epoch": 1.2423423423423423, "grad_norm": 15.600510863887093, "learning_rate": 7.288379366033222e-06, "loss": 1.8387994766235352, "step": 1379 }, { "epoch": 1.2432432432432432, "grad_norm": 17.805275666565176, "learning_rate": 7.283718032768918e-06, "loss": 2.0836944580078125, "step": 1380 }, { "epoch": 1.244144144144144, "grad_norm": 15.290226624511453, "learning_rate": 7.279054190111506e-06, "loss": 1.8654720783233643, "step": 1381 }, { "epoch": 1.245045045045045, "grad_norm": 11.358877730376452, "learning_rate": 7.274387843185706e-06, "loss": 0.9758589267730713, "step": 1382 }, { "epoch": 1.2459459459459459, "grad_norm": 10.315239518582793, "learning_rate": 7.269718997118989e-06, "loss": 1.3522006273269653, "step": 1383 }, { "epoch": 1.2468468468468468, "grad_norm": 11.204012591868468, "learning_rate": 7.265047657041572e-06, "loss": 2.102362632751465, "step": 1384 }, { "epoch": 1.2477477477477477, "grad_norm": 11.042702044425438, "learning_rate": 7.260373828086412e-06, "loss": 1.0497840642929077, "step": 1385 }, { "epoch": 1.2486486486486488, "grad_norm": 10.28616439594604, "learning_rate": 7.2556975153892026e-06, "loss": 1.5571216344833374, "step": 1386 }, { "epoch": 1.2495495495495494, "grad_norm": 13.383199442545159, "learning_rate": 7.251018724088367e-06, "loss": 1.8421202898025513, "step": 1387 }, { "epoch": 1.2504504504504506, "grad_norm": 12.244421177234683, "learning_rate": 7.24633745932505e-06, "loss": 1.5307228565216064, "step": 1388 }, { "epoch": 1.2513513513513512, "grad_norm": 8.821148614606216, "learning_rate": 7.241653726243114e-06, "loss": 1.2365405559539795, "step": 1389 }, { "epoch": 1.2522522522522523, "grad_norm": 10.320916195803836, "learning_rate": 7.236967529989135e-06, "loss": 2.1349010467529297, "step": 1390 }, { "epoch": 1.2531531531531532, "grad_norm": 41.31115369427396, "learning_rate": 7.232278875712396e-06, "loss": 3.4286715984344482, "step": 1391 }, { "epoch": 1.2540540540540541, "grad_norm": 8.296025650924667, "learning_rate": 7.22758776856488e-06, "loss": 1.4277901649475098, "step": 1392 }, { "epoch": 1.254954954954955, "grad_norm": 10.533961574576528, "learning_rate": 7.222894213701264e-06, "loss": 1.6853958368301392, "step": 1393 }, { "epoch": 1.255855855855856, "grad_norm": 13.35248993703874, "learning_rate": 7.218198216278918e-06, "loss": 2.1888997554779053, "step": 1394 }, { "epoch": 1.2567567567567568, "grad_norm": 12.990859083080924, "learning_rate": 7.213499781457891e-06, "loss": 1.1632167100906372, "step": 1395 }, { "epoch": 1.2576576576576577, "grad_norm": 11.893579892206938, "learning_rate": 7.208798914400916e-06, "loss": 1.218822717666626, "step": 1396 }, { "epoch": 1.2585585585585586, "grad_norm": 11.106466441833884, "learning_rate": 7.204095620273395e-06, "loss": 1.7315642833709717, "step": 1397 }, { "epoch": 1.2594594594594595, "grad_norm": 8.767909217390986, "learning_rate": 7.1993899042433965e-06, "loss": 0.9117752313613892, "step": 1398 }, { "epoch": 1.2603603603603604, "grad_norm": 9.32030423453937, "learning_rate": 7.194681771481653e-06, "loss": 1.5483365058898926, "step": 1399 }, { "epoch": 1.2612612612612613, "grad_norm": 10.922927319662296, "learning_rate": 7.189971227161551e-06, "loss": 1.449894905090332, "step": 1400 }, { "epoch": 1.2621621621621621, "grad_norm": 8.460395029652267, "learning_rate": 7.185258276459125e-06, "loss": 1.7599258422851562, "step": 1401 }, { "epoch": 1.263063063063063, "grad_norm": 17.449187812870015, "learning_rate": 7.180542924553055e-06, "loss": 1.0262142419815063, "step": 1402 }, { "epoch": 1.263963963963964, "grad_norm": 14.96280750597402, "learning_rate": 7.175825176624665e-06, "loss": 2.000757932662964, "step": 1403 }, { "epoch": 1.2648648648648648, "grad_norm": 8.965402157080927, "learning_rate": 7.171105037857901e-06, "loss": 1.717637300491333, "step": 1404 }, { "epoch": 1.2657657657657657, "grad_norm": 16.005789341830745, "learning_rate": 7.166382513439344e-06, "loss": 1.4934778213500977, "step": 1405 }, { "epoch": 1.2666666666666666, "grad_norm": 29.960182603102517, "learning_rate": 7.161657608558195e-06, "loss": 2.0391616821289062, "step": 1406 }, { "epoch": 1.2675675675675675, "grad_norm": 13.301955309440645, "learning_rate": 7.156930328406268e-06, "loss": 3.2663180828094482, "step": 1407 }, { "epoch": 1.2684684684684684, "grad_norm": 9.317719860301414, "learning_rate": 7.152200678177992e-06, "loss": 1.3521085977554321, "step": 1408 }, { "epoch": 1.2693693693693695, "grad_norm": 15.01321760720811, "learning_rate": 7.147468663070394e-06, "loss": 1.457066297531128, "step": 1409 }, { "epoch": 1.2702702702702702, "grad_norm": 64.91707114599426, "learning_rate": 7.142734288283104e-06, "loss": 3.4249014854431152, "step": 1410 }, { "epoch": 1.2711711711711713, "grad_norm": 8.851525925579665, "learning_rate": 7.137997559018347e-06, "loss": 1.3285980224609375, "step": 1411 }, { "epoch": 1.272072072072072, "grad_norm": 12.594045557885265, "learning_rate": 7.133258480480927e-06, "loss": 1.5793802738189697, "step": 1412 }, { "epoch": 1.272972972972973, "grad_norm": 13.593476027500934, "learning_rate": 7.128517057878236e-06, "loss": 1.9223592281341553, "step": 1413 }, { "epoch": 1.2738738738738737, "grad_norm": 8.690744191150314, "learning_rate": 7.12377329642024e-06, "loss": 1.2703932523727417, "step": 1414 }, { "epoch": 1.2747747747747749, "grad_norm": 12.402794101289746, "learning_rate": 7.119027201319475e-06, "loss": 1.16603684425354, "step": 1415 }, { "epoch": 1.2756756756756757, "grad_norm": 9.367974071505143, "learning_rate": 7.114278777791041e-06, "loss": 1.7819464206695557, "step": 1416 }, { "epoch": 1.2765765765765766, "grad_norm": 7.8478231909280325, "learning_rate": 7.109528031052597e-06, "loss": 1.3226629495620728, "step": 1417 }, { "epoch": 1.2774774774774775, "grad_norm": 9.661943223100234, "learning_rate": 7.1047749663243545e-06, "loss": 1.7379398345947266, "step": 1418 }, { "epoch": 1.2783783783783784, "grad_norm": 10.490058757861664, "learning_rate": 7.1000195888290726e-06, "loss": 1.9525641202926636, "step": 1419 }, { "epoch": 1.2792792792792793, "grad_norm": 29.87284270496952, "learning_rate": 7.09526190379205e-06, "loss": 0.845831573009491, "step": 1420 }, { "epoch": 1.2801801801801802, "grad_norm": 11.91578853488134, "learning_rate": 7.090501916441124e-06, "loss": 1.4141738414764404, "step": 1421 }, { "epoch": 1.281081081081081, "grad_norm": 11.098002980381764, "learning_rate": 7.085739632006656e-06, "loss": 0.9975290894508362, "step": 1422 }, { "epoch": 1.281981981981982, "grad_norm": 16.92866770300015, "learning_rate": 7.0809750557215385e-06, "loss": 1.3577886819839478, "step": 1423 }, { "epoch": 1.2828828828828829, "grad_norm": 10.099739910797975, "learning_rate": 7.076208192821179e-06, "loss": 1.1366277933120728, "step": 1424 }, { "epoch": 1.2837837837837838, "grad_norm": 14.743356708038405, "learning_rate": 7.071439048543498e-06, "loss": 1.587902545928955, "step": 1425 }, { "epoch": 1.2846846846846847, "grad_norm": 13.55985735794094, "learning_rate": 7.0666676281289205e-06, "loss": 1.235689401626587, "step": 1426 }, { "epoch": 1.2855855855855856, "grad_norm": 10.195364678567254, "learning_rate": 7.061893936820376e-06, "loss": 1.7189230918884277, "step": 1427 }, { "epoch": 1.2864864864864864, "grad_norm": 11.420024715459206, "learning_rate": 7.057117979863288e-06, "loss": 1.6788554191589355, "step": 1428 }, { "epoch": 1.2873873873873873, "grad_norm": 7.899561912850803, "learning_rate": 7.05233976250557e-06, "loss": 1.0105758905410767, "step": 1429 }, { "epoch": 1.2882882882882882, "grad_norm": 16.941219293922575, "learning_rate": 7.047559289997618e-06, "loss": 1.7400261163711548, "step": 1430 }, { "epoch": 1.2891891891891891, "grad_norm": 9.56288562596186, "learning_rate": 7.0427765675923055e-06, "loss": 1.8979640007019043, "step": 1431 }, { "epoch": 1.29009009009009, "grad_norm": 12.510087641070497, "learning_rate": 7.037991600544982e-06, "loss": 1.7491884231567383, "step": 1432 }, { "epoch": 1.290990990990991, "grad_norm": 9.667832761107086, "learning_rate": 7.0332043941134595e-06, "loss": 1.933447003364563, "step": 1433 }, { "epoch": 1.291891891891892, "grad_norm": 10.3254616942883, "learning_rate": 7.028414953558012e-06, "loss": 0.959897518157959, "step": 1434 }, { "epoch": 1.2927927927927927, "grad_norm": 9.966037197858935, "learning_rate": 7.023623284141369e-06, "loss": 1.429056167602539, "step": 1435 }, { "epoch": 1.2936936936936938, "grad_norm": 10.193568731979536, "learning_rate": 7.0188293911287075e-06, "loss": 1.8352670669555664, "step": 1436 }, { "epoch": 1.2945945945945945, "grad_norm": 14.231366838619765, "learning_rate": 7.014033279787651e-06, "loss": 3.6025497913360596, "step": 1437 }, { "epoch": 1.2954954954954956, "grad_norm": 17.102283011638892, "learning_rate": 7.009234955388257e-06, "loss": 1.7527354955673218, "step": 1438 }, { "epoch": 1.2963963963963965, "grad_norm": 11.049771073776762, "learning_rate": 7.004434423203016e-06, "loss": 1.2662566900253296, "step": 1439 }, { "epoch": 1.2972972972972974, "grad_norm": 12.530549988649303, "learning_rate": 6.999631688506846e-06, "loss": 0.9092813730239868, "step": 1440 }, { "epoch": 1.2981981981981983, "grad_norm": 15.15531879332505, "learning_rate": 6.994826756577082e-06, "loss": 2.4031832218170166, "step": 1441 }, { "epoch": 1.2990990990990992, "grad_norm": 10.56692184348743, "learning_rate": 6.990019632693478e-06, "loss": 1.1668978929519653, "step": 1442 }, { "epoch": 1.3, "grad_norm": 11.039725683520837, "learning_rate": 6.985210322138191e-06, "loss": 1.592980146408081, "step": 1443 }, { "epoch": 1.300900900900901, "grad_norm": 11.074299403387734, "learning_rate": 6.980398830195785e-06, "loss": 1.2689815759658813, "step": 1444 }, { "epoch": 1.3018018018018018, "grad_norm": 7.922665314187741, "learning_rate": 6.975585162153218e-06, "loss": 1.483189582824707, "step": 1445 }, { "epoch": 1.3027027027027027, "grad_norm": 12.352596563820562, "learning_rate": 6.970769323299843e-06, "loss": 1.1730546951293945, "step": 1446 }, { "epoch": 1.3036036036036036, "grad_norm": 8.647036918170205, "learning_rate": 6.965951318927395e-06, "loss": 1.3135360479354858, "step": 1447 }, { "epoch": 1.3045045045045045, "grad_norm": 19.76918698112762, "learning_rate": 6.96113115432999e-06, "loss": 2.7980329990386963, "step": 1448 }, { "epoch": 1.3054054054054054, "grad_norm": 10.59361494593598, "learning_rate": 6.956308834804115e-06, "loss": 1.9804593324661255, "step": 1449 }, { "epoch": 1.3063063063063063, "grad_norm": 12.670842435395594, "learning_rate": 6.951484365648628e-06, "loss": 1.292311191558838, "step": 1450 }, { "epoch": 1.3072072072072072, "grad_norm": 14.579133809409123, "learning_rate": 6.9466577521647496e-06, "loss": 1.7021632194519043, "step": 1451 }, { "epoch": 1.308108108108108, "grad_norm": 11.659110578279638, "learning_rate": 6.941828999656054e-06, "loss": 2.4892637729644775, "step": 1452 }, { "epoch": 1.309009009009009, "grad_norm": 11.723701997771665, "learning_rate": 6.936998113428466e-06, "loss": 2.369381904602051, "step": 1453 }, { "epoch": 1.3099099099099099, "grad_norm": 15.520099051937635, "learning_rate": 6.932165098790257e-06, "loss": 2.2101807594299316, "step": 1454 }, { "epoch": 1.3108108108108107, "grad_norm": 6.329220230845185, "learning_rate": 6.927329961052036e-06, "loss": 1.4462792873382568, "step": 1455 }, { "epoch": 1.3117117117117116, "grad_norm": 8.577589949788242, "learning_rate": 6.9224927055267456e-06, "loss": 1.212193489074707, "step": 1456 }, { "epoch": 1.3126126126126128, "grad_norm": 10.300198588278217, "learning_rate": 6.917653337529655e-06, "loss": 1.342195749282837, "step": 1457 }, { "epoch": 1.3135135135135134, "grad_norm": 11.523689425446618, "learning_rate": 6.912811862378353e-06, "loss": 1.1294045448303223, "step": 1458 }, { "epoch": 1.3144144144144145, "grad_norm": 7.504283966414375, "learning_rate": 6.9079682853927436e-06, "loss": 1.299343466758728, "step": 1459 }, { "epoch": 1.3153153153153152, "grad_norm": 18.030183928050544, "learning_rate": 6.9031226118950445e-06, "loss": 1.2723416090011597, "step": 1460 }, { "epoch": 1.3162162162162163, "grad_norm": 9.79491788344939, "learning_rate": 6.898274847209775e-06, "loss": 1.2439301013946533, "step": 1461 }, { "epoch": 1.317117117117117, "grad_norm": 16.072963433739307, "learning_rate": 6.8934249966637515e-06, "loss": 0.9168739318847656, "step": 1462 }, { "epoch": 1.318018018018018, "grad_norm": 6.646130498815386, "learning_rate": 6.88857306558608e-06, "loss": 1.6370880603790283, "step": 1463 }, { "epoch": 1.318918918918919, "grad_norm": 13.474685631393964, "learning_rate": 6.8837190593081595e-06, "loss": 1.6939034461975098, "step": 1464 }, { "epoch": 1.3198198198198199, "grad_norm": 13.255101591637459, "learning_rate": 6.878862983163661e-06, "loss": 0.566031277179718, "step": 1465 }, { "epoch": 1.3207207207207208, "grad_norm": 23.61069042071233, "learning_rate": 6.874004842488537e-06, "loss": 1.9992173910140991, "step": 1466 }, { "epoch": 1.3216216216216217, "grad_norm": 16.17865447072103, "learning_rate": 6.869144642621006e-06, "loss": 2.696247100830078, "step": 1467 }, { "epoch": 1.3225225225225226, "grad_norm": 30.219320393157133, "learning_rate": 6.864282388901544e-06, "loss": 2.0518383979797363, "step": 1468 }, { "epoch": 1.3234234234234235, "grad_norm": 10.251150920416583, "learning_rate": 6.859418086672892e-06, "loss": 1.741684913635254, "step": 1469 }, { "epoch": 1.3243243243243243, "grad_norm": 12.22188532170062, "learning_rate": 6.854551741280039e-06, "loss": 1.4471855163574219, "step": 1470 }, { "epoch": 1.3252252252252252, "grad_norm": 8.464858435050514, "learning_rate": 6.849683358070217e-06, "loss": 2.0512211322784424, "step": 1471 }, { "epoch": 1.3261261261261261, "grad_norm": 9.663271857650159, "learning_rate": 6.844812942392899e-06, "loss": 1.8038698434829712, "step": 1472 }, { "epoch": 1.327027027027027, "grad_norm": 7.5145310553659135, "learning_rate": 6.839940499599791e-06, "loss": 1.251605749130249, "step": 1473 }, { "epoch": 1.327927927927928, "grad_norm": 9.654822014222745, "learning_rate": 6.835066035044827e-06, "loss": 1.6228208541870117, "step": 1474 }, { "epoch": 1.3288288288288288, "grad_norm": 12.073003036682927, "learning_rate": 6.830189554084162e-06, "loss": 1.4056490659713745, "step": 1475 }, { "epoch": 1.3297297297297297, "grad_norm": 9.09359100958607, "learning_rate": 6.825311062076166e-06, "loss": 1.4476819038391113, "step": 1476 }, { "epoch": 1.3306306306306306, "grad_norm": 8.974308806871944, "learning_rate": 6.820430564381419e-06, "loss": 1.0044509172439575, "step": 1477 }, { "epoch": 1.3315315315315315, "grad_norm": 11.560229844922576, "learning_rate": 6.815548066362707e-06, "loss": 1.3208290338516235, "step": 1478 }, { "epoch": 1.3324324324324324, "grad_norm": 27.433320472132237, "learning_rate": 6.810663573385013e-06, "loss": 1.8785879611968994, "step": 1479 }, { "epoch": 1.3333333333333333, "grad_norm": 7.34239456788459, "learning_rate": 6.805777090815506e-06, "loss": 1.3099184036254883, "step": 1480 }, { "epoch": 1.3342342342342342, "grad_norm": 13.785305743855641, "learning_rate": 6.800888624023552e-06, "loss": 1.6740031242370605, "step": 1481 }, { "epoch": 1.3351351351351353, "grad_norm": 13.478577023871006, "learning_rate": 6.79599817838069e-06, "loss": 1.2225863933563232, "step": 1482 }, { "epoch": 1.336036036036036, "grad_norm": 8.949648729074358, "learning_rate": 6.791105759260637e-06, "loss": 1.498471975326538, "step": 1483 }, { "epoch": 1.336936936936937, "grad_norm": 10.935949380255698, "learning_rate": 6.786211372039277e-06, "loss": 2.295103073120117, "step": 1484 }, { "epoch": 1.3378378378378377, "grad_norm": 9.925065969678082, "learning_rate": 6.781315022094652e-06, "loss": 1.3988707065582275, "step": 1485 }, { "epoch": 1.3387387387387388, "grad_norm": 29.961737390518984, "learning_rate": 6.7764167148069695e-06, "loss": 1.797900915145874, "step": 1486 }, { "epoch": 1.3396396396396395, "grad_norm": 23.355601544653386, "learning_rate": 6.771516455558581e-06, "loss": 1.6941187381744385, "step": 1487 }, { "epoch": 1.3405405405405406, "grad_norm": 8.925186199314515, "learning_rate": 6.766614249733986e-06, "loss": 1.1308190822601318, "step": 1488 }, { "epoch": 1.3414414414414415, "grad_norm": 77.27501005877426, "learning_rate": 6.761710102719823e-06, "loss": 1.5973894596099854, "step": 1489 }, { "epoch": 1.3423423423423424, "grad_norm": 11.444046265690886, "learning_rate": 6.7568040199048604e-06, "loss": 1.4958088397979736, "step": 1490 }, { "epoch": 1.3432432432432433, "grad_norm": 10.019881082111748, "learning_rate": 6.751896006679999e-06, "loss": 1.3319631814956665, "step": 1491 }, { "epoch": 1.3441441441441442, "grad_norm": 17.6714685880708, "learning_rate": 6.746986068438255e-06, "loss": 2.3347811698913574, "step": 1492 }, { "epoch": 1.345045045045045, "grad_norm": 8.951677689395087, "learning_rate": 6.742074210574764e-06, "loss": 1.483781337738037, "step": 1493 }, { "epoch": 1.345945945945946, "grad_norm": 9.509507922198216, "learning_rate": 6.737160438486771e-06, "loss": 1.783625841140747, "step": 1494 }, { "epoch": 1.3468468468468469, "grad_norm": 14.358783693076733, "learning_rate": 6.732244757573619e-06, "loss": 1.4702600240707397, "step": 1495 }, { "epoch": 1.3477477477477477, "grad_norm": 11.278813813202781, "learning_rate": 6.727327173236755e-06, "loss": 1.2078044414520264, "step": 1496 }, { "epoch": 1.3486486486486486, "grad_norm": 13.923544729865053, "learning_rate": 6.722407690879713e-06, "loss": 1.5310215950012207, "step": 1497 }, { "epoch": 1.3495495495495495, "grad_norm": 12.384659652404792, "learning_rate": 6.717486315908117e-06, "loss": 1.6496665477752686, "step": 1498 }, { "epoch": 1.3504504504504504, "grad_norm": 16.531447348895114, "learning_rate": 6.712563053729666e-06, "loss": 1.783576250076294, "step": 1499 }, { "epoch": 1.3513513513513513, "grad_norm": 12.340681437779834, "learning_rate": 6.707637909754136e-06, "loss": 1.4855138063430786, "step": 1500 }, { "epoch": 1.3522522522522522, "grad_norm": 12.840687199470583, "learning_rate": 6.702710889393369e-06, "loss": 1.4109737873077393, "step": 1501 }, { "epoch": 1.353153153153153, "grad_norm": 11.793260271316448, "learning_rate": 6.697781998061269e-06, "loss": 1.1371577978134155, "step": 1502 }, { "epoch": 1.354054054054054, "grad_norm": 8.953952530348078, "learning_rate": 6.692851241173796e-06, "loss": 1.495855450630188, "step": 1503 }, { "epoch": 1.3549549549549549, "grad_norm": 11.357007680099965, "learning_rate": 6.687918624148963e-06, "loss": 1.0094865560531616, "step": 1504 }, { "epoch": 1.3558558558558558, "grad_norm": 13.866286980242856, "learning_rate": 6.682984152406819e-06, "loss": 2.112832546234131, "step": 1505 }, { "epoch": 1.3567567567567567, "grad_norm": 11.31135397643678, "learning_rate": 6.6780478313694595e-06, "loss": 1.3318833112716675, "step": 1506 }, { "epoch": 1.3576576576576578, "grad_norm": 7.261488334927335, "learning_rate": 6.6731096664610085e-06, "loss": 1.0667122602462769, "step": 1507 }, { "epoch": 1.3585585585585584, "grad_norm": 17.226149858340406, "learning_rate": 6.668169663107614e-06, "loss": 1.4053834676742554, "step": 1508 }, { "epoch": 1.3594594594594596, "grad_norm": 15.457783991296136, "learning_rate": 6.663227826737448e-06, "loss": 1.7283596992492676, "step": 1509 }, { "epoch": 1.3603603603603602, "grad_norm": 14.47847833402991, "learning_rate": 6.658284162780696e-06, "loss": 1.3663225173950195, "step": 1510 }, { "epoch": 1.3612612612612613, "grad_norm": 11.76769315410328, "learning_rate": 6.653338676669549e-06, "loss": 1.1600167751312256, "step": 1511 }, { "epoch": 1.3621621621621622, "grad_norm": 11.287613650548183, "learning_rate": 6.6483913738382015e-06, "loss": 2.11704158782959, "step": 1512 }, { "epoch": 1.3630630630630631, "grad_norm": 8.769643345190612, "learning_rate": 6.643442259722845e-06, "loss": 2.3525846004486084, "step": 1513 }, { "epoch": 1.363963963963964, "grad_norm": 10.057037063527666, "learning_rate": 6.6384913397616614e-06, "loss": 1.8541146516799927, "step": 1514 }, { "epoch": 1.364864864864865, "grad_norm": 16.84125796871969, "learning_rate": 6.633538619394817e-06, "loss": 2.212017059326172, "step": 1515 }, { "epoch": 1.3657657657657658, "grad_norm": 15.014498400819402, "learning_rate": 6.628584104064454e-06, "loss": 1.8785572052001953, "step": 1516 }, { "epoch": 1.3666666666666667, "grad_norm": 7.8208850480783445, "learning_rate": 6.623627799214689e-06, "loss": 1.8144475221633911, "step": 1517 }, { "epoch": 1.3675675675675676, "grad_norm": 8.796725097213452, "learning_rate": 6.618669710291607e-06, "loss": 1.1300932168960571, "step": 1518 }, { "epoch": 1.3684684684684685, "grad_norm": 16.96085567967029, "learning_rate": 6.613709842743247e-06, "loss": 1.8939905166625977, "step": 1519 }, { "epoch": 1.3693693693693694, "grad_norm": 15.890620069538523, "learning_rate": 6.608748202019609e-06, "loss": 2.296168565750122, "step": 1520 }, { "epoch": 1.3702702702702703, "grad_norm": 12.897079381014754, "learning_rate": 6.60378479357264e-06, "loss": 1.373428225517273, "step": 1521 }, { "epoch": 1.3711711711711712, "grad_norm": 72.08633517003211, "learning_rate": 6.598819622856227e-06, "loss": 2.1196229457855225, "step": 1522 }, { "epoch": 1.372072072072072, "grad_norm": 11.863094670122011, "learning_rate": 6.593852695326195e-06, "loss": 1.8780111074447632, "step": 1523 }, { "epoch": 1.372972972972973, "grad_norm": 12.40630514355168, "learning_rate": 6.588884016440296e-06, "loss": 2.2452330589294434, "step": 1524 }, { "epoch": 1.3738738738738738, "grad_norm": 10.698082427109062, "learning_rate": 6.583913591658215e-06, "loss": 1.7420361042022705, "step": 1525 }, { "epoch": 1.3747747747747747, "grad_norm": 11.45374027254298, "learning_rate": 6.5789414264415455e-06, "loss": 1.486595869064331, "step": 1526 }, { "epoch": 1.3756756756756756, "grad_norm": 9.960417384966648, "learning_rate": 6.573967526253799e-06, "loss": 2.052097797393799, "step": 1527 }, { "epoch": 1.3765765765765765, "grad_norm": 9.398298485798593, "learning_rate": 6.568991896560394e-06, "loss": 1.6156516075134277, "step": 1528 }, { "epoch": 1.3774774774774774, "grad_norm": 7.991873232786982, "learning_rate": 6.564014542828645e-06, "loss": 1.7775673866271973, "step": 1529 }, { "epoch": 1.3783783783783785, "grad_norm": 12.990266393680315, "learning_rate": 6.559035470527766e-06, "loss": 1.3804233074188232, "step": 1530 }, { "epoch": 1.3792792792792792, "grad_norm": 19.1123845290699, "learning_rate": 6.554054685128857e-06, "loss": 2.075040578842163, "step": 1531 }, { "epoch": 1.3801801801801803, "grad_norm": 11.649539270034651, "learning_rate": 6.549072192104899e-06, "loss": 1.1930575370788574, "step": 1532 }, { "epoch": 1.381081081081081, "grad_norm": 10.849947510650079, "learning_rate": 6.54408799693075e-06, "loss": 1.5370773077011108, "step": 1533 }, { "epoch": 1.381981981981982, "grad_norm": 14.00986862839, "learning_rate": 6.539102105083139e-06, "loss": 1.4853053092956543, "step": 1534 }, { "epoch": 1.3828828828828827, "grad_norm": 11.43709366826303, "learning_rate": 6.5341145220406624e-06, "loss": 1.607257604598999, "step": 1535 }, { "epoch": 1.3837837837837839, "grad_norm": 9.335255223305646, "learning_rate": 6.52912525328377e-06, "loss": 1.058199167251587, "step": 1536 }, { "epoch": 1.3846846846846848, "grad_norm": 8.65597380645358, "learning_rate": 6.524134304294767e-06, "loss": 1.581141710281372, "step": 1537 }, { "epoch": 1.3855855855855856, "grad_norm": 8.905658798603659, "learning_rate": 6.519141680557801e-06, "loss": 1.7198735475540161, "step": 1538 }, { "epoch": 1.3864864864864865, "grad_norm": 13.866845671291236, "learning_rate": 6.514147387558866e-06, "loss": 1.1386839151382446, "step": 1539 }, { "epoch": 1.3873873873873874, "grad_norm": 7.183340151141346, "learning_rate": 6.509151430785785e-06, "loss": 1.55452299118042, "step": 1540 }, { "epoch": 1.3882882882882883, "grad_norm": 14.031047648797244, "learning_rate": 6.5041538157282105e-06, "loss": 1.459531307220459, "step": 1541 }, { "epoch": 1.3891891891891892, "grad_norm": 9.921835400261099, "learning_rate": 6.4991545478776175e-06, "loss": 1.7958147525787354, "step": 1542 }, { "epoch": 1.39009009009009, "grad_norm": 14.997502932067247, "learning_rate": 6.494153632727299e-06, "loss": 1.7099924087524414, "step": 1543 }, { "epoch": 1.390990990990991, "grad_norm": 15.02260259254844, "learning_rate": 6.489151075772355e-06, "loss": 2.1933164596557617, "step": 1544 }, { "epoch": 1.3918918918918919, "grad_norm": 10.2867303895181, "learning_rate": 6.484146882509692e-06, "loss": 1.3097925186157227, "step": 1545 }, { "epoch": 1.3927927927927928, "grad_norm": 9.534133003704742, "learning_rate": 6.4791410584380134e-06, "loss": 1.562878131866455, "step": 1546 }, { "epoch": 1.3936936936936937, "grad_norm": 8.408041594397528, "learning_rate": 6.474133609057812e-06, "loss": 1.8736263513565063, "step": 1547 }, { "epoch": 1.3945945945945946, "grad_norm": 13.757239562523456, "learning_rate": 6.469124539871372e-06, "loss": 1.7695668935775757, "step": 1548 }, { "epoch": 1.3954954954954955, "grad_norm": 11.411530144244546, "learning_rate": 6.464113856382752e-06, "loss": 3.1489667892456055, "step": 1549 }, { "epoch": 1.3963963963963963, "grad_norm": 16.402962216303628, "learning_rate": 6.45910156409779e-06, "loss": 1.3440314531326294, "step": 1550 }, { "epoch": 1.3972972972972972, "grad_norm": 13.418126658463782, "learning_rate": 6.4540876685240876e-06, "loss": 1.2822811603546143, "step": 1551 }, { "epoch": 1.3981981981981981, "grad_norm": 11.034343836938302, "learning_rate": 6.449072175171008e-06, "loss": 1.186071753501892, "step": 1552 }, { "epoch": 1.399099099099099, "grad_norm": 18.777461199501083, "learning_rate": 6.44405508954967e-06, "loss": 1.8270515203475952, "step": 1553 }, { "epoch": 1.4, "grad_norm": 7.822655272651996, "learning_rate": 6.439036417172948e-06, "loss": 1.5740070343017578, "step": 1554 }, { "epoch": 1.400900900900901, "grad_norm": 12.887351504893184, "learning_rate": 6.434016163555452e-06, "loss": 1.8109557628631592, "step": 1555 }, { "epoch": 1.4018018018018017, "grad_norm": 16.52740736826598, "learning_rate": 6.428994334213533e-06, "loss": 3.0481677055358887, "step": 1556 }, { "epoch": 1.4027027027027028, "grad_norm": 10.743501044416927, "learning_rate": 6.423970934665275e-06, "loss": 1.4444117546081543, "step": 1557 }, { "epoch": 1.4036036036036035, "grad_norm": 11.938361621219478, "learning_rate": 6.418945970430486e-06, "loss": 1.5727248191833496, "step": 1558 }, { "epoch": 1.4045045045045046, "grad_norm": 13.699446290887359, "learning_rate": 6.4139194470306885e-06, "loss": 1.1469335556030273, "step": 1559 }, { "epoch": 1.4054054054054055, "grad_norm": 11.087383650013706, "learning_rate": 6.408891369989128e-06, "loss": 1.6197885274887085, "step": 1560 }, { "epoch": 1.4063063063063064, "grad_norm": 10.275757877334277, "learning_rate": 6.403861744830749e-06, "loss": 2.283778667449951, "step": 1561 }, { "epoch": 1.4072072072072073, "grad_norm": 8.899213704142253, "learning_rate": 6.398830577082198e-06, "loss": 1.2480144500732422, "step": 1562 }, { "epoch": 1.4081081081081082, "grad_norm": 8.514196767597555, "learning_rate": 6.393797872271823e-06, "loss": 1.2771308422088623, "step": 1563 }, { "epoch": 1.409009009009009, "grad_norm": 15.00421976405728, "learning_rate": 6.3887636359296534e-06, "loss": 1.398680329322815, "step": 1564 }, { "epoch": 1.40990990990991, "grad_norm": 9.213733227110374, "learning_rate": 6.383727873587406e-06, "loss": 1.3736463785171509, "step": 1565 }, { "epoch": 1.4108108108108108, "grad_norm": 12.995105041138832, "learning_rate": 6.378690590778471e-06, "loss": 1.017199158668518, "step": 1566 }, { "epoch": 1.4117117117117117, "grad_norm": 11.874571646596916, "learning_rate": 6.373651793037916e-06, "loss": 0.8746930360794067, "step": 1567 }, { "epoch": 1.4126126126126126, "grad_norm": 7.435370378254893, "learning_rate": 6.368611485902463e-06, "loss": 1.9025697708129883, "step": 1568 }, { "epoch": 1.4135135135135135, "grad_norm": 9.285030912488718, "learning_rate": 6.363569674910499e-06, "loss": 2.2031970024108887, "step": 1569 }, { "epoch": 1.4144144144144144, "grad_norm": 11.609358703290049, "learning_rate": 6.358526365602064e-06, "loss": 1.5108566284179688, "step": 1570 }, { "epoch": 1.4153153153153153, "grad_norm": 10.638422004709088, "learning_rate": 6.353481563518842e-06, "loss": 1.4841814041137695, "step": 1571 }, { "epoch": 1.4162162162162162, "grad_norm": 14.190019820334548, "learning_rate": 6.3484352742041586e-06, "loss": 1.2413240671157837, "step": 1572 }, { "epoch": 1.417117117117117, "grad_norm": 10.124847187016426, "learning_rate": 6.343387503202974e-06, "loss": 1.3165152072906494, "step": 1573 }, { "epoch": 1.418018018018018, "grad_norm": 21.978059431121792, "learning_rate": 6.338338256061873e-06, "loss": 1.9939208030700684, "step": 1574 }, { "epoch": 1.4189189189189189, "grad_norm": 16.402579483713858, "learning_rate": 6.333287538329067e-06, "loss": 1.8033710718154907, "step": 1575 }, { "epoch": 1.4198198198198198, "grad_norm": 8.44207105176216, "learning_rate": 6.328235355554382e-06, "loss": 1.7890293598175049, "step": 1576 }, { "epoch": 1.4207207207207206, "grad_norm": 11.354134019653287, "learning_rate": 6.323181713289252e-06, "loss": 1.3599138259887695, "step": 1577 }, { "epoch": 1.4216216216216218, "grad_norm": 9.123051520886271, "learning_rate": 6.318126617086715e-06, "loss": 1.515425205230713, "step": 1578 }, { "epoch": 1.4225225225225224, "grad_norm": 15.343909798586768, "learning_rate": 6.31307007250141e-06, "loss": 1.4394266605377197, "step": 1579 }, { "epoch": 1.4234234234234235, "grad_norm": 8.212165320846351, "learning_rate": 6.308012085089563e-06, "loss": 1.6644560098648071, "step": 1580 }, { "epoch": 1.4243243243243242, "grad_norm": 9.281393071269752, "learning_rate": 6.3029526604089884e-06, "loss": 1.6146223545074463, "step": 1581 }, { "epoch": 1.4252252252252253, "grad_norm": 9.012711145286287, "learning_rate": 6.297891804019078e-06, "loss": 1.294966697692871, "step": 1582 }, { "epoch": 1.426126126126126, "grad_norm": 9.725957470939198, "learning_rate": 6.292829521480799e-06, "loss": 1.305307149887085, "step": 1583 }, { "epoch": 1.427027027027027, "grad_norm": 9.957884015738744, "learning_rate": 6.2877658183566835e-06, "loss": 1.8277273178100586, "step": 1584 }, { "epoch": 1.427927927927928, "grad_norm": 14.176997988886253, "learning_rate": 6.282700700210826e-06, "loss": 2.1664233207702637, "step": 1585 }, { "epoch": 1.428828828828829, "grad_norm": 11.210158890912192, "learning_rate": 6.277634172608875e-06, "loss": 1.5417143106460571, "step": 1586 }, { "epoch": 1.4297297297297298, "grad_norm": 43.360585840995725, "learning_rate": 6.272566241118028e-06, "loss": 1.2948834896087646, "step": 1587 }, { "epoch": 1.4306306306306307, "grad_norm": 23.179384362830724, "learning_rate": 6.267496911307025e-06, "loss": 2.2447755336761475, "step": 1588 }, { "epoch": 1.4315315315315316, "grad_norm": 13.150370235034188, "learning_rate": 6.262426188746142e-06, "loss": 1.6491607427597046, "step": 1589 }, { "epoch": 1.4324324324324325, "grad_norm": 16.607971986636734, "learning_rate": 6.257354079007188e-06, "loss": 1.8326668739318848, "step": 1590 }, { "epoch": 1.4333333333333333, "grad_norm": 12.860466881669609, "learning_rate": 6.252280587663493e-06, "loss": 1.7672603130340576, "step": 1591 }, { "epoch": 1.4342342342342342, "grad_norm": 13.821510643759613, "learning_rate": 6.247205720289907e-06, "loss": 1.5929617881774902, "step": 1592 }, { "epoch": 1.4351351351351351, "grad_norm": 16.62571737411264, "learning_rate": 6.242129482462791e-06, "loss": 1.1646729707717896, "step": 1593 }, { "epoch": 1.436036036036036, "grad_norm": 10.361866409910958, "learning_rate": 6.2370518797600134e-06, "loss": 1.9066646099090576, "step": 1594 }, { "epoch": 1.436936936936937, "grad_norm": 17.129189452143006, "learning_rate": 6.2319729177609385e-06, "loss": 1.6732923984527588, "step": 1595 }, { "epoch": 1.4378378378378378, "grad_norm": 8.787234499239403, "learning_rate": 6.226892602046431e-06, "loss": 1.8658177852630615, "step": 1596 }, { "epoch": 1.4387387387387387, "grad_norm": 11.615649470253091, "learning_rate": 6.221810938198836e-06, "loss": 1.6445609331130981, "step": 1597 }, { "epoch": 1.4396396396396396, "grad_norm": 15.692881856944984, "learning_rate": 6.216727931801983e-06, "loss": 1.9909805059432983, "step": 1598 }, { "epoch": 1.4405405405405405, "grad_norm": 14.21867844260476, "learning_rate": 6.21164358844118e-06, "loss": 1.9550414085388184, "step": 1599 }, { "epoch": 1.4414414414414414, "grad_norm": 10.660692370132626, "learning_rate": 6.206557913703196e-06, "loss": 1.4929118156433105, "step": 1600 }, { "epoch": 1.4423423423423423, "grad_norm": 10.216387245363482, "learning_rate": 6.201470913176273e-06, "loss": 1.3300130367279053, "step": 1601 }, { "epoch": 1.4432432432432432, "grad_norm": 21.373978171219317, "learning_rate": 6.196382592450101e-06, "loss": 1.9349777698516846, "step": 1602 }, { "epoch": 1.4441441441441443, "grad_norm": 14.959338680440988, "learning_rate": 6.191292957115825e-06, "loss": 1.6885695457458496, "step": 1603 }, { "epoch": 1.445045045045045, "grad_norm": 10.201541049752565, "learning_rate": 6.186202012766036e-06, "loss": 1.5974383354187012, "step": 1604 }, { "epoch": 1.445945945945946, "grad_norm": 13.33819723535737, "learning_rate": 6.1811097649947574e-06, "loss": 1.3800368309020996, "step": 1605 }, { "epoch": 1.4468468468468467, "grad_norm": 9.264941433832085, "learning_rate": 6.176016219397452e-06, "loss": 1.209047555923462, "step": 1606 }, { "epoch": 1.4477477477477478, "grad_norm": 16.113363796632687, "learning_rate": 6.170921381571002e-06, "loss": 1.006246566772461, "step": 1607 }, { "epoch": 1.4486486486486487, "grad_norm": 16.770842222995086, "learning_rate": 6.165825257113713e-06, "loss": 1.6659287214279175, "step": 1608 }, { "epoch": 1.4495495495495496, "grad_norm": 16.039169570521924, "learning_rate": 6.160727851625307e-06, "loss": 3.2486722469329834, "step": 1609 }, { "epoch": 1.4504504504504505, "grad_norm": 17.357556570755342, "learning_rate": 6.155629170706911e-06, "loss": 2.889737606048584, "step": 1610 }, { "epoch": 1.4513513513513514, "grad_norm": 12.05780156243574, "learning_rate": 6.150529219961051e-06, "loss": 1.5399055480957031, "step": 1611 }, { "epoch": 1.4522522522522523, "grad_norm": 12.821440883999509, "learning_rate": 6.14542800499165e-06, "loss": 0.9202550649642944, "step": 1612 }, { "epoch": 1.4531531531531532, "grad_norm": 9.525863966614185, "learning_rate": 6.1403255314040236e-06, "loss": 1.334446668624878, "step": 1613 }, { "epoch": 1.454054054054054, "grad_norm": 11.119332552840138, "learning_rate": 6.135221804804865e-06, "loss": 1.0869536399841309, "step": 1614 }, { "epoch": 1.454954954954955, "grad_norm": 9.457639599285084, "learning_rate": 6.130116830802246e-06, "loss": 1.2879786491394043, "step": 1615 }, { "epoch": 1.4558558558558559, "grad_norm": 11.840952811412075, "learning_rate": 6.125010615005612e-06, "loss": 1.6483588218688965, "step": 1616 }, { "epoch": 1.4567567567567568, "grad_norm": 12.597190006246704, "learning_rate": 6.11990316302577e-06, "loss": 1.1935402154922485, "step": 1617 }, { "epoch": 1.4576576576576576, "grad_norm": 14.775593935385816, "learning_rate": 6.114794480474886e-06, "loss": 1.4351227283477783, "step": 1618 }, { "epoch": 1.4585585585585585, "grad_norm": 25.85373262150229, "learning_rate": 6.109684572966479e-06, "loss": 1.638490915298462, "step": 1619 }, { "epoch": 1.4594594594594594, "grad_norm": 7.955022606363988, "learning_rate": 6.104573446115411e-06, "loss": 1.7206358909606934, "step": 1620 }, { "epoch": 1.4603603603603603, "grad_norm": 8.620128522921192, "learning_rate": 6.099461105537889e-06, "loss": 2.0019052028656006, "step": 1621 }, { "epoch": 1.4612612612612612, "grad_norm": 13.946066909423006, "learning_rate": 6.094347556851449e-06, "loss": 1.5832326412200928, "step": 1622 }, { "epoch": 1.462162162162162, "grad_norm": 12.133350625112662, "learning_rate": 6.089232805674956e-06, "loss": 1.5703575611114502, "step": 1623 }, { "epoch": 1.463063063063063, "grad_norm": 9.00483359712496, "learning_rate": 6.084116857628597e-06, "loss": 1.1276874542236328, "step": 1624 }, { "epoch": 1.4639639639639639, "grad_norm": 9.763789195084014, "learning_rate": 6.078999718333873e-06, "loss": 2.4685914516448975, "step": 1625 }, { "epoch": 1.464864864864865, "grad_norm": 9.700531000079286, "learning_rate": 6.073881393413596e-06, "loss": 1.3841001987457275, "step": 1626 }, { "epoch": 1.4657657657657657, "grad_norm": 16.588302386398713, "learning_rate": 6.068761888491879e-06, "loss": 2.0603816509246826, "step": 1627 }, { "epoch": 1.4666666666666668, "grad_norm": 14.757717618867021, "learning_rate": 6.063641209194132e-06, "loss": 1.2781075239181519, "step": 1628 }, { "epoch": 1.4675675675675675, "grad_norm": 40.19571124648031, "learning_rate": 6.058519361147055e-06, "loss": 1.8549585342407227, "step": 1629 }, { "epoch": 1.4684684684684686, "grad_norm": 7.224974582225779, "learning_rate": 6.053396349978632e-06, "loss": 1.322721004486084, "step": 1630 }, { "epoch": 1.4693693693693692, "grad_norm": 17.844463774704433, "learning_rate": 6.048272181318128e-06, "loss": 1.6630491018295288, "step": 1631 }, { "epoch": 1.4702702702702704, "grad_norm": 11.434248056957271, "learning_rate": 6.043146860796076e-06, "loss": 1.1549161672592163, "step": 1632 }, { "epoch": 1.4711711711711712, "grad_norm": 11.432998465665783, "learning_rate": 6.0380203940442775e-06, "loss": 0.9584408402442932, "step": 1633 }, { "epoch": 1.4720720720720721, "grad_norm": 8.388264777564174, "learning_rate": 6.032892786695791e-06, "loss": 2.0273547172546387, "step": 1634 }, { "epoch": 1.472972972972973, "grad_norm": 10.0987785197442, "learning_rate": 6.0277640443849304e-06, "loss": 1.3959040641784668, "step": 1635 }, { "epoch": 1.473873873873874, "grad_norm": 15.271438862832918, "learning_rate": 6.022634172747256e-06, "loss": 1.2520636320114136, "step": 1636 }, { "epoch": 1.4747747747747748, "grad_norm": 14.927005011233385, "learning_rate": 6.017503177419567e-06, "loss": 2.620509147644043, "step": 1637 }, { "epoch": 1.4756756756756757, "grad_norm": 11.735603116312422, "learning_rate": 6.012371064039902e-06, "loss": 1.672066330909729, "step": 1638 }, { "epoch": 1.4765765765765766, "grad_norm": 13.62705118948423, "learning_rate": 6.007237838247526e-06, "loss": 2.0264129638671875, "step": 1639 }, { "epoch": 1.4774774774774775, "grad_norm": 8.554462912863533, "learning_rate": 6.0021035056829245e-06, "loss": 1.2257013320922852, "step": 1640 }, { "epoch": 1.4783783783783784, "grad_norm": 8.449478832103793, "learning_rate": 5.9969680719878e-06, "loss": 1.8023260831832886, "step": 1641 }, { "epoch": 1.4792792792792793, "grad_norm": 8.959188230712469, "learning_rate": 5.991831542805065e-06, "loss": 1.2051575183868408, "step": 1642 }, { "epoch": 1.4801801801801802, "grad_norm": 15.777369325858878, "learning_rate": 5.986693923778838e-06, "loss": 1.8904740810394287, "step": 1643 }, { "epoch": 1.481081081081081, "grad_norm": 10.411842713882923, "learning_rate": 5.9815552205544316e-06, "loss": 0.7278385162353516, "step": 1644 }, { "epoch": 1.481981981981982, "grad_norm": 8.146928013269534, "learning_rate": 5.97641543877835e-06, "loss": 1.5132724046707153, "step": 1645 }, { "epoch": 1.4828828828828828, "grad_norm": 11.882997439370762, "learning_rate": 5.971274584098288e-06, "loss": 1.256619930267334, "step": 1646 }, { "epoch": 1.4837837837837837, "grad_norm": 11.595953910475629, "learning_rate": 5.966132662163111e-06, "loss": 1.0904524326324463, "step": 1647 }, { "epoch": 1.4846846846846846, "grad_norm": 10.305489803373794, "learning_rate": 5.960989678622865e-06, "loss": 1.3187847137451172, "step": 1648 }, { "epoch": 1.4855855855855855, "grad_norm": 13.548112746313338, "learning_rate": 5.955845639128756e-06, "loss": 1.4859611988067627, "step": 1649 }, { "epoch": 1.4864864864864864, "grad_norm": 10.724179999334005, "learning_rate": 5.950700549333155e-06, "loss": 1.4655499458312988, "step": 1650 }, { "epoch": 1.4873873873873875, "grad_norm": 12.19615947777353, "learning_rate": 5.945554414889583e-06, "loss": 1.3756601810455322, "step": 1651 }, { "epoch": 1.4882882882882882, "grad_norm": 13.17831608172685, "learning_rate": 5.940407241452711e-06, "loss": 2.211275339126587, "step": 1652 }, { "epoch": 1.4891891891891893, "grad_norm": 18.11415047074662, "learning_rate": 5.935259034678355e-06, "loss": 0.9074662923812866, "step": 1653 }, { "epoch": 1.49009009009009, "grad_norm": 11.868427755788677, "learning_rate": 5.93010980022346e-06, "loss": 1.8241913318634033, "step": 1654 }, { "epoch": 1.490990990990991, "grad_norm": 11.664353534766478, "learning_rate": 5.924959543746106e-06, "loss": 2.207322120666504, "step": 1655 }, { "epoch": 1.491891891891892, "grad_norm": 8.330443411504472, "learning_rate": 5.919808270905492e-06, "loss": 1.806227684020996, "step": 1656 }, { "epoch": 1.4927927927927929, "grad_norm": 8.941231059731615, "learning_rate": 5.914655987361934e-06, "loss": 1.4988905191421509, "step": 1657 }, { "epoch": 1.4936936936936938, "grad_norm": 7.830759914268425, "learning_rate": 5.909502698776862e-06, "loss": 1.8874337673187256, "step": 1658 }, { "epoch": 1.4945945945945946, "grad_norm": 10.5751037090603, "learning_rate": 5.9043484108128065e-06, "loss": 1.1740607023239136, "step": 1659 }, { "epoch": 1.4954954954954955, "grad_norm": 10.412303621365872, "learning_rate": 5.8991931291334e-06, "loss": 2.1098806858062744, "step": 1660 }, { "epoch": 1.4963963963963964, "grad_norm": 9.858650119398847, "learning_rate": 5.894036859403363e-06, "loss": 1.1361764669418335, "step": 1661 }, { "epoch": 1.4972972972972973, "grad_norm": 10.787153898457976, "learning_rate": 5.8888796072885035e-06, "loss": 1.571702480316162, "step": 1662 }, { "epoch": 1.4981981981981982, "grad_norm": 12.97077720565757, "learning_rate": 5.883721378455709e-06, "loss": 1.1731760501861572, "step": 1663 }, { "epoch": 1.499099099099099, "grad_norm": 8.890718278014416, "learning_rate": 5.8785621785729404e-06, "loss": 1.6960201263427734, "step": 1664 }, { "epoch": 1.5, "grad_norm": 9.177425634084365, "learning_rate": 5.873402013309226e-06, "loss": 2.0487327575683594, "step": 1665 }, { "epoch": 1.500900900900901, "grad_norm": 19.481958445940464, "learning_rate": 5.8682408883346535e-06, "loss": 1.5003571510314941, "step": 1666 }, { "epoch": 1.5018018018018018, "grad_norm": 7.910706063031221, "learning_rate": 5.863078809320364e-06, "loss": 1.8180335760116577, "step": 1667 }, { "epoch": 1.5027027027027027, "grad_norm": 9.026031805006543, "learning_rate": 5.857915781938552e-06, "loss": 0.5815849900245667, "step": 1668 }, { "epoch": 1.5036036036036036, "grad_norm": 9.194787009241589, "learning_rate": 5.85275181186245e-06, "loss": 2.0344133377075195, "step": 1669 }, { "epoch": 1.5045045045045045, "grad_norm": 15.441711483190371, "learning_rate": 5.847586904766326e-06, "loss": 1.4995135068893433, "step": 1670 }, { "epoch": 1.5054054054054054, "grad_norm": 19.405524715570145, "learning_rate": 5.8424210663254785e-06, "loss": 1.014390468597412, "step": 1671 }, { "epoch": 1.5063063063063065, "grad_norm": 7.728607689108665, "learning_rate": 5.837254302216232e-06, "loss": 1.178196668624878, "step": 1672 }, { "epoch": 1.5072072072072071, "grad_norm": 7.933918814442345, "learning_rate": 5.832086618115924e-06, "loss": 1.8295114040374756, "step": 1673 }, { "epoch": 1.5081081081081082, "grad_norm": 14.836941228728008, "learning_rate": 5.8269180197029055e-06, "loss": 1.8722667694091797, "step": 1674 }, { "epoch": 1.509009009009009, "grad_norm": 11.861924285254538, "learning_rate": 5.821748512656531e-06, "loss": 1.7150132656097412, "step": 1675 }, { "epoch": 1.50990990990991, "grad_norm": 7.914972670958925, "learning_rate": 5.816578102657154e-06, "loss": 2.0199174880981445, "step": 1676 }, { "epoch": 1.5108108108108107, "grad_norm": 14.704452033368009, "learning_rate": 5.811406795386122e-06, "loss": 2.478482961654663, "step": 1677 }, { "epoch": 1.5117117117117118, "grad_norm": 20.401386884909293, "learning_rate": 5.806234596525763e-06, "loss": 2.1431708335876465, "step": 1678 }, { "epoch": 1.5126126126126125, "grad_norm": 9.657768229659336, "learning_rate": 5.80106151175939e-06, "loss": 1.2024478912353516, "step": 1679 }, { "epoch": 1.5135135135135136, "grad_norm": 11.154050310367278, "learning_rate": 5.795887546771286e-06, "loss": 1.7211270332336426, "step": 1680 }, { "epoch": 1.5144144144144143, "grad_norm": 27.025931308202402, "learning_rate": 5.790712707246705e-06, "loss": 1.498245358467102, "step": 1681 }, { "epoch": 1.5153153153153154, "grad_norm": 8.568964534712551, "learning_rate": 5.785536998871858e-06, "loss": 1.4259952306747437, "step": 1682 }, { "epoch": 1.516216216216216, "grad_norm": 9.352484629253393, "learning_rate": 5.780360427333915e-06, "loss": 1.5320580005645752, "step": 1683 }, { "epoch": 1.5171171171171172, "grad_norm": 19.846026084647807, "learning_rate": 5.77518299832099e-06, "loss": 1.1129813194274902, "step": 1684 }, { "epoch": 1.518018018018018, "grad_norm": 9.143607370757177, "learning_rate": 5.770004717522141e-06, "loss": 2.056257486343384, "step": 1685 }, { "epoch": 1.518918918918919, "grad_norm": 16.3647108204785, "learning_rate": 5.764825590627362e-06, "loss": 1.675489902496338, "step": 1686 }, { "epoch": 1.5198198198198198, "grad_norm": 8.620761350410266, "learning_rate": 5.75964562332758e-06, "loss": 1.0735529661178589, "step": 1687 }, { "epoch": 1.5207207207207207, "grad_norm": 11.949137784819353, "learning_rate": 5.754464821314637e-06, "loss": 1.6738760471343994, "step": 1688 }, { "epoch": 1.5216216216216216, "grad_norm": 13.20388841691068, "learning_rate": 5.749283190281301e-06, "loss": 2.174321413040161, "step": 1689 }, { "epoch": 1.5225225225225225, "grad_norm": 8.30531785385007, "learning_rate": 5.744100735921245e-06, "loss": 1.2775280475616455, "step": 1690 }, { "epoch": 1.5234234234234234, "grad_norm": 9.575544371832327, "learning_rate": 5.7389174639290526e-06, "loss": 1.6569616794586182, "step": 1691 }, { "epoch": 1.5243243243243243, "grad_norm": 11.02059585703722, "learning_rate": 5.733733380000199e-06, "loss": 1.7098231315612793, "step": 1692 }, { "epoch": 1.5252252252252252, "grad_norm": 20.77680238959511, "learning_rate": 5.728548489831057e-06, "loss": 1.6244301795959473, "step": 1693 }, { "epoch": 1.526126126126126, "grad_norm": 13.085006557615154, "learning_rate": 5.723362799118883e-06, "loss": 1.1960186958312988, "step": 1694 }, { "epoch": 1.527027027027027, "grad_norm": 16.9436732795327, "learning_rate": 5.718176313561812e-06, "loss": 1.2833318710327148, "step": 1695 }, { "epoch": 1.5279279279279279, "grad_norm": 11.56067198397842, "learning_rate": 5.712989038858855e-06, "loss": 1.3736417293548584, "step": 1696 }, { "epoch": 1.528828828828829, "grad_norm": 11.914661268488837, "learning_rate": 5.707800980709888e-06, "loss": 1.7185921669006348, "step": 1697 }, { "epoch": 1.5297297297297296, "grad_norm": 12.478648162885634, "learning_rate": 5.702612144815648e-06, "loss": 0.889504075050354, "step": 1698 }, { "epoch": 1.5306306306306308, "grad_norm": 12.015993636472965, "learning_rate": 5.697422536877728e-06, "loss": 1.2465369701385498, "step": 1699 }, { "epoch": 1.5315315315315314, "grad_norm": 8.569076120163366, "learning_rate": 5.69223216259857e-06, "loss": 1.6955018043518066, "step": 1700 }, { "epoch": 1.5324324324324325, "grad_norm": 11.079823892196782, "learning_rate": 5.687041027681455e-06, "loss": 2.3626818656921387, "step": 1701 }, { "epoch": 1.5333333333333332, "grad_norm": 14.626377664270034, "learning_rate": 5.681849137830501e-06, "loss": 1.3092429637908936, "step": 1702 }, { "epoch": 1.5342342342342343, "grad_norm": 13.003938549380914, "learning_rate": 5.6766564987506564e-06, "loss": 1.6559178829193115, "step": 1703 }, { "epoch": 1.535135135135135, "grad_norm": 30.983859642167147, "learning_rate": 5.671463116147693e-06, "loss": 1.196112871170044, "step": 1704 }, { "epoch": 1.5360360360360361, "grad_norm": 11.7516729348981, "learning_rate": 5.666268995728199e-06, "loss": 1.7353311777114868, "step": 1705 }, { "epoch": 1.5369369369369368, "grad_norm": 8.027133091016674, "learning_rate": 5.661074143199574e-06, "loss": 0.9091062545776367, "step": 1706 }, { "epoch": 1.537837837837838, "grad_norm": 16.1951342231858, "learning_rate": 5.65587856427002e-06, "loss": 0.7852965593338013, "step": 1707 }, { "epoch": 1.5387387387387388, "grad_norm": 20.435391009280067, "learning_rate": 5.650682264648539e-06, "loss": 0.6935000419616699, "step": 1708 }, { "epoch": 1.5396396396396397, "grad_norm": 12.942865389701538, "learning_rate": 5.645485250044925e-06, "loss": 1.6129305362701416, "step": 1709 }, { "epoch": 1.5405405405405406, "grad_norm": 10.064534723487958, "learning_rate": 5.640287526169758e-06, "loss": 1.5475406646728516, "step": 1710 }, { "epoch": 1.5414414414414415, "grad_norm": 10.937035308927271, "learning_rate": 5.635089098734394e-06, "loss": 1.6922842264175415, "step": 1711 }, { "epoch": 1.5423423423423424, "grad_norm": 11.282532754995433, "learning_rate": 5.629889973450967e-06, "loss": 1.1134803295135498, "step": 1712 }, { "epoch": 1.5432432432432432, "grad_norm": 20.10974367652297, "learning_rate": 5.624690156032375e-06, "loss": 1.522965908050537, "step": 1713 }, { "epoch": 1.5441441441441441, "grad_norm": 31.445859279573188, "learning_rate": 5.619489652192277e-06, "loss": 1.5033540725708008, "step": 1714 }, { "epoch": 1.545045045045045, "grad_norm": 26.19661678772121, "learning_rate": 5.614288467645085e-06, "loss": 1.8068926334381104, "step": 1715 }, { "epoch": 1.545945945945946, "grad_norm": 13.689355970616518, "learning_rate": 5.60908660810596e-06, "loss": 1.1300309896469116, "step": 1716 }, { "epoch": 1.5468468468468468, "grad_norm": 17.717202462674102, "learning_rate": 5.603884079290807e-06, "loss": 1.168104648590088, "step": 1717 }, { "epoch": 1.5477477477477477, "grad_norm": 13.78333679043539, "learning_rate": 5.598680886916262e-06, "loss": 2.086900234222412, "step": 1718 }, { "epoch": 1.5486486486486486, "grad_norm": 11.61939517847149, "learning_rate": 5.593477036699694e-06, "loss": 0.9420620203018188, "step": 1719 }, { "epoch": 1.5495495495495497, "grad_norm": 8.566983109466861, "learning_rate": 5.588272534359193e-06, "loss": 1.6835405826568604, "step": 1720 }, { "epoch": 1.5504504504504504, "grad_norm": 20.272241725615217, "learning_rate": 5.583067385613565e-06, "loss": 1.3769150972366333, "step": 1721 }, { "epoch": 1.5513513513513515, "grad_norm": 13.152434110198287, "learning_rate": 5.577861596182329e-06, "loss": 1.4473698139190674, "step": 1722 }, { "epoch": 1.5522522522522522, "grad_norm": 16.03627561622673, "learning_rate": 5.572655171785706e-06, "loss": 1.2154319286346436, "step": 1723 }, { "epoch": 1.5531531531531533, "grad_norm": 8.941137328941675, "learning_rate": 5.567448118144612e-06, "loss": 2.1888017654418945, "step": 1724 }, { "epoch": 1.554054054054054, "grad_norm": 8.394871606465358, "learning_rate": 5.56224044098066e-06, "loss": 1.4928936958312988, "step": 1725 }, { "epoch": 1.554954954954955, "grad_norm": 9.725576143990676, "learning_rate": 5.557032146016142e-06, "loss": 1.499352216720581, "step": 1726 }, { "epoch": 1.5558558558558557, "grad_norm": 11.049766302756778, "learning_rate": 5.551823238974036e-06, "loss": 1.7239288091659546, "step": 1727 }, { "epoch": 1.5567567567567568, "grad_norm": 11.428954415639115, "learning_rate": 5.5466137255779874e-06, "loss": 1.3868229389190674, "step": 1728 }, { "epoch": 1.5576576576576575, "grad_norm": 20.396513511623166, "learning_rate": 5.541403611552309e-06, "loss": 2.211121082305908, "step": 1729 }, { "epoch": 1.5585585585585586, "grad_norm": 13.881234123571051, "learning_rate": 5.536192902621975e-06, "loss": 1.5474152565002441, "step": 1730 }, { "epoch": 1.5594594594594593, "grad_norm": 9.726536291578002, "learning_rate": 5.530981604512612e-06, "loss": 2.0104358196258545, "step": 1731 }, { "epoch": 1.5603603603603604, "grad_norm": 12.36264130130095, "learning_rate": 5.525769722950491e-06, "loss": 1.5727704763412476, "step": 1732 }, { "epoch": 1.5612612612612613, "grad_norm": 12.5047381018751, "learning_rate": 5.520557263662533e-06, "loss": 1.171445369720459, "step": 1733 }, { "epoch": 1.5621621621621622, "grad_norm": 7.379902224066317, "learning_rate": 5.515344232376283e-06, "loss": 2.027517318725586, "step": 1734 }, { "epoch": 1.563063063063063, "grad_norm": 7.6975595525575, "learning_rate": 5.510130634819921e-06, "loss": 2.2303547859191895, "step": 1735 }, { "epoch": 1.563963963963964, "grad_norm": 8.677028985658822, "learning_rate": 5.504916476722249e-06, "loss": 1.9097020626068115, "step": 1736 }, { "epoch": 1.5648648648648649, "grad_norm": 13.031580998989343, "learning_rate": 5.499701763812684e-06, "loss": 1.7067222595214844, "step": 1737 }, { "epoch": 1.5657657657657658, "grad_norm": 9.184292289384498, "learning_rate": 5.49448650182125e-06, "loss": 1.273280382156372, "step": 1738 }, { "epoch": 1.5666666666666667, "grad_norm": 11.550196038454393, "learning_rate": 5.489270696478578e-06, "loss": 1.4262604713439941, "step": 1739 }, { "epoch": 1.5675675675675675, "grad_norm": 7.769261047923687, "learning_rate": 5.484054353515896e-06, "loss": 1.2348459959030151, "step": 1740 }, { "epoch": 1.5684684684684684, "grad_norm": 13.107939324830204, "learning_rate": 5.478837478665021e-06, "loss": 1.8473243713378906, "step": 1741 }, { "epoch": 1.5693693693693693, "grad_norm": 10.401820528121625, "learning_rate": 5.473620077658353e-06, "loss": 1.4473576545715332, "step": 1742 }, { "epoch": 1.5702702702702702, "grad_norm": 9.423733960457518, "learning_rate": 5.468402156228875e-06, "loss": 1.2356266975402832, "step": 1743 }, { "epoch": 1.571171171171171, "grad_norm": 10.585209187451408, "learning_rate": 5.463183720110138e-06, "loss": 1.097558617591858, "step": 1744 }, { "epoch": 1.5720720720720722, "grad_norm": 8.721091264497147, "learning_rate": 5.457964775036259e-06, "loss": 1.1964499950408936, "step": 1745 }, { "epoch": 1.572972972972973, "grad_norm": 14.234017672895392, "learning_rate": 5.452745326741914e-06, "loss": 2.836690664291382, "step": 1746 }, { "epoch": 1.573873873873874, "grad_norm": 14.692751474492763, "learning_rate": 5.447525380962334e-06, "loss": 1.6723198890686035, "step": 1747 }, { "epoch": 1.5747747747747747, "grad_norm": 9.03843708877778, "learning_rate": 5.442304943433294e-06, "loss": 1.8948137760162354, "step": 1748 }, { "epoch": 1.5756756756756758, "grad_norm": 10.540186751581782, "learning_rate": 5.437084019891113e-06, "loss": 1.5536619424819946, "step": 1749 }, { "epoch": 1.5765765765765765, "grad_norm": 9.353422818160505, "learning_rate": 5.431862616072643e-06, "loss": 1.621275544166565, "step": 1750 }, { "epoch": 1.5774774774774776, "grad_norm": 7.226380065522959, "learning_rate": 5.426640737715259e-06, "loss": 1.4333561658859253, "step": 1751 }, { "epoch": 1.5783783783783782, "grad_norm": 8.905206873635436, "learning_rate": 5.421418390556861e-06, "loss": 1.806396245956421, "step": 1752 }, { "epoch": 1.5792792792792794, "grad_norm": 14.590205105832894, "learning_rate": 5.416195580335864e-06, "loss": 2.2460813522338867, "step": 1753 }, { "epoch": 1.58018018018018, "grad_norm": 12.896655028517138, "learning_rate": 5.410972312791196e-06, "loss": 1.7828409671783447, "step": 1754 }, { "epoch": 1.5810810810810811, "grad_norm": 10.13319642764654, "learning_rate": 5.4057485936622774e-06, "loss": 1.285484790802002, "step": 1755 }, { "epoch": 1.581981981981982, "grad_norm": 12.578810943595805, "learning_rate": 5.400524428689035e-06, "loss": 1.4974174499511719, "step": 1756 }, { "epoch": 1.582882882882883, "grad_norm": 39.19894799894435, "learning_rate": 5.395299823611881e-06, "loss": 1.149336814880371, "step": 1757 }, { "epoch": 1.5837837837837838, "grad_norm": 8.933910764324592, "learning_rate": 5.390074784171711e-06, "loss": 2.2303504943847656, "step": 1758 }, { "epoch": 1.5846846846846847, "grad_norm": 8.087996858180993, "learning_rate": 5.384849316109897e-06, "loss": 1.6556181907653809, "step": 1759 }, { "epoch": 1.5855855855855856, "grad_norm": 15.824654305234416, "learning_rate": 5.379623425168287e-06, "loss": 1.2944085597991943, "step": 1760 }, { "epoch": 1.5864864864864865, "grad_norm": 10.838593459559144, "learning_rate": 5.374397117089185e-06, "loss": 0.8539993762969971, "step": 1761 }, { "epoch": 1.5873873873873874, "grad_norm": 13.54095066315367, "learning_rate": 5.369170397615361e-06, "loss": 1.1020762920379639, "step": 1762 }, { "epoch": 1.5882882882882883, "grad_norm": 8.534711679875066, "learning_rate": 5.363943272490034e-06, "loss": 1.6411093473434448, "step": 1763 }, { "epoch": 1.5891891891891892, "grad_norm": 6.1730102866116, "learning_rate": 5.358715747456871e-06, "loss": 1.6978633403778076, "step": 1764 }, { "epoch": 1.59009009009009, "grad_norm": 8.041044716644684, "learning_rate": 5.353487828259973e-06, "loss": 1.5700498819351196, "step": 1765 }, { "epoch": 1.590990990990991, "grad_norm": 11.102138549576248, "learning_rate": 5.348259520643883e-06, "loss": 1.0377918481826782, "step": 1766 }, { "epoch": 1.5918918918918918, "grad_norm": 12.738014608736016, "learning_rate": 5.343030830353561e-06, "loss": 1.8865203857421875, "step": 1767 }, { "epoch": 1.592792792792793, "grad_norm": 11.457612778203872, "learning_rate": 5.3378017631343925e-06, "loss": 1.6433229446411133, "step": 1768 }, { "epoch": 1.5936936936936936, "grad_norm": 14.71488254333933, "learning_rate": 5.332572324732178e-06, "loss": 1.7563650608062744, "step": 1769 }, { "epoch": 1.5945945945945947, "grad_norm": 17.721491557671634, "learning_rate": 5.327342520893125e-06, "loss": 1.1539019346237183, "step": 1770 }, { "epoch": 1.5954954954954954, "grad_norm": 8.71741568280385, "learning_rate": 5.322112357363841e-06, "loss": 2.0637054443359375, "step": 1771 }, { "epoch": 1.5963963963963965, "grad_norm": 12.56926543285426, "learning_rate": 5.31688183989133e-06, "loss": 1.3117108345031738, "step": 1772 }, { "epoch": 1.5972972972972972, "grad_norm": 8.865456653148819, "learning_rate": 5.311650974222986e-06, "loss": 1.6243171691894531, "step": 1773 }, { "epoch": 1.5981981981981983, "grad_norm": 12.345358106267252, "learning_rate": 5.306419766106582e-06, "loss": 0.6512848734855652, "step": 1774 }, { "epoch": 1.599099099099099, "grad_norm": 10.705630687496493, "learning_rate": 5.301188221290272e-06, "loss": 1.4087820053100586, "step": 1775 }, { "epoch": 1.6, "grad_norm": 10.741620683648545, "learning_rate": 5.295956345522576e-06, "loss": 1.1886143684387207, "step": 1776 }, { "epoch": 1.6009009009009008, "grad_norm": 8.543286773620101, "learning_rate": 5.290724144552379e-06, "loss": 1.4079172611236572, "step": 1777 }, { "epoch": 1.6018018018018019, "grad_norm": 13.272800580786697, "learning_rate": 5.285491624128927e-06, "loss": 1.1488702297210693, "step": 1778 }, { "epoch": 1.6027027027027025, "grad_norm": 14.426102880294756, "learning_rate": 5.280258790001809e-06, "loss": 0.9321128129959106, "step": 1779 }, { "epoch": 1.6036036036036037, "grad_norm": 9.548974634623296, "learning_rate": 5.275025647920966e-06, "loss": 1.1412519216537476, "step": 1780 }, { "epoch": 1.6045045045045045, "grad_norm": 20.524347880108646, "learning_rate": 5.2697922036366746e-06, "loss": 1.6211285591125488, "step": 1781 }, { "epoch": 1.6054054054054054, "grad_norm": 33.479578225974755, "learning_rate": 5.264558462899543e-06, "loss": 1.5719397068023682, "step": 1782 }, { "epoch": 1.6063063063063063, "grad_norm": 19.53752839452626, "learning_rate": 5.259324431460506e-06, "loss": 1.844597339630127, "step": 1783 }, { "epoch": 1.6072072072072072, "grad_norm": 16.542751722494142, "learning_rate": 5.254090115070818e-06, "loss": 1.5311559438705444, "step": 1784 }, { "epoch": 1.6081081081081081, "grad_norm": 8.724197779311208, "learning_rate": 5.248855519482043e-06, "loss": 1.4715955257415771, "step": 1785 }, { "epoch": 1.609009009009009, "grad_norm": 11.2231172158559, "learning_rate": 5.2436206504460605e-06, "loss": 1.6918704509735107, "step": 1786 }, { "epoch": 1.60990990990991, "grad_norm": 5.892152936833488, "learning_rate": 5.238385513715043e-06, "loss": 1.3249701261520386, "step": 1787 }, { "epoch": 1.6108108108108108, "grad_norm": 17.068558495770255, "learning_rate": 5.233150115041455e-06, "loss": 0.935366690158844, "step": 1788 }, { "epoch": 1.6117117117117117, "grad_norm": 14.047495217762876, "learning_rate": 5.227914460178057e-06, "loss": 1.8201367855072021, "step": 1789 }, { "epoch": 1.6126126126126126, "grad_norm": 11.293164291285848, "learning_rate": 5.222678554877886e-06, "loss": 2.1987149715423584, "step": 1790 }, { "epoch": 1.6135135135135135, "grad_norm": 22.676416411920346, "learning_rate": 5.217442404894254e-06, "loss": 1.5156608819961548, "step": 1791 }, { "epoch": 1.6144144144144144, "grad_norm": 7.958578598404144, "learning_rate": 5.212206015980742e-06, "loss": 1.5375053882598877, "step": 1792 }, { "epoch": 1.6153153153153155, "grad_norm": 9.514909529564704, "learning_rate": 5.206969393891197e-06, "loss": 1.2336030006408691, "step": 1793 }, { "epoch": 1.6162162162162161, "grad_norm": 12.967977313463441, "learning_rate": 5.201732544379718e-06, "loss": 1.5091674327850342, "step": 1794 }, { "epoch": 1.6171171171171173, "grad_norm": 35.3412053116069, "learning_rate": 5.196495473200656e-06, "loss": 2.655040979385376, "step": 1795 }, { "epoch": 1.618018018018018, "grad_norm": 10.153236749107771, "learning_rate": 5.191258186108608e-06, "loss": 2.105454921722412, "step": 1796 }, { "epoch": 1.618918918918919, "grad_norm": 9.554663494178893, "learning_rate": 5.1860206888584e-06, "loss": 1.0198447704315186, "step": 1797 }, { "epoch": 1.6198198198198197, "grad_norm": 14.959304262963258, "learning_rate": 5.180782987205096e-06, "loss": 1.8990309238433838, "step": 1798 }, { "epoch": 1.6207207207207208, "grad_norm": 7.995055189206548, "learning_rate": 5.175545086903985e-06, "loss": 1.9752475023269653, "step": 1799 }, { "epoch": 1.6216216216216215, "grad_norm": 7.132154343339352, "learning_rate": 5.170306993710569e-06, "loss": 1.1116163730621338, "step": 1800 }, { "epoch": 1.6225225225225226, "grad_norm": 20.544392234808946, "learning_rate": 5.165068713380568e-06, "loss": 2.2245161533355713, "step": 1801 }, { "epoch": 1.6234234234234233, "grad_norm": 8.74079077333362, "learning_rate": 5.159830251669904e-06, "loss": 1.6176211833953857, "step": 1802 }, { "epoch": 1.6243243243243244, "grad_norm": 17.127210113345892, "learning_rate": 5.154591614334698e-06, "loss": 1.2219548225402832, "step": 1803 }, { "epoch": 1.6252252252252253, "grad_norm": 6.946111506813401, "learning_rate": 5.149352807131266e-06, "loss": 1.9225727319717407, "step": 1804 }, { "epoch": 1.6261261261261262, "grad_norm": 11.802021809675379, "learning_rate": 5.14411383581611e-06, "loss": 1.5231215953826904, "step": 1805 }, { "epoch": 1.627027027027027, "grad_norm": 11.43398591743184, "learning_rate": 5.138874706145912e-06, "loss": 2.293747663497925, "step": 1806 }, { "epoch": 1.627927927927928, "grad_norm": 12.066185236189112, "learning_rate": 5.133635423877524e-06, "loss": 1.5310258865356445, "step": 1807 }, { "epoch": 1.6288288288288288, "grad_norm": 8.440505711955788, "learning_rate": 5.128395994767976e-06, "loss": 0.7817059755325317, "step": 1808 }, { "epoch": 1.6297297297297297, "grad_norm": 10.759503137406137, "learning_rate": 5.123156424574449e-06, "loss": 1.421698808670044, "step": 1809 }, { "epoch": 1.6306306306306306, "grad_norm": 11.512030401380272, "learning_rate": 5.117916719054285e-06, "loss": 1.6906330585479736, "step": 1810 }, { "epoch": 1.6315315315315315, "grad_norm": 13.917023003366065, "learning_rate": 5.112676883964972e-06, "loss": 2.2094297409057617, "step": 1811 }, { "epoch": 1.6324324324324324, "grad_norm": 13.373602393330966, "learning_rate": 5.107436925064141e-06, "loss": 1.8133379220962524, "step": 1812 }, { "epoch": 1.6333333333333333, "grad_norm": 8.79091168804635, "learning_rate": 5.102196848109558e-06, "loss": 1.3521404266357422, "step": 1813 }, { "epoch": 1.6342342342342342, "grad_norm": 9.774103988395167, "learning_rate": 5.096956658859122e-06, "loss": 1.5047627687454224, "step": 1814 }, { "epoch": 1.635135135135135, "grad_norm": 9.749121232105614, "learning_rate": 5.0917163630708535e-06, "loss": 0.7041552066802979, "step": 1815 }, { "epoch": 1.6360360360360362, "grad_norm": 9.645488273598785, "learning_rate": 5.0864759665028875e-06, "loss": 2.0276386737823486, "step": 1816 }, { "epoch": 1.6369369369369369, "grad_norm": 16.155605642009423, "learning_rate": 5.081235474913474e-06, "loss": 1.315154790878296, "step": 1817 }, { "epoch": 1.637837837837838, "grad_norm": 10.45011539448401, "learning_rate": 5.075994894060965e-06, "loss": 1.2299561500549316, "step": 1818 }, { "epoch": 1.6387387387387387, "grad_norm": 6.78701649907053, "learning_rate": 5.070754229703811e-06, "loss": 1.5950348377227783, "step": 1819 }, { "epoch": 1.6396396396396398, "grad_norm": 7.831513199875119, "learning_rate": 5.065513487600555e-06, "loss": 0.42251333594322205, "step": 1820 }, { "epoch": 1.6405405405405404, "grad_norm": 9.441732636387309, "learning_rate": 5.060272673509824e-06, "loss": 1.9003760814666748, "step": 1821 }, { "epoch": 1.6414414414414416, "grad_norm": 13.226111228686847, "learning_rate": 5.0550317931903236e-06, "loss": 1.4831870794296265, "step": 1822 }, { "epoch": 1.6423423423423422, "grad_norm": 21.681457173780473, "learning_rate": 5.049790852400837e-06, "loss": 2.1507914066314697, "step": 1823 }, { "epoch": 1.6432432432432433, "grad_norm": 9.197443881739524, "learning_rate": 5.044549856900207e-06, "loss": 0.9067004919052124, "step": 1824 }, { "epoch": 1.644144144144144, "grad_norm": 6.586339834129427, "learning_rate": 5.039308812447342e-06, "loss": 2.0299181938171387, "step": 1825 }, { "epoch": 1.6450450450450451, "grad_norm": 7.470865346958301, "learning_rate": 5.0340677248012e-06, "loss": 1.4668736457824707, "step": 1826 }, { "epoch": 1.6459459459459458, "grad_norm": 11.646958846660729, "learning_rate": 5.028826599720791e-06, "loss": 1.1238493919372559, "step": 1827 }, { "epoch": 1.646846846846847, "grad_norm": 12.009980530239458, "learning_rate": 5.023585442965162e-06, "loss": 1.087951421737671, "step": 1828 }, { "epoch": 1.6477477477477478, "grad_norm": 13.671737196875641, "learning_rate": 5.018344260293394e-06, "loss": 1.4537632465362549, "step": 1829 }, { "epoch": 1.6486486486486487, "grad_norm": 12.275631728640773, "learning_rate": 5.013103057464604e-06, "loss": 1.1609238386154175, "step": 1830 }, { "epoch": 1.6495495495495496, "grad_norm": 13.909632331811517, "learning_rate": 5.0078618402379235e-06, "loss": 0.9822266697883606, "step": 1831 }, { "epoch": 1.6504504504504505, "grad_norm": 12.335022091123836, "learning_rate": 5.002620614372502e-06, "loss": 1.4582247734069824, "step": 1832 }, { "epoch": 1.6513513513513514, "grad_norm": 15.567399091027983, "learning_rate": 4.997379385627499e-06, "loss": 1.7853538990020752, "step": 1833 }, { "epoch": 1.6522522522522523, "grad_norm": 8.448600924161148, "learning_rate": 4.992138159762077e-06, "loss": 1.6177634000778198, "step": 1834 }, { "epoch": 1.6531531531531531, "grad_norm": 18.989389160269997, "learning_rate": 4.986896942535397e-06, "loss": 1.8286075592041016, "step": 1835 }, { "epoch": 1.654054054054054, "grad_norm": 14.777730822153895, "learning_rate": 4.981655739706606e-06, "loss": 2.3525032997131348, "step": 1836 }, { "epoch": 1.654954954954955, "grad_norm": 12.034893940187724, "learning_rate": 4.97641455703484e-06, "loss": 1.6599555015563965, "step": 1837 }, { "epoch": 1.6558558558558558, "grad_norm": 11.424236746033749, "learning_rate": 4.971173400279211e-06, "loss": 1.7991001605987549, "step": 1838 }, { "epoch": 1.6567567567567567, "grad_norm": 17.93729333887496, "learning_rate": 4.965932275198801e-06, "loss": 1.4123523235321045, "step": 1839 }, { "epoch": 1.6576576576576576, "grad_norm": 14.405695155183214, "learning_rate": 4.9606911875526595e-06, "loss": 1.6130017042160034, "step": 1840 }, { "epoch": 1.6585585585585587, "grad_norm": 8.550653806048487, "learning_rate": 4.9554501430997935e-06, "loss": 1.1157453060150146, "step": 1841 }, { "epoch": 1.6594594594594594, "grad_norm": 13.963548010924033, "learning_rate": 4.950209147599164e-06, "loss": 1.7340162992477417, "step": 1842 }, { "epoch": 1.6603603603603605, "grad_norm": 12.618807401986361, "learning_rate": 4.944968206809678e-06, "loss": 1.807329535484314, "step": 1843 }, { "epoch": 1.6612612612612612, "grad_norm": 9.469283753876082, "learning_rate": 4.939727326490179e-06, "loss": 1.7297239303588867, "step": 1844 }, { "epoch": 1.6621621621621623, "grad_norm": 12.69181008288232, "learning_rate": 4.934486512399448e-06, "loss": 1.4359819889068604, "step": 1845 }, { "epoch": 1.663063063063063, "grad_norm": 10.506967926904066, "learning_rate": 4.929245770296191e-06, "loss": 0.88924640417099, "step": 1846 }, { "epoch": 1.663963963963964, "grad_norm": 12.951834717347314, "learning_rate": 4.924005105939037e-06, "loss": 2.525228500366211, "step": 1847 }, { "epoch": 1.6648648648648647, "grad_norm": 11.919812373158688, "learning_rate": 4.918764525086526e-06, "loss": 2.649840831756592, "step": 1848 }, { "epoch": 1.6657657657657658, "grad_norm": 12.59000885635357, "learning_rate": 4.9135240334971125e-06, "loss": 1.0599262714385986, "step": 1849 }, { "epoch": 1.6666666666666665, "grad_norm": 13.510155903609554, "learning_rate": 4.908283636929148e-06, "loss": 1.9727380275726318, "step": 1850 }, { "epoch": 1.6675675675675676, "grad_norm": 10.023354144746676, "learning_rate": 4.903043341140879e-06, "loss": 1.5703070163726807, "step": 1851 }, { "epoch": 1.6684684684684683, "grad_norm": 12.25730601159229, "learning_rate": 4.8978031518904426e-06, "loss": 2.0800280570983887, "step": 1852 }, { "epoch": 1.6693693693693694, "grad_norm": 13.031520450060414, "learning_rate": 4.892563074935861e-06, "loss": 1.8855412006378174, "step": 1853 }, { "epoch": 1.6702702702702703, "grad_norm": 12.694316598911586, "learning_rate": 4.88732311603503e-06, "loss": 1.389702320098877, "step": 1854 }, { "epoch": 1.6711711711711712, "grad_norm": 13.196528378231587, "learning_rate": 4.882083280945716e-06, "loss": 1.3638392686843872, "step": 1855 }, { "epoch": 1.672072072072072, "grad_norm": 36.57858549033672, "learning_rate": 4.876843575425552e-06, "loss": 1.2863314151763916, "step": 1856 }, { "epoch": 1.672972972972973, "grad_norm": 11.214173760175132, "learning_rate": 4.871604005232025e-06, "loss": 2.1213583946228027, "step": 1857 }, { "epoch": 1.6738738738738739, "grad_norm": 15.26985886525236, "learning_rate": 4.866364576122477e-06, "loss": 1.1910454034805298, "step": 1858 }, { "epoch": 1.6747747747747748, "grad_norm": 7.650950317476867, "learning_rate": 4.8611252938540905e-06, "loss": 1.617013931274414, "step": 1859 }, { "epoch": 1.6756756756756757, "grad_norm": 6.697009447144261, "learning_rate": 4.8558861641838914e-06, "loss": 1.7851452827453613, "step": 1860 }, { "epoch": 1.6765765765765765, "grad_norm": 36.276983437220125, "learning_rate": 4.8506471928687355e-06, "loss": 0.9960858821868896, "step": 1861 }, { "epoch": 1.6774774774774774, "grad_norm": 12.587915346906154, "learning_rate": 4.845408385665304e-06, "loss": 1.7082439661026, "step": 1862 }, { "epoch": 1.6783783783783783, "grad_norm": 9.952914402871658, "learning_rate": 4.840169748330096e-06, "loss": 2.0728540420532227, "step": 1863 }, { "epoch": 1.6792792792792792, "grad_norm": 8.780047498997055, "learning_rate": 4.834931286619432e-06, "loss": 1.2778596878051758, "step": 1864 }, { "epoch": 1.6801801801801801, "grad_norm": 12.42477262833344, "learning_rate": 4.829693006289431e-06, "loss": 1.4588884115219116, "step": 1865 }, { "epoch": 1.6810810810810812, "grad_norm": 23.60159068391552, "learning_rate": 4.824454913096017e-06, "loss": 1.6938081979751587, "step": 1866 }, { "epoch": 1.681981981981982, "grad_norm": 10.090972321761608, "learning_rate": 4.819217012794905e-06, "loss": 1.3040815591812134, "step": 1867 }, { "epoch": 1.682882882882883, "grad_norm": 14.158069020228154, "learning_rate": 4.813979311141602e-06, "loss": 1.5980745553970337, "step": 1868 }, { "epoch": 1.6837837837837837, "grad_norm": 13.735311953873568, "learning_rate": 4.808741813891394e-06, "loss": 1.662085771560669, "step": 1869 }, { "epoch": 1.6846846846846848, "grad_norm": 6.729855693752189, "learning_rate": 4.8035045267993445e-06, "loss": 1.634281873703003, "step": 1870 }, { "epoch": 1.6855855855855855, "grad_norm": 22.744365791952895, "learning_rate": 4.798267455620283e-06, "loss": 1.6238887310028076, "step": 1871 }, { "epoch": 1.6864864864864866, "grad_norm": 18.216565260056957, "learning_rate": 4.793030606108805e-06, "loss": 2.6606647968292236, "step": 1872 }, { "epoch": 1.6873873873873872, "grad_norm": 9.77827469578083, "learning_rate": 4.78779398401926e-06, "loss": 1.3882408142089844, "step": 1873 }, { "epoch": 1.6882882882882884, "grad_norm": 12.694831394117665, "learning_rate": 4.782557595105749e-06, "loss": 1.601452112197876, "step": 1874 }, { "epoch": 1.689189189189189, "grad_norm": 10.809963035776216, "learning_rate": 4.7773214451221165e-06, "loss": 1.9865121841430664, "step": 1875 }, { "epoch": 1.6900900900900901, "grad_norm": 10.18731847385368, "learning_rate": 4.772085539821945e-06, "loss": 2.2083988189697266, "step": 1876 }, { "epoch": 1.690990990990991, "grad_norm": 30.780898769798974, "learning_rate": 4.766849884958546e-06, "loss": 1.2041118144989014, "step": 1877 }, { "epoch": 1.691891891891892, "grad_norm": 10.945051063745945, "learning_rate": 4.7616144862849585e-06, "loss": 1.120283603668213, "step": 1878 }, { "epoch": 1.6927927927927928, "grad_norm": 12.9577090634596, "learning_rate": 4.7563793495539395e-06, "loss": 1.7876710891723633, "step": 1879 }, { "epoch": 1.6936936936936937, "grad_norm": 10.344690917084307, "learning_rate": 4.751144480517956e-06, "loss": 1.6284270286560059, "step": 1880 }, { "epoch": 1.6945945945945946, "grad_norm": 22.650389896351047, "learning_rate": 4.745909884929184e-06, "loss": 1.6889030933380127, "step": 1881 }, { "epoch": 1.6954954954954955, "grad_norm": 8.62342726183916, "learning_rate": 4.740675568539495e-06, "loss": 1.451640009880066, "step": 1882 }, { "epoch": 1.6963963963963964, "grad_norm": 10.835080585069939, "learning_rate": 4.735441537100458e-06, "loss": 1.3494681119918823, "step": 1883 }, { "epoch": 1.6972972972972973, "grad_norm": 9.526831764008596, "learning_rate": 4.730207796363327e-06, "loss": 1.5760974884033203, "step": 1884 }, { "epoch": 1.6981981981981982, "grad_norm": 10.205558172881307, "learning_rate": 4.724974352079036e-06, "loss": 1.8778183460235596, "step": 1885 }, { "epoch": 1.699099099099099, "grad_norm": 13.132706909089642, "learning_rate": 4.719741209998192e-06, "loss": 1.5913633108139038, "step": 1886 }, { "epoch": 1.7, "grad_norm": 8.838642913832645, "learning_rate": 4.714508375871075e-06, "loss": 2.1396777629852295, "step": 1887 }, { "epoch": 1.7009009009009008, "grad_norm": 10.790780937843527, "learning_rate": 4.7092758554476215e-06, "loss": 1.3998993635177612, "step": 1888 }, { "epoch": 1.701801801801802, "grad_norm": 13.017536027213275, "learning_rate": 4.704043654477426e-06, "loss": 1.4358144998550415, "step": 1889 }, { "epoch": 1.7027027027027026, "grad_norm": 11.746019198220758, "learning_rate": 4.6988117787097306e-06, "loss": 1.917391061782837, "step": 1890 }, { "epoch": 1.7036036036036037, "grad_norm": 8.085372419455812, "learning_rate": 4.69358023389342e-06, "loss": 1.343592882156372, "step": 1891 }, { "epoch": 1.7045045045045044, "grad_norm": 14.492111970042975, "learning_rate": 4.688349025777015e-06, "loss": 1.737886905670166, "step": 1892 }, { "epoch": 1.7054054054054055, "grad_norm": 11.989231765241929, "learning_rate": 4.683118160108669e-06, "loss": 1.2686560153961182, "step": 1893 }, { "epoch": 1.7063063063063062, "grad_norm": 10.070388036081532, "learning_rate": 4.6778876426361594e-06, "loss": 1.584463357925415, "step": 1894 }, { "epoch": 1.7072072072072073, "grad_norm": 11.768188931183234, "learning_rate": 4.672657479106875e-06, "loss": 1.7363247871398926, "step": 1895 }, { "epoch": 1.708108108108108, "grad_norm": 23.170647656149796, "learning_rate": 4.667427675267823e-06, "loss": 1.6051385402679443, "step": 1896 }, { "epoch": 1.709009009009009, "grad_norm": 6.505681805310561, "learning_rate": 4.662198236865609e-06, "loss": 1.6003258228302002, "step": 1897 }, { "epoch": 1.7099099099099098, "grad_norm": 15.029094086005635, "learning_rate": 4.656969169646441e-06, "loss": 0.6877315044403076, "step": 1898 }, { "epoch": 1.7108108108108109, "grad_norm": 12.380703102012152, "learning_rate": 4.65174047935612e-06, "loss": 1.3788490295410156, "step": 1899 }, { "epoch": 1.7117117117117115, "grad_norm": 11.386064056229571, "learning_rate": 4.646512171740028e-06, "loss": 2.066871404647827, "step": 1900 }, { "epoch": 1.7126126126126127, "grad_norm": 17.705891829662622, "learning_rate": 4.641284252543131e-06, "loss": 1.9048995971679688, "step": 1901 }, { "epoch": 1.7135135135135136, "grad_norm": 9.36166670759488, "learning_rate": 4.636056727509968e-06, "loss": 1.6398744583129883, "step": 1902 }, { "epoch": 1.7144144144144144, "grad_norm": 9.126165176019333, "learning_rate": 4.630829602384641e-06, "loss": 1.5823161602020264, "step": 1903 }, { "epoch": 1.7153153153153153, "grad_norm": 34.45379460464936, "learning_rate": 4.625602882910818e-06, "loss": 1.6188974380493164, "step": 1904 }, { "epoch": 1.7162162162162162, "grad_norm": 9.611917464319482, "learning_rate": 4.620376574831717e-06, "loss": 1.4678549766540527, "step": 1905 }, { "epoch": 1.7171171171171171, "grad_norm": 13.379680968485829, "learning_rate": 4.615150683890105e-06, "loss": 1.2325408458709717, "step": 1906 }, { "epoch": 1.718018018018018, "grad_norm": 9.333383096793895, "learning_rate": 4.60992521582829e-06, "loss": 1.7048616409301758, "step": 1907 }, { "epoch": 1.718918918918919, "grad_norm": 8.377997409792348, "learning_rate": 4.604700176388119e-06, "loss": 1.4476325511932373, "step": 1908 }, { "epoch": 1.7198198198198198, "grad_norm": 15.18463966415058, "learning_rate": 4.599475571310965e-06, "loss": 2.0498039722442627, "step": 1909 }, { "epoch": 1.7207207207207207, "grad_norm": 13.635293999015182, "learning_rate": 4.594251406337723e-06, "loss": 1.41183340549469, "step": 1910 }, { "epoch": 1.7216216216216216, "grad_norm": 9.619050169190075, "learning_rate": 4.589027687208806e-06, "loss": 1.4390678405761719, "step": 1911 }, { "epoch": 1.7225225225225225, "grad_norm": 8.691323448200201, "learning_rate": 4.583804419664137e-06, "loss": 1.912278175354004, "step": 1912 }, { "epoch": 1.7234234234234234, "grad_norm": 13.907079897704138, "learning_rate": 4.578581609443141e-06, "loss": 1.561707615852356, "step": 1913 }, { "epoch": 1.7243243243243245, "grad_norm": 12.471220009886467, "learning_rate": 4.573359262284744e-06, "loss": 1.1934659481048584, "step": 1914 }, { "epoch": 1.7252252252252251, "grad_norm": 9.836124267121887, "learning_rate": 4.568137383927359e-06, "loss": 1.3664239645004272, "step": 1915 }, { "epoch": 1.7261261261261263, "grad_norm": 13.055513206884156, "learning_rate": 4.562915980108888e-06, "loss": 1.6567761898040771, "step": 1916 }, { "epoch": 1.727027027027027, "grad_norm": 12.097454766477828, "learning_rate": 4.557695056566707e-06, "loss": 1.1626321077346802, "step": 1917 }, { "epoch": 1.727927927927928, "grad_norm": 23.041165401436277, "learning_rate": 4.552474619037669e-06, "loss": 2.7990541458129883, "step": 1918 }, { "epoch": 1.7288288288288287, "grad_norm": 7.122789479043114, "learning_rate": 4.547254673258089e-06, "loss": 1.6740970611572266, "step": 1919 }, { "epoch": 1.7297297297297298, "grad_norm": 9.542192387325457, "learning_rate": 4.5420352249637445e-06, "loss": 1.7170947790145874, "step": 1920 }, { "epoch": 1.7306306306306305, "grad_norm": 8.482009364764991, "learning_rate": 4.5368162798898655e-06, "loss": 0.8922778964042664, "step": 1921 }, { "epoch": 1.7315315315315316, "grad_norm": 18.77987322013907, "learning_rate": 4.531597843771125e-06, "loss": 1.7073520421981812, "step": 1922 }, { "epoch": 1.7324324324324323, "grad_norm": 11.291633682693858, "learning_rate": 4.5263799223416476e-06, "loss": 0.8407334089279175, "step": 1923 }, { "epoch": 1.7333333333333334, "grad_norm": 8.443523222421167, "learning_rate": 4.521162521334981e-06, "loss": 1.8258352279663086, "step": 1924 }, { "epoch": 1.7342342342342343, "grad_norm": 12.141582208802781, "learning_rate": 4.515945646484105e-06, "loss": 2.1571366786956787, "step": 1925 }, { "epoch": 1.7351351351351352, "grad_norm": 8.941945890884975, "learning_rate": 4.5107293035214224e-06, "loss": 1.1934655904769897, "step": 1926 }, { "epoch": 1.736036036036036, "grad_norm": 14.470333982533512, "learning_rate": 4.505513498178752e-06, "loss": 1.6931910514831543, "step": 1927 }, { "epoch": 1.736936936936937, "grad_norm": 11.323491588803767, "learning_rate": 4.500298236187318e-06, "loss": 1.9419209957122803, "step": 1928 }, { "epoch": 1.7378378378378379, "grad_norm": 10.616763433886799, "learning_rate": 4.495083523277752e-06, "loss": 1.0829826593399048, "step": 1929 }, { "epoch": 1.7387387387387387, "grad_norm": 12.848320162302201, "learning_rate": 4.48986936518008e-06, "loss": 1.0987441539764404, "step": 1930 }, { "epoch": 1.7396396396396396, "grad_norm": 13.297450232626861, "learning_rate": 4.484655767623719e-06, "loss": 1.629490852355957, "step": 1931 }, { "epoch": 1.7405405405405405, "grad_norm": 12.042664808161364, "learning_rate": 4.47944273633747e-06, "loss": 1.637264370918274, "step": 1932 }, { "epoch": 1.7414414414414414, "grad_norm": 13.081268204915583, "learning_rate": 4.47423027704951e-06, "loss": 1.182558536529541, "step": 1933 }, { "epoch": 1.7423423423423423, "grad_norm": 11.812617882184464, "learning_rate": 4.46901839548739e-06, "loss": 0.9834614992141724, "step": 1934 }, { "epoch": 1.7432432432432432, "grad_norm": 8.428541833076737, "learning_rate": 4.463807097378026e-06, "loss": 1.5438402891159058, "step": 1935 }, { "epoch": 1.744144144144144, "grad_norm": 15.459648349861814, "learning_rate": 4.458596388447691e-06, "loss": 2.0940542221069336, "step": 1936 }, { "epoch": 1.7450450450450452, "grad_norm": 10.376271715098074, "learning_rate": 4.453386274422013e-06, "loss": 2.4595947265625, "step": 1937 }, { "epoch": 1.7459459459459459, "grad_norm": 12.982535008277809, "learning_rate": 4.448176761025964e-06, "loss": 1.204673409461975, "step": 1938 }, { "epoch": 1.746846846846847, "grad_norm": 8.878620923656074, "learning_rate": 4.442967853983858e-06, "loss": 1.2832019329071045, "step": 1939 }, { "epoch": 1.7477477477477477, "grad_norm": 18.68623231270244, "learning_rate": 4.4377595590193425e-06, "loss": 1.9358878135681152, "step": 1940 }, { "epoch": 1.7486486486486488, "grad_norm": 7.264887516302695, "learning_rate": 4.432551881855389e-06, "loss": 1.9682033061981201, "step": 1941 }, { "epoch": 1.7495495495495494, "grad_norm": 9.547635344931772, "learning_rate": 4.4273448282142955e-06, "loss": 1.5022704601287842, "step": 1942 }, { "epoch": 1.7504504504504506, "grad_norm": 9.1275832195882, "learning_rate": 4.4221384038176715e-06, "loss": 1.5394527912139893, "step": 1943 }, { "epoch": 1.7513513513513512, "grad_norm": 9.38133220802807, "learning_rate": 4.416932614386436e-06, "loss": 2.5494089126586914, "step": 1944 }, { "epoch": 1.7522522522522523, "grad_norm": 11.591914168149954, "learning_rate": 4.411727465640808e-06, "loss": 0.8944557905197144, "step": 1945 }, { "epoch": 1.753153153153153, "grad_norm": 22.8172078054111, "learning_rate": 4.4065229633003075e-06, "loss": 1.611649751663208, "step": 1946 }, { "epoch": 1.7540540540540541, "grad_norm": 11.109839607950518, "learning_rate": 4.401319113083739e-06, "loss": 1.329512119293213, "step": 1947 }, { "epoch": 1.7549549549549548, "grad_norm": 21.985675185072974, "learning_rate": 4.3961159207091956e-06, "loss": 1.5959854125976562, "step": 1948 }, { "epoch": 1.755855855855856, "grad_norm": 17.05162633790248, "learning_rate": 4.390913391894042e-06, "loss": 1.1783095598220825, "step": 1949 }, { "epoch": 1.7567567567567568, "grad_norm": 16.460328325388797, "learning_rate": 4.385711532354918e-06, "loss": 1.4390449523925781, "step": 1950 }, { "epoch": 1.7576576576576577, "grad_norm": 16.566363080592808, "learning_rate": 4.380510347807725e-06, "loss": 1.1258951425552368, "step": 1951 }, { "epoch": 1.7585585585585586, "grad_norm": 12.633480932184735, "learning_rate": 4.375309843967626e-06, "loss": 1.5847431421279907, "step": 1952 }, { "epoch": 1.7594594594594595, "grad_norm": 10.331704659363535, "learning_rate": 4.370110026549034e-06, "loss": 1.314765214920044, "step": 1953 }, { "epoch": 1.7603603603603604, "grad_norm": 11.237550533850884, "learning_rate": 4.364910901265607e-06, "loss": 1.1468602418899536, "step": 1954 }, { "epoch": 1.7612612612612613, "grad_norm": 24.912977361024677, "learning_rate": 4.359712473830243e-06, "loss": 1.5914771556854248, "step": 1955 }, { "epoch": 1.7621621621621621, "grad_norm": 10.381393773513075, "learning_rate": 4.354514749955076e-06, "loss": 1.424102544784546, "step": 1956 }, { "epoch": 1.763063063063063, "grad_norm": 9.357062157493605, "learning_rate": 4.3493177353514624e-06, "loss": 1.5307369232177734, "step": 1957 }, { "epoch": 1.763963963963964, "grad_norm": 12.328032504587473, "learning_rate": 4.344121435729982e-06, "loss": 2.0486297607421875, "step": 1958 }, { "epoch": 1.7648648648648648, "grad_norm": 10.662234297040511, "learning_rate": 4.338925856800427e-06, "loss": 1.174267292022705, "step": 1959 }, { "epoch": 1.7657657657657657, "grad_norm": 17.990451738218127, "learning_rate": 4.333731004271802e-06, "loss": 1.854561448097229, "step": 1960 }, { "epoch": 1.7666666666666666, "grad_norm": 9.265543688096725, "learning_rate": 4.328536883852308e-06, "loss": 1.2113375663757324, "step": 1961 }, { "epoch": 1.7675675675675677, "grad_norm": 10.947719937058705, "learning_rate": 4.323343501249346e-06, "loss": 1.1589584350585938, "step": 1962 }, { "epoch": 1.7684684684684684, "grad_norm": 10.753799580953297, "learning_rate": 4.318150862169503e-06, "loss": 1.3148674964904785, "step": 1963 }, { "epoch": 1.7693693693693695, "grad_norm": 12.437389422374402, "learning_rate": 4.312958972318549e-06, "loss": 1.6055564880371094, "step": 1964 }, { "epoch": 1.7702702702702702, "grad_norm": 9.17029071274015, "learning_rate": 4.307767837401432e-06, "loss": 1.801576018333435, "step": 1965 }, { "epoch": 1.7711711711711713, "grad_norm": 9.802356077955512, "learning_rate": 4.302577463122272e-06, "loss": 1.3246842622756958, "step": 1966 }, { "epoch": 1.772072072072072, "grad_norm": 10.63612762092608, "learning_rate": 4.297387855184353e-06, "loss": 1.4052033424377441, "step": 1967 }, { "epoch": 1.772972972972973, "grad_norm": 9.098823159090228, "learning_rate": 4.292199019290113e-06, "loss": 1.206218957901001, "step": 1968 }, { "epoch": 1.7738738738738737, "grad_norm": 11.419309891598209, "learning_rate": 4.287010961141146e-06, "loss": 1.620813012123108, "step": 1969 }, { "epoch": 1.7747747747747749, "grad_norm": 24.34857001055044, "learning_rate": 4.281823686438189e-06, "loss": 0.8904290199279785, "step": 1970 }, { "epoch": 1.7756756756756755, "grad_norm": 22.417586388160636, "learning_rate": 4.2766372008811185e-06, "loss": 0.9551545977592468, "step": 1971 }, { "epoch": 1.7765765765765766, "grad_norm": 15.312906156689655, "learning_rate": 4.2714515101689434e-06, "loss": 1.7072341442108154, "step": 1972 }, { "epoch": 1.7774774774774775, "grad_norm": 7.434001990764085, "learning_rate": 4.2662666199998015e-06, "loss": 1.5110739469528198, "step": 1973 }, { "epoch": 1.7783783783783784, "grad_norm": 13.199118475520589, "learning_rate": 4.261082536070949e-06, "loss": 1.3560364246368408, "step": 1974 }, { "epoch": 1.7792792792792793, "grad_norm": 14.73411806424218, "learning_rate": 4.255899264078756e-06, "loss": 2.025747299194336, "step": 1975 }, { "epoch": 1.7801801801801802, "grad_norm": 7.606460147575554, "learning_rate": 4.250716809718702e-06, "loss": 1.3578187227249146, "step": 1976 }, { "epoch": 1.781081081081081, "grad_norm": 10.633263492608826, "learning_rate": 4.245535178685365e-06, "loss": 1.8711591958999634, "step": 1977 }, { "epoch": 1.781981981981982, "grad_norm": 12.640152634057468, "learning_rate": 4.240354376672423e-06, "loss": 1.2626956701278687, "step": 1978 }, { "epoch": 1.7828828828828829, "grad_norm": 8.48581873036355, "learning_rate": 4.235174409372639e-06, "loss": 1.4237213134765625, "step": 1979 }, { "epoch": 1.7837837837837838, "grad_norm": 9.10623029922968, "learning_rate": 4.229995282477861e-06, "loss": 1.2612204551696777, "step": 1980 }, { "epoch": 1.7846846846846847, "grad_norm": 12.122076224231822, "learning_rate": 4.224817001679011e-06, "loss": 2.193499803543091, "step": 1981 }, { "epoch": 1.7855855855855856, "grad_norm": 16.09150960239607, "learning_rate": 4.219639572666086e-06, "loss": 1.6633172035217285, "step": 1982 }, { "epoch": 1.7864864864864864, "grad_norm": 12.738883623853681, "learning_rate": 4.214463001128142e-06, "loss": 1.3485755920410156, "step": 1983 }, { "epoch": 1.7873873873873873, "grad_norm": 9.290550916969964, "learning_rate": 4.209287292753296e-06, "loss": 1.4145152568817139, "step": 1984 }, { "epoch": 1.7882882882882885, "grad_norm": 18.95130486017209, "learning_rate": 4.2041124532287144e-06, "loss": 2.6751809120178223, "step": 1985 }, { "epoch": 1.7891891891891891, "grad_norm": 8.393607675732447, "learning_rate": 4.198938488240612e-06, "loss": 1.5532233715057373, "step": 1986 }, { "epoch": 1.7900900900900902, "grad_norm": 11.174186055406079, "learning_rate": 4.193765403474239e-06, "loss": 2.401888370513916, "step": 1987 }, { "epoch": 1.790990990990991, "grad_norm": 17.38838180555356, "learning_rate": 4.18859320461388e-06, "loss": 2.247199058532715, "step": 1988 }, { "epoch": 1.791891891891892, "grad_norm": 13.023152953286782, "learning_rate": 4.183421897342847e-06, "loss": 0.7688824534416199, "step": 1989 }, { "epoch": 1.7927927927927927, "grad_norm": 12.317225228851253, "learning_rate": 4.178251487343471e-06, "loss": 1.072916030883789, "step": 1990 }, { "epoch": 1.7936936936936938, "grad_norm": 25.611755204557735, "learning_rate": 4.173081980297097e-06, "loss": 1.5963506698608398, "step": 1991 }, { "epoch": 1.7945945945945945, "grad_norm": 9.788119112625036, "learning_rate": 4.167913381884078e-06, "loss": 2.787838935852051, "step": 1992 }, { "epoch": 1.7954954954954956, "grad_norm": 11.369520756068557, "learning_rate": 4.162745697783771e-06, "loss": 1.4114885330200195, "step": 1993 }, { "epoch": 1.7963963963963963, "grad_norm": 14.440209012246697, "learning_rate": 4.157578933674523e-06, "loss": 1.4251551628112793, "step": 1994 }, { "epoch": 1.7972972972972974, "grad_norm": 11.441179344455747, "learning_rate": 4.152413095233675e-06, "loss": 1.057724118232727, "step": 1995 }, { "epoch": 1.798198198198198, "grad_norm": 8.968340436397817, "learning_rate": 4.147248188137552e-06, "loss": 1.0680241584777832, "step": 1996 }, { "epoch": 1.7990990990990992, "grad_norm": 8.745257607417827, "learning_rate": 4.142084218061449e-06, "loss": 1.6237149238586426, "step": 1997 }, { "epoch": 1.8, "grad_norm": 10.871073066257322, "learning_rate": 4.1369211906796365e-06, "loss": 1.2855372428894043, "step": 1998 }, { "epoch": 1.800900900900901, "grad_norm": 10.979385256727682, "learning_rate": 4.131759111665349e-06, "loss": 1.7986273765563965, "step": 1999 }, { "epoch": 1.8018018018018018, "grad_norm": 14.11274565981479, "learning_rate": 4.126597986690775e-06, "loss": 1.5659823417663574, "step": 2000 }, { "epoch": 1.8027027027027027, "grad_norm": 16.204666886335296, "learning_rate": 4.12143782142706e-06, "loss": 1.4156365394592285, "step": 2001 }, { "epoch": 1.8036036036036036, "grad_norm": 9.815542164364134, "learning_rate": 4.1162786215442925e-06, "loss": 1.0431914329528809, "step": 2002 }, { "epoch": 1.8045045045045045, "grad_norm": 10.84105100773819, "learning_rate": 4.111120392711498e-06, "loss": 2.210622787475586, "step": 2003 }, { "epoch": 1.8054054054054054, "grad_norm": 9.559989900443458, "learning_rate": 4.105963140596639e-06, "loss": 1.242011308670044, "step": 2004 }, { "epoch": 1.8063063063063063, "grad_norm": 8.568770279944586, "learning_rate": 4.1008068708666014e-06, "loss": 1.619928002357483, "step": 2005 }, { "epoch": 1.8072072072072072, "grad_norm": 14.442304362577028, "learning_rate": 4.095651589187194e-06, "loss": 1.4707170724868774, "step": 2006 }, { "epoch": 1.808108108108108, "grad_norm": 14.493957616165693, "learning_rate": 4.090497301223139e-06, "loss": 1.3726030588150024, "step": 2007 }, { "epoch": 1.809009009009009, "grad_norm": 11.818396980596093, "learning_rate": 4.085344012638067e-06, "loss": 1.193473219871521, "step": 2008 }, { "epoch": 1.8099099099099099, "grad_norm": 11.66989118748204, "learning_rate": 4.080191729094511e-06, "loss": 2.0238192081451416, "step": 2009 }, { "epoch": 1.810810810810811, "grad_norm": 9.628899081637774, "learning_rate": 4.075040456253895e-06, "loss": 0.8842580318450928, "step": 2010 }, { "epoch": 1.8117117117117116, "grad_norm": 12.899685441367888, "learning_rate": 4.06989019977654e-06, "loss": 1.4153761863708496, "step": 2011 }, { "epoch": 1.8126126126126128, "grad_norm": 10.384908596450975, "learning_rate": 4.064740965321645e-06, "loss": 1.5833828449249268, "step": 2012 }, { "epoch": 1.8135135135135134, "grad_norm": 11.176020026024228, "learning_rate": 4.059592758547289e-06, "loss": 1.6319794654846191, "step": 2013 }, { "epoch": 1.8144144144144145, "grad_norm": 17.27729200701665, "learning_rate": 4.054445585110418e-06, "loss": 1.8185272216796875, "step": 2014 }, { "epoch": 1.8153153153153152, "grad_norm": 8.923348346894493, "learning_rate": 4.049299450666847e-06, "loss": 1.4681587219238281, "step": 2015 }, { "epoch": 1.8162162162162163, "grad_norm": 19.601648325047385, "learning_rate": 4.044154360871246e-06, "loss": 1.7325223684310913, "step": 2016 }, { "epoch": 1.817117117117117, "grad_norm": 24.610158924892566, "learning_rate": 4.039010321377137e-06, "loss": 1.5184123516082764, "step": 2017 }, { "epoch": 1.818018018018018, "grad_norm": 14.186394154039885, "learning_rate": 4.03386733783689e-06, "loss": 1.391357183456421, "step": 2018 }, { "epoch": 1.8189189189189188, "grad_norm": 11.364046322795115, "learning_rate": 4.028725415901714e-06, "loss": 1.3705822229385376, "step": 2019 }, { "epoch": 1.8198198198198199, "grad_norm": 7.1758672636868255, "learning_rate": 4.023584561221651e-06, "loss": 1.6241412162780762, "step": 2020 }, { "epoch": 1.8207207207207208, "grad_norm": 19.287585618713152, "learning_rate": 4.018444779445571e-06, "loss": 1.5234278440475464, "step": 2021 }, { "epoch": 1.8216216216216217, "grad_norm": 12.373016754651946, "learning_rate": 4.013306076221164e-06, "loss": 1.373924732208252, "step": 2022 }, { "epoch": 1.8225225225225226, "grad_norm": 8.830996176619, "learning_rate": 4.008168457194937e-06, "loss": 2.0185816287994385, "step": 2023 }, { "epoch": 1.8234234234234235, "grad_norm": 16.200118685216125, "learning_rate": 4.003031928012202e-06, "loss": 1.0929310321807861, "step": 2024 }, { "epoch": 1.8243243243243243, "grad_norm": 13.89448554252347, "learning_rate": 3.997896494317076e-06, "loss": 1.8505454063415527, "step": 2025 }, { "epoch": 1.8252252252252252, "grad_norm": 7.941034603458671, "learning_rate": 3.992762161752474e-06, "loss": 1.5198599100112915, "step": 2026 }, { "epoch": 1.8261261261261261, "grad_norm": 13.682362887924588, "learning_rate": 3.987628935960098e-06, "loss": 0.6701310873031616, "step": 2027 }, { "epoch": 1.827027027027027, "grad_norm": 56.10692524433349, "learning_rate": 3.982496822580434e-06, "loss": 1.7380964756011963, "step": 2028 }, { "epoch": 1.827927927927928, "grad_norm": 13.889223460128088, "learning_rate": 3.977365827252746e-06, "loss": 1.376246690750122, "step": 2029 }, { "epoch": 1.8288288288288288, "grad_norm": 11.281727205358694, "learning_rate": 3.972235955615071e-06, "loss": 1.7691125869750977, "step": 2030 }, { "epoch": 1.8297297297297297, "grad_norm": 13.021620717233445, "learning_rate": 3.9671072133042105e-06, "loss": 1.7032856941223145, "step": 2031 }, { "epoch": 1.8306306306306306, "grad_norm": 18.866824844356582, "learning_rate": 3.961979605955724e-06, "loss": 0.9565985798835754, "step": 2032 }, { "epoch": 1.8315315315315317, "grad_norm": 12.209220860886418, "learning_rate": 3.956853139203925e-06, "loss": 2.214951992034912, "step": 2033 }, { "epoch": 1.8324324324324324, "grad_norm": 11.197802721466198, "learning_rate": 3.951727818681873e-06, "loss": 1.6807488203048706, "step": 2034 }, { "epoch": 1.8333333333333335, "grad_norm": 7.408176024204171, "learning_rate": 3.94660365002137e-06, "loss": 1.29439115524292, "step": 2035 }, { "epoch": 1.8342342342342342, "grad_norm": 11.600022628467501, "learning_rate": 3.941480638852948e-06, "loss": 1.2479655742645264, "step": 2036 }, { "epoch": 1.8351351351351353, "grad_norm": 17.70228851131073, "learning_rate": 3.936358790805871e-06, "loss": 1.2770302295684814, "step": 2037 }, { "epoch": 1.836036036036036, "grad_norm": 11.067481542528773, "learning_rate": 3.931238111508124e-06, "loss": 1.8459141254425049, "step": 2038 }, { "epoch": 1.836936936936937, "grad_norm": 8.970115289359327, "learning_rate": 3.926118606586406e-06, "loss": 1.5907042026519775, "step": 2039 }, { "epoch": 1.8378378378378377, "grad_norm": 10.064834908224393, "learning_rate": 3.921000281666127e-06, "loss": 0.8717818260192871, "step": 2040 }, { "epoch": 1.8387387387387388, "grad_norm": 12.644321201888557, "learning_rate": 3.915883142371404e-06, "loss": 1.409450650215149, "step": 2041 }, { "epoch": 1.8396396396396395, "grad_norm": 10.237275872132916, "learning_rate": 3.910767194325045e-06, "loss": 1.5621337890625, "step": 2042 }, { "epoch": 1.8405405405405406, "grad_norm": 11.149022032267956, "learning_rate": 3.905652443148553e-06, "loss": 1.0682541131973267, "step": 2043 }, { "epoch": 1.8414414414414413, "grad_norm": 18.164644817812203, "learning_rate": 3.900538894462112e-06, "loss": 1.3991050720214844, "step": 2044 }, { "epoch": 1.8423423423423424, "grad_norm": 10.057496316068189, "learning_rate": 3.89542655388459e-06, "loss": 2.2244114875793457, "step": 2045 }, { "epoch": 1.8432432432432433, "grad_norm": 11.730127903497804, "learning_rate": 3.890315427033522e-06, "loss": 2.156032085418701, "step": 2046 }, { "epoch": 1.8441441441441442, "grad_norm": 8.810198754812639, "learning_rate": 3.8852055195251146e-06, "loss": 1.881744384765625, "step": 2047 }, { "epoch": 1.845045045045045, "grad_norm": 14.642924662376911, "learning_rate": 3.8800968369742305e-06, "loss": 1.666663408279419, "step": 2048 }, { "epoch": 1.845945945945946, "grad_norm": 11.472829693779188, "learning_rate": 3.874989384994389e-06, "loss": 1.7958859205245972, "step": 2049 }, { "epoch": 1.8468468468468469, "grad_norm": 10.161389492280458, "learning_rate": 3.869883169197755e-06, "loss": 1.6539031267166138, "step": 2050 }, { "epoch": 1.8477477477477477, "grad_norm": 7.353611430960883, "learning_rate": 3.864778195195138e-06, "loss": 1.7106571197509766, "step": 2051 }, { "epoch": 1.8486486486486486, "grad_norm": 20.54870601764021, "learning_rate": 3.859674468595979e-06, "loss": 1.6156736612319946, "step": 2052 }, { "epoch": 1.8495495495495495, "grad_norm": 9.524807801494127, "learning_rate": 3.854571995008351e-06, "loss": 2.0353589057922363, "step": 2053 }, { "epoch": 1.8504504504504504, "grad_norm": 12.56311071345327, "learning_rate": 3.84947078003895e-06, "loss": 1.525078535079956, "step": 2054 }, { "epoch": 1.8513513513513513, "grad_norm": 10.92868839801951, "learning_rate": 3.8443708292930894e-06, "loss": 1.5466748476028442, "step": 2055 }, { "epoch": 1.8522522522522522, "grad_norm": 14.1266404726735, "learning_rate": 3.839272148374692e-06, "loss": 1.6088988780975342, "step": 2056 }, { "epoch": 1.853153153153153, "grad_norm": 10.38242638563608, "learning_rate": 3.834174742886287e-06, "loss": 1.7105833292007446, "step": 2057 }, { "epoch": 1.8540540540540542, "grad_norm": 17.121790212262344, "learning_rate": 3.829078618429e-06, "loss": 1.685943603515625, "step": 2058 }, { "epoch": 1.8549549549549549, "grad_norm": 9.579254857244152, "learning_rate": 3.823983780602551e-06, "loss": 1.669414758682251, "step": 2059 }, { "epoch": 1.855855855855856, "grad_norm": 10.041877543818236, "learning_rate": 3.818890235005243e-06, "loss": 2.121381998062134, "step": 2060 }, { "epoch": 1.8567567567567567, "grad_norm": 11.146971685571359, "learning_rate": 3.813797987233965e-06, "loss": 1.9388622045516968, "step": 2061 }, { "epoch": 1.8576576576576578, "grad_norm": 14.236058720958708, "learning_rate": 3.808707042884176e-06, "loss": 1.1108877658843994, "step": 2062 }, { "epoch": 1.8585585585585584, "grad_norm": 15.905826190064678, "learning_rate": 3.803617407549901e-06, "loss": 1.5356963872909546, "step": 2063 }, { "epoch": 1.8594594594594596, "grad_norm": 12.655561573381654, "learning_rate": 3.798529086823729e-06, "loss": 1.0193653106689453, "step": 2064 }, { "epoch": 1.8603603603603602, "grad_norm": 10.153514091883679, "learning_rate": 3.7934420862968045e-06, "loss": 1.2517184019088745, "step": 2065 }, { "epoch": 1.8612612612612613, "grad_norm": 8.058012080834468, "learning_rate": 3.7883564115588223e-06, "loss": 1.4862089157104492, "step": 2066 }, { "epoch": 1.862162162162162, "grad_norm": 18.065678174274783, "learning_rate": 3.7832720681980183e-06, "loss": 0.9980199337005615, "step": 2067 }, { "epoch": 1.8630630630630631, "grad_norm": 24.62683147181859, "learning_rate": 3.7781890618011667e-06, "loss": 1.6335004568099976, "step": 2068 }, { "epoch": 1.8639639639639638, "grad_norm": 8.125913454982724, "learning_rate": 3.7731073979535706e-06, "loss": 1.5620460510253906, "step": 2069 }, { "epoch": 1.864864864864865, "grad_norm": 10.208284037925765, "learning_rate": 3.768027082239062e-06, "loss": 1.3698681592941284, "step": 2070 }, { "epoch": 1.8657657657657658, "grad_norm": 15.872653901224046, "learning_rate": 3.7629481202399886e-06, "loss": 1.7767051458358765, "step": 2071 }, { "epoch": 1.8666666666666667, "grad_norm": 11.434399437622693, "learning_rate": 3.75787051753721e-06, "loss": 1.7132368087768555, "step": 2072 }, { "epoch": 1.8675675675675676, "grad_norm": 11.78079658204757, "learning_rate": 3.752794279710094e-06, "loss": 2.404029130935669, "step": 2073 }, { "epoch": 1.8684684684684685, "grad_norm": 16.2721496234969, "learning_rate": 3.747719412336508e-06, "loss": 1.6003530025482178, "step": 2074 }, { "epoch": 1.8693693693693694, "grad_norm": 8.750799125851948, "learning_rate": 3.7426459209928133e-06, "loss": 1.8089443445205688, "step": 2075 }, { "epoch": 1.8702702702702703, "grad_norm": 7.842869294288517, "learning_rate": 3.737573811253859e-06, "loss": 1.1806695461273193, "step": 2076 }, { "epoch": 1.8711711711711712, "grad_norm": 8.460636971047276, "learning_rate": 3.7325030886929767e-06, "loss": 1.5709047317504883, "step": 2077 }, { "epoch": 1.872072072072072, "grad_norm": 10.385083814761153, "learning_rate": 3.7274337588819743e-06, "loss": 1.1126965284347534, "step": 2078 }, { "epoch": 1.872972972972973, "grad_norm": 12.998565648430477, "learning_rate": 3.7223658273911267e-06, "loss": 1.9489595890045166, "step": 2079 }, { "epoch": 1.8738738738738738, "grad_norm": 13.096941955859592, "learning_rate": 3.7172992997891756e-06, "loss": 1.5732641220092773, "step": 2080 }, { "epoch": 1.8747747747747747, "grad_norm": 15.28789120739809, "learning_rate": 3.7122341816433173e-06, "loss": 1.1677942276000977, "step": 2081 }, { "epoch": 1.8756756756756756, "grad_norm": 9.603294742851899, "learning_rate": 3.707170478519203e-06, "loss": 1.341094970703125, "step": 2082 }, { "epoch": 1.8765765765765767, "grad_norm": 9.288471831260694, "learning_rate": 3.7021081959809237e-06, "loss": 1.9096192121505737, "step": 2083 }, { "epoch": 1.8774774774774774, "grad_norm": 8.025780045255638, "learning_rate": 3.6970473395910115e-06, "loss": 1.5327026844024658, "step": 2084 }, { "epoch": 1.8783783783783785, "grad_norm": 10.867053119082584, "learning_rate": 3.691987914910437e-06, "loss": 1.304749608039856, "step": 2085 }, { "epoch": 1.8792792792792792, "grad_norm": 12.023574933479532, "learning_rate": 3.6869299274985903e-06, "loss": 1.155665636062622, "step": 2086 }, { "epoch": 1.8801801801801803, "grad_norm": 11.74997341729003, "learning_rate": 3.6818733829132845e-06, "loss": 1.6288031339645386, "step": 2087 }, { "epoch": 1.881081081081081, "grad_norm": 9.644983289206198, "learning_rate": 3.6768182867107485e-06, "loss": 1.62825608253479, "step": 2088 }, { "epoch": 1.881981981981982, "grad_norm": 12.911515113377188, "learning_rate": 3.6717646444456196e-06, "loss": 1.0108637809753418, "step": 2089 }, { "epoch": 1.8828828828828827, "grad_norm": 11.753309993592506, "learning_rate": 3.6667124616709337e-06, "loss": 1.2084550857543945, "step": 2090 }, { "epoch": 1.8837837837837839, "grad_norm": 13.616855367290347, "learning_rate": 3.6616617439381286e-06, "loss": 1.5335865020751953, "step": 2091 }, { "epoch": 1.8846846846846845, "grad_norm": 9.235423436766794, "learning_rate": 3.6566124967970286e-06, "loss": 1.4122389554977417, "step": 2092 }, { "epoch": 1.8855855855855856, "grad_norm": 10.201106705652542, "learning_rate": 3.651564725795843e-06, "loss": 1.2618002891540527, "step": 2093 }, { "epoch": 1.8864864864864865, "grad_norm": 15.52679894691076, "learning_rate": 3.64651843648116e-06, "loss": 1.1889724731445312, "step": 2094 }, { "epoch": 1.8873873873873874, "grad_norm": 10.143947076783371, "learning_rate": 3.6414736343979383e-06, "loss": 1.6448466777801514, "step": 2095 }, { "epoch": 1.8882882882882883, "grad_norm": 9.168651128851902, "learning_rate": 3.6364303250895032e-06, "loss": 1.091215968132019, "step": 2096 }, { "epoch": 1.8891891891891892, "grad_norm": 10.798141475360113, "learning_rate": 3.63138851409754e-06, "loss": 2.1353302001953125, "step": 2097 }, { "epoch": 1.89009009009009, "grad_norm": 8.01749766654077, "learning_rate": 3.626348206962087e-06, "loss": 1.6346862316131592, "step": 2098 }, { "epoch": 1.890990990990991, "grad_norm": 8.849643010393113, "learning_rate": 3.6213094092215284e-06, "loss": 1.44803786277771, "step": 2099 }, { "epoch": 1.8918918918918919, "grad_norm": 12.374021630104043, "learning_rate": 3.6162721264125943e-06, "loss": 1.6346116065979004, "step": 2100 }, { "epoch": 1.8927927927927928, "grad_norm": 9.596398515770996, "learning_rate": 3.6112363640703474e-06, "loss": 1.6425296068191528, "step": 2101 }, { "epoch": 1.8936936936936937, "grad_norm": 12.020509416799094, "learning_rate": 3.606202127728178e-06, "loss": 1.595349669456482, "step": 2102 }, { "epoch": 1.8945945945945946, "grad_norm": 12.07767233630599, "learning_rate": 3.6011694229178027e-06, "loss": 1.664405107498169, "step": 2103 }, { "epoch": 1.8954954954954955, "grad_norm": 6.388417918849233, "learning_rate": 3.596138255169254e-06, "loss": 1.658945083618164, "step": 2104 }, { "epoch": 1.8963963963963963, "grad_norm": 16.91008304211875, "learning_rate": 3.591108630010874e-06, "loss": 1.4404268264770508, "step": 2105 }, { "epoch": 1.8972972972972975, "grad_norm": 13.151558828904543, "learning_rate": 3.586080552969312e-06, "loss": 2.126494884490967, "step": 2106 }, { "epoch": 1.8981981981981981, "grad_norm": 15.00565265407829, "learning_rate": 3.581054029569516e-06, "loss": 1.6209547519683838, "step": 2107 }, { "epoch": 1.8990990990990992, "grad_norm": 13.23640949470636, "learning_rate": 3.576029065334725e-06, "loss": 2.09350323677063, "step": 2108 }, { "epoch": 1.9, "grad_norm": 10.747731590654224, "learning_rate": 3.5710056657864683e-06, "loss": 1.1772202253341675, "step": 2109 }, { "epoch": 1.900900900900901, "grad_norm": 9.279118499789343, "learning_rate": 3.5659838364445505e-06, "loss": 1.4582226276397705, "step": 2110 }, { "epoch": 1.9018018018018017, "grad_norm": 6.78203843235967, "learning_rate": 3.5609635828270545e-06, "loss": 1.1998049020767212, "step": 2111 }, { "epoch": 1.9027027027027028, "grad_norm": 14.825188149461917, "learning_rate": 3.555944910450332e-06, "loss": 1.1677322387695312, "step": 2112 }, { "epoch": 1.9036036036036035, "grad_norm": 15.56932293596776, "learning_rate": 3.5509278248289957e-06, "loss": 2.0692138671875, "step": 2113 }, { "epoch": 1.9045045045045046, "grad_norm": 12.284534694391617, "learning_rate": 3.5459123314759137e-06, "loss": 1.833665370941162, "step": 2114 }, { "epoch": 1.9054054054054053, "grad_norm": 12.717321831126291, "learning_rate": 3.54089843590221e-06, "loss": 1.417661190032959, "step": 2115 }, { "epoch": 1.9063063063063064, "grad_norm": 12.746155447547558, "learning_rate": 3.5358861436172487e-06, "loss": 1.0046995878219604, "step": 2116 }, { "epoch": 1.907207207207207, "grad_norm": 16.910773646485442, "learning_rate": 3.53087546012863e-06, "loss": 1.0042613744735718, "step": 2117 }, { "epoch": 1.9081081081081082, "grad_norm": 14.210173017449977, "learning_rate": 3.5258663909421893e-06, "loss": 1.5375925302505493, "step": 2118 }, { "epoch": 1.909009009009009, "grad_norm": 11.302587028164004, "learning_rate": 3.5208589415619886e-06, "loss": 0.9769325256347656, "step": 2119 }, { "epoch": 1.90990990990991, "grad_norm": 7.415677960684316, "learning_rate": 3.5158531174903086e-06, "loss": 1.5886321067810059, "step": 2120 }, { "epoch": 1.9108108108108108, "grad_norm": 14.621743259705289, "learning_rate": 3.5108489242276455e-06, "loss": 1.2349965572357178, "step": 2121 }, { "epoch": 1.9117117117117117, "grad_norm": 10.825543596160664, "learning_rate": 3.5058463672727015e-06, "loss": 1.5445003509521484, "step": 2122 }, { "epoch": 1.9126126126126126, "grad_norm": 8.560089551257851, "learning_rate": 3.5008454521223833e-06, "loss": 1.0274848937988281, "step": 2123 }, { "epoch": 1.9135135135135135, "grad_norm": 16.895379816187045, "learning_rate": 3.4958461842717916e-06, "loss": 1.7597068548202515, "step": 2124 }, { "epoch": 1.9144144144144144, "grad_norm": 9.569345852519174, "learning_rate": 3.4908485692142167e-06, "loss": 0.9353008270263672, "step": 2125 }, { "epoch": 1.9153153153153153, "grad_norm": 14.676927795457946, "learning_rate": 3.4858526124411356e-06, "loss": 1.1858336925506592, "step": 2126 }, { "epoch": 1.9162162162162162, "grad_norm": 12.758919264221314, "learning_rate": 3.4808583194421996e-06, "loss": 0.9151158332824707, "step": 2127 }, { "epoch": 1.917117117117117, "grad_norm": 16.25087984986083, "learning_rate": 3.475865695705234e-06, "loss": 1.7295141220092773, "step": 2128 }, { "epoch": 1.918018018018018, "grad_norm": 9.315357505018591, "learning_rate": 3.47087474671623e-06, "loss": 1.395228385925293, "step": 2129 }, { "epoch": 1.9189189189189189, "grad_norm": 14.907889654172648, "learning_rate": 3.4658854779593375e-06, "loss": 1.025083303451538, "step": 2130 }, { "epoch": 1.91981981981982, "grad_norm": 27.159363933495012, "learning_rate": 3.4608978949168615e-06, "loss": 1.0848424434661865, "step": 2131 }, { "epoch": 1.9207207207207206, "grad_norm": 7.798929240714956, "learning_rate": 3.4559120030692516e-06, "loss": 1.5797157287597656, "step": 2132 }, { "epoch": 1.9216216216216218, "grad_norm": 11.79739917942566, "learning_rate": 3.450927807895103e-06, "loss": 1.4331327676773071, "step": 2133 }, { "epoch": 1.9225225225225224, "grad_norm": 16.084261449585295, "learning_rate": 3.4459453148711443e-06, "loss": 2.5673320293426514, "step": 2134 }, { "epoch": 1.9234234234234235, "grad_norm": 9.48737229453046, "learning_rate": 3.440964529472235e-06, "loss": 1.3175442218780518, "step": 2135 }, { "epoch": 1.9243243243243242, "grad_norm": 12.449238180735826, "learning_rate": 3.435985457171356e-06, "loss": 1.2205897569656372, "step": 2136 }, { "epoch": 1.9252252252252253, "grad_norm": 10.017944527452725, "learning_rate": 3.431008103439608e-06, "loss": 1.3516299724578857, "step": 2137 }, { "epoch": 1.926126126126126, "grad_norm": 9.489999545183824, "learning_rate": 3.4260324737462024e-06, "loss": 2.0574636459350586, "step": 2138 }, { "epoch": 1.927027027027027, "grad_norm": 8.714596265903568, "learning_rate": 3.4210585735584566e-06, "loss": 1.8997373580932617, "step": 2139 }, { "epoch": 1.9279279279279278, "grad_norm": 9.410757997128773, "learning_rate": 3.4160864083417876e-06, "loss": 1.4841136932373047, "step": 2140 }, { "epoch": 1.928828828828829, "grad_norm": 7.0051548060462325, "learning_rate": 3.4111159835597053e-06, "loss": 1.2358179092407227, "step": 2141 }, { "epoch": 1.9297297297297298, "grad_norm": 7.660808937674649, "learning_rate": 3.406147304673808e-06, "loss": 1.7413294315338135, "step": 2142 }, { "epoch": 1.9306306306306307, "grad_norm": 10.034783141698217, "learning_rate": 3.401180377143774e-06, "loss": 1.3427538871765137, "step": 2143 }, { "epoch": 1.9315315315315316, "grad_norm": 11.602028892577103, "learning_rate": 3.39621520642736e-06, "loss": 2.106019973754883, "step": 2144 }, { "epoch": 1.9324324324324325, "grad_norm": 13.051085698353752, "learning_rate": 3.391251797980391e-06, "loss": 1.3242912292480469, "step": 2145 }, { "epoch": 1.9333333333333333, "grad_norm": 11.717082233526542, "learning_rate": 3.386290157256754e-06, "loss": 1.0651776790618896, "step": 2146 }, { "epoch": 1.9342342342342342, "grad_norm": 12.567908857251185, "learning_rate": 3.3813302897083955e-06, "loss": 1.1179988384246826, "step": 2147 }, { "epoch": 1.9351351351351351, "grad_norm": 12.175470882803538, "learning_rate": 3.376372200785312e-06, "loss": 1.5827984809875488, "step": 2148 }, { "epoch": 1.936036036036036, "grad_norm": 9.893188916383231, "learning_rate": 3.371415895935548e-06, "loss": 1.4257056713104248, "step": 2149 }, { "epoch": 1.936936936936937, "grad_norm": 12.599640421518274, "learning_rate": 3.366461380605185e-06, "loss": 1.3703809976577759, "step": 2150 }, { "epoch": 1.9378378378378378, "grad_norm": 9.390541515646184, "learning_rate": 3.3615086602383394e-06, "loss": 1.428502082824707, "step": 2151 }, { "epoch": 1.9387387387387387, "grad_norm": 11.01785269445174, "learning_rate": 3.3565577402771566e-06, "loss": 1.135480284690857, "step": 2152 }, { "epoch": 1.9396396396396396, "grad_norm": 18.738315054280818, "learning_rate": 3.3516086261618e-06, "loss": 1.8752212524414062, "step": 2153 }, { "epoch": 1.9405405405405407, "grad_norm": 7.545214626155076, "learning_rate": 3.346661323330453e-06, "loss": 2.4570391178131104, "step": 2154 }, { "epoch": 1.9414414414414414, "grad_norm": 6.884765031860276, "learning_rate": 3.3417158372193064e-06, "loss": 1.3177688121795654, "step": 2155 }, { "epoch": 1.9423423423423425, "grad_norm": 20.957711722167538, "learning_rate": 3.3367721732625537e-06, "loss": 1.6490633487701416, "step": 2156 }, { "epoch": 1.9432432432432432, "grad_norm": 15.566743430273707, "learning_rate": 3.331830336892388e-06, "loss": 1.9409370422363281, "step": 2157 }, { "epoch": 1.9441441441441443, "grad_norm": 10.7281788973491, "learning_rate": 3.3268903335389923e-06, "loss": 1.3840687274932861, "step": 2158 }, { "epoch": 1.945045045045045, "grad_norm": 16.113661929909828, "learning_rate": 3.3219521686305413e-06, "loss": 1.8886069059371948, "step": 2159 }, { "epoch": 1.945945945945946, "grad_norm": 11.2864377632836, "learning_rate": 3.317015847593181e-06, "loss": 1.6758947372436523, "step": 2160 }, { "epoch": 1.9468468468468467, "grad_norm": 9.345236184007552, "learning_rate": 3.3120813758510385e-06, "loss": 1.097003698348999, "step": 2161 }, { "epoch": 1.9477477477477478, "grad_norm": 18.376686260812527, "learning_rate": 3.3071487588262045e-06, "loss": 2.1929831504821777, "step": 2162 }, { "epoch": 1.9486486486486485, "grad_norm": 7.93695872362548, "learning_rate": 3.302218001938732e-06, "loss": 1.305293083190918, "step": 2163 }, { "epoch": 1.9495495495495496, "grad_norm": 7.1626307957024435, "learning_rate": 3.297289110606633e-06, "loss": 1.2249070405960083, "step": 2164 }, { "epoch": 1.9504504504504503, "grad_norm": 14.021793433378013, "learning_rate": 3.2923620902458652e-06, "loss": 1.3577923774719238, "step": 2165 }, { "epoch": 1.9513513513513514, "grad_norm": 10.25590694676422, "learning_rate": 3.2874369462703353e-06, "loss": 1.4521949291229248, "step": 2166 }, { "epoch": 1.9522522522522523, "grad_norm": 12.249003629414416, "learning_rate": 3.282513684091885e-06, "loss": 1.2066313028335571, "step": 2167 }, { "epoch": 1.9531531531531532, "grad_norm": 15.955002113763394, "learning_rate": 3.277592309120289e-06, "loss": 2.387820243835449, "step": 2168 }, { "epoch": 1.954054054054054, "grad_norm": 14.008301907388773, "learning_rate": 3.2726728267632478e-06, "loss": 1.7124426364898682, "step": 2169 }, { "epoch": 1.954954954954955, "grad_norm": 12.527364105021245, "learning_rate": 3.2677552424263836e-06, "loss": 2.0390710830688477, "step": 2170 }, { "epoch": 1.9558558558558559, "grad_norm": 11.774335869558978, "learning_rate": 3.262839561513232e-06, "loss": 1.5456371307373047, "step": 2171 }, { "epoch": 1.9567567567567568, "grad_norm": 10.339122054581102, "learning_rate": 3.257925789425237e-06, "loss": 1.7115483283996582, "step": 2172 }, { "epoch": 1.9576576576576576, "grad_norm": 16.063537164982147, "learning_rate": 3.2530139315617457e-06, "loss": 1.4859074354171753, "step": 2173 }, { "epoch": 1.9585585585585585, "grad_norm": 13.855946479261425, "learning_rate": 3.248103993320002e-06, "loss": 0.8990108370780945, "step": 2174 }, { "epoch": 1.9594594594594594, "grad_norm": 16.241122011618927, "learning_rate": 3.24319598009514e-06, "loss": 2.6057894229888916, "step": 2175 }, { "epoch": 1.9603603603603603, "grad_norm": 12.743938722701444, "learning_rate": 3.2382898972801787e-06, "loss": 1.8687529563903809, "step": 2176 }, { "epoch": 1.9612612612612612, "grad_norm": 12.14750333699258, "learning_rate": 3.233385750266015e-06, "loss": 1.365946888923645, "step": 2177 }, { "epoch": 1.962162162162162, "grad_norm": 25.643814303577955, "learning_rate": 3.2284835444414203e-06, "loss": 1.5597602128982544, "step": 2178 }, { "epoch": 1.9630630630630632, "grad_norm": 10.678586368174491, "learning_rate": 3.2235832851930322e-06, "loss": 1.0392903089523315, "step": 2179 }, { "epoch": 1.9639639639639639, "grad_norm": 12.384699252942765, "learning_rate": 3.2186849779053493e-06, "loss": 1.0315731763839722, "step": 2180 }, { "epoch": 1.964864864864865, "grad_norm": 10.355602715542794, "learning_rate": 3.213788627960725e-06, "loss": 1.3578027486801147, "step": 2181 }, { "epoch": 1.9657657657657657, "grad_norm": 9.43903416775586, "learning_rate": 3.2088942407393642e-06, "loss": 1.8253684043884277, "step": 2182 }, { "epoch": 1.9666666666666668, "grad_norm": 8.013148187820535, "learning_rate": 3.20400182161931e-06, "loss": 1.3881949186325073, "step": 2183 }, { "epoch": 1.9675675675675675, "grad_norm": 10.28255390921646, "learning_rate": 3.1991113759764493e-06, "loss": 1.4869511127471924, "step": 2184 }, { "epoch": 1.9684684684684686, "grad_norm": 9.249386883195186, "learning_rate": 3.1942229091844955e-06, "loss": 1.9359171390533447, "step": 2185 }, { "epoch": 1.9693693693693692, "grad_norm": 6.886190591874404, "learning_rate": 3.1893364266149907e-06, "loss": 1.3109782934188843, "step": 2186 }, { "epoch": 1.9702702702702704, "grad_norm": 10.028840789202924, "learning_rate": 3.1844519336372925e-06, "loss": 1.5913902521133423, "step": 2187 }, { "epoch": 1.971171171171171, "grad_norm": 9.587699091912263, "learning_rate": 3.1795694356185803e-06, "loss": 1.4712733030319214, "step": 2188 }, { "epoch": 1.9720720720720721, "grad_norm": 10.581855803545576, "learning_rate": 3.1746889379238354e-06, "loss": 1.532201886177063, "step": 2189 }, { "epoch": 1.972972972972973, "grad_norm": 10.449874477114864, "learning_rate": 3.169810445915839e-06, "loss": 1.3963842391967773, "step": 2190 }, { "epoch": 1.973873873873874, "grad_norm": 15.404997460341296, "learning_rate": 3.1649339649551736e-06, "loss": 0.887641429901123, "step": 2191 }, { "epoch": 1.9747747747747748, "grad_norm": 7.810739451336037, "learning_rate": 3.16005950040021e-06, "loss": 1.6909675598144531, "step": 2192 }, { "epoch": 1.9756756756756757, "grad_norm": 9.349427591093411, "learning_rate": 3.155187057607102e-06, "loss": 1.4221792221069336, "step": 2193 }, { "epoch": 1.9765765765765766, "grad_norm": 11.823817597071073, "learning_rate": 3.150316641929785e-06, "loss": 1.5854582786560059, "step": 2194 }, { "epoch": 1.9774774774774775, "grad_norm": 10.292856583888586, "learning_rate": 3.1454482587199627e-06, "loss": 2.2465217113494873, "step": 2195 }, { "epoch": 1.9783783783783784, "grad_norm": 8.958204490058126, "learning_rate": 3.140581913327109e-06, "loss": 1.9419772624969482, "step": 2196 }, { "epoch": 1.9792792792792793, "grad_norm": 10.313590809109224, "learning_rate": 3.1357176110984578e-06, "loss": 1.76797616481781, "step": 2197 }, { "epoch": 1.9801801801801802, "grad_norm": 15.937345081257261, "learning_rate": 3.130855357378997e-06, "loss": 1.9342007637023926, "step": 2198 }, { "epoch": 1.981081081081081, "grad_norm": 11.655201390613755, "learning_rate": 3.125995157511464e-06, "loss": 1.3997079133987427, "step": 2199 }, { "epoch": 1.981981981981982, "grad_norm": 15.250413697057784, "learning_rate": 3.1211370168363397e-06, "loss": 0.9736800193786621, "step": 2200 }, { "epoch": 1.9828828828828828, "grad_norm": 15.229979109688355, "learning_rate": 3.116280940691843e-06, "loss": 1.5279369354248047, "step": 2201 }, { "epoch": 1.983783783783784, "grad_norm": 9.31071979938882, "learning_rate": 3.1114269344139196e-06, "loss": 1.9810595512390137, "step": 2202 }, { "epoch": 1.9846846846846846, "grad_norm": 9.441805416816392, "learning_rate": 3.1065750033362497e-06, "loss": 1.2492730617523193, "step": 2203 }, { "epoch": 1.9855855855855857, "grad_norm": 11.741858941472692, "learning_rate": 3.1017251527902255e-06, "loss": 1.3416674137115479, "step": 2204 }, { "epoch": 1.9864864864864864, "grad_norm": 10.130905276857453, "learning_rate": 3.096877388104956e-06, "loss": 1.4120872020721436, "step": 2205 }, { "epoch": 1.9873873873873875, "grad_norm": 8.173921131163244, "learning_rate": 3.0920317146072577e-06, "loss": 1.6116995811462402, "step": 2206 }, { "epoch": 1.9882882882882882, "grad_norm": 7.992464699997041, "learning_rate": 3.0871881376216497e-06, "loss": 1.5697150230407715, "step": 2207 }, { "epoch": 1.9891891891891893, "grad_norm": 9.383321545011542, "learning_rate": 3.082346662470347e-06, "loss": 1.4324170351028442, "step": 2208 }, { "epoch": 1.99009009009009, "grad_norm": 9.371028759509135, "learning_rate": 3.0775072944732553e-06, "loss": 1.3041894435882568, "step": 2209 }, { "epoch": 1.990990990990991, "grad_norm": 15.416080366857384, "learning_rate": 3.0726700389479647e-06, "loss": 1.6367117166519165, "step": 2210 }, { "epoch": 1.9918918918918918, "grad_norm": 12.165896647533119, "learning_rate": 3.067834901209744e-06, "loss": 1.6322283744812012, "step": 2211 }, { "epoch": 1.9927927927927929, "grad_norm": 10.085776784278893, "learning_rate": 3.063001886571536e-06, "loss": 1.5652577877044678, "step": 2212 }, { "epoch": 1.9936936936936935, "grad_norm": 12.6253638540615, "learning_rate": 3.0581710003439484e-06, "loss": 1.5706499814987183, "step": 2213 }, { "epoch": 1.9945945945945946, "grad_norm": 12.404191542392223, "learning_rate": 3.0533422478352525e-06, "loss": 1.4519004821777344, "step": 2214 }, { "epoch": 1.9954954954954955, "grad_norm": 11.307118564755154, "learning_rate": 3.0485156343513733e-06, "loss": 2.0829999446868896, "step": 2215 }, { "epoch": 1.9963963963963964, "grad_norm": 15.119484663636769, "learning_rate": 3.043691165195887e-06, "loss": 1.5040124654769897, "step": 2216 }, { "epoch": 1.9972972972972973, "grad_norm": 11.039823353111718, "learning_rate": 3.0388688456700117e-06, "loss": 1.6364296674728394, "step": 2217 }, { "epoch": 1.9981981981981982, "grad_norm": 10.258552856771084, "learning_rate": 3.0340486810726055e-06, "loss": 1.7006560564041138, "step": 2218 }, { "epoch": 1.999099099099099, "grad_norm": 10.452562391291803, "learning_rate": 3.029230676700157e-06, "loss": 1.1743242740631104, "step": 2219 }, { "epoch": 2.0, "grad_norm": 8.519149823556289, "learning_rate": 3.024414837846782e-06, "loss": 0.8965996503829956, "step": 2220 }, { "epoch": 2.000900900900901, "grad_norm": 14.116860160303586, "learning_rate": 3.019601169804216e-06, "loss": 0.5894768238067627, "step": 2221 }, { "epoch": 2.001801801801802, "grad_norm": 8.801877484178394, "learning_rate": 3.0147896778618103e-06, "loss": 1.0011879205703735, "step": 2222 }, { "epoch": 2.002702702702703, "grad_norm": 7.8630344967600685, "learning_rate": 3.0099803673065235e-06, "loss": 0.34475117921829224, "step": 2223 }, { "epoch": 2.0036036036036036, "grad_norm": 13.092072196546898, "learning_rate": 3.0051732434229185e-06, "loss": 1.7805842161178589, "step": 2224 }, { "epoch": 2.0045045045045047, "grad_norm": 9.695686016646027, "learning_rate": 3.0003683114931557e-06, "loss": 0.5842613577842712, "step": 2225 }, { "epoch": 2.0054054054054054, "grad_norm": 9.629312273647948, "learning_rate": 2.9955655767969854e-06, "loss": 0.5329399108886719, "step": 2226 }, { "epoch": 2.0063063063063065, "grad_norm": 8.255293186613402, "learning_rate": 2.9907650446117446e-06, "loss": 0.24428929388523102, "step": 2227 }, { "epoch": 2.007207207207207, "grad_norm": 16.23335526618716, "learning_rate": 2.9859667202123514e-06, "loss": 0.8648971319198608, "step": 2228 }, { "epoch": 2.0081081081081082, "grad_norm": 6.750088414031487, "learning_rate": 2.9811706088712946e-06, "loss": 0.22527940571308136, "step": 2229 }, { "epoch": 2.009009009009009, "grad_norm": 13.697241509002817, "learning_rate": 2.9763767158586343e-06, "loss": 0.7499334812164307, "step": 2230 }, { "epoch": 2.00990990990991, "grad_norm": 8.598969444244288, "learning_rate": 2.9715850464419905e-06, "loss": 1.1645452976226807, "step": 2231 }, { "epoch": 2.0108108108108107, "grad_norm": 9.372572117775338, "learning_rate": 2.966795605886541e-06, "loss": 0.2751765847206116, "step": 2232 }, { "epoch": 2.011711711711712, "grad_norm": 11.278482227681451, "learning_rate": 2.9620083994550187e-06, "loss": 0.341278612613678, "step": 2233 }, { "epoch": 2.0126126126126125, "grad_norm": 13.289995082559464, "learning_rate": 2.9572234324076944e-06, "loss": 0.52141273021698, "step": 2234 }, { "epoch": 2.0135135135135136, "grad_norm": 11.311949250244826, "learning_rate": 2.952440710002384e-06, "loss": 0.41022777557373047, "step": 2235 }, { "epoch": 2.0144144144144143, "grad_norm": 12.825290538173615, "learning_rate": 2.947660237494432e-06, "loss": 0.3243304193019867, "step": 2236 }, { "epoch": 2.0153153153153154, "grad_norm": 12.675229879172527, "learning_rate": 2.942882020136713e-06, "loss": 0.6357255578041077, "step": 2237 }, { "epoch": 2.016216216216216, "grad_norm": 13.204185864484606, "learning_rate": 2.9381060631796256e-06, "loss": 0.9667474031448364, "step": 2238 }, { "epoch": 2.017117117117117, "grad_norm": 8.490586888118822, "learning_rate": 2.933332371871081e-06, "loss": 0.49755173921585083, "step": 2239 }, { "epoch": 2.018018018018018, "grad_norm": 11.33330600264706, "learning_rate": 2.928560951456504e-06, "loss": 0.7523829936981201, "step": 2240 }, { "epoch": 2.018918918918919, "grad_norm": 16.910769932644783, "learning_rate": 2.9237918071788217e-06, "loss": 0.4810546934604645, "step": 2241 }, { "epoch": 2.0198198198198196, "grad_norm": 8.234150024101922, "learning_rate": 2.9190249442784623e-06, "loss": 0.1807146668434143, "step": 2242 }, { "epoch": 2.0207207207207207, "grad_norm": 8.8698820322946, "learning_rate": 2.9142603679933466e-06, "loss": 0.6779142022132874, "step": 2243 }, { "epoch": 2.0216216216216214, "grad_norm": 10.512189441751989, "learning_rate": 2.909498083558879e-06, "loss": 0.616098165512085, "step": 2244 }, { "epoch": 2.0225225225225225, "grad_norm": 10.0793341178136, "learning_rate": 2.9047380962079525e-06, "loss": 0.28480350971221924, "step": 2245 }, { "epoch": 2.0234234234234236, "grad_norm": 11.802225328280493, "learning_rate": 2.899980411170927e-06, "loss": 0.3633064329624176, "step": 2246 }, { "epoch": 2.0243243243243243, "grad_norm": 12.639280973121657, "learning_rate": 2.8952250336756455e-06, "loss": 1.3999977111816406, "step": 2247 }, { "epoch": 2.0252252252252254, "grad_norm": 17.626132922762903, "learning_rate": 2.8904719689474026e-06, "loss": 1.0454301834106445, "step": 2248 }, { "epoch": 2.026126126126126, "grad_norm": 9.016040345520308, "learning_rate": 2.885721222208959e-06, "loss": 0.2405223548412323, "step": 2249 }, { "epoch": 2.027027027027027, "grad_norm": 11.25056348026538, "learning_rate": 2.880972798680527e-06, "loss": 0.3542478680610657, "step": 2250 }, { "epoch": 2.027927927927928, "grad_norm": 9.950745388942867, "learning_rate": 2.8762267035797607e-06, "loss": 0.23745930194854736, "step": 2251 }, { "epoch": 2.028828828828829, "grad_norm": 16.22337232432947, "learning_rate": 2.871482942121766e-06, "loss": 0.83488929271698, "step": 2252 }, { "epoch": 2.0297297297297296, "grad_norm": 11.399313632222292, "learning_rate": 2.8667415195190745e-06, "loss": 0.5515332221984863, "step": 2253 }, { "epoch": 2.0306306306306308, "grad_norm": 11.004201434799734, "learning_rate": 2.8620024409816555e-06, "loss": 0.6944783329963684, "step": 2254 }, { "epoch": 2.0315315315315314, "grad_norm": 4.012907996201843, "learning_rate": 2.8572657117168956e-06, "loss": 0.11357761919498444, "step": 2255 }, { "epoch": 2.0324324324324325, "grad_norm": 5.943422057916982, "learning_rate": 2.852531336929608e-06, "loss": 0.10978563874959946, "step": 2256 }, { "epoch": 2.033333333333333, "grad_norm": 10.8917138227732, "learning_rate": 2.84779932182201e-06, "loss": 0.23231631517410278, "step": 2257 }, { "epoch": 2.0342342342342343, "grad_norm": 11.957463438051688, "learning_rate": 2.843069671593734e-06, "loss": 0.5335648059844971, "step": 2258 }, { "epoch": 2.035135135135135, "grad_norm": 13.27800383619201, "learning_rate": 2.8383423914418074e-06, "loss": 0.3874744176864624, "step": 2259 }, { "epoch": 2.036036036036036, "grad_norm": 11.29022345693228, "learning_rate": 2.8336174865606587e-06, "loss": 0.31073588132858276, "step": 2260 }, { "epoch": 2.036936936936937, "grad_norm": 8.270076970900663, "learning_rate": 2.8288949621421015e-06, "loss": 0.22705663740634918, "step": 2261 }, { "epoch": 2.037837837837838, "grad_norm": 22.317803802467708, "learning_rate": 2.8241748233753362e-06, "loss": 0.627781867980957, "step": 2262 }, { "epoch": 2.0387387387387386, "grad_norm": 12.282119291554915, "learning_rate": 2.819457075446945e-06, "loss": 0.4912683963775635, "step": 2263 }, { "epoch": 2.0396396396396397, "grad_norm": 17.042188440229676, "learning_rate": 2.814741723540876e-06, "loss": 0.5708624720573425, "step": 2264 }, { "epoch": 2.0405405405405403, "grad_norm": 12.985899407068953, "learning_rate": 2.8100287728384508e-06, "loss": 0.5778407454490662, "step": 2265 }, { "epoch": 2.0414414414414415, "grad_norm": 8.102473536840277, "learning_rate": 2.8053182285183466e-06, "loss": 0.396592378616333, "step": 2266 }, { "epoch": 2.042342342342342, "grad_norm": 11.470603839558542, "learning_rate": 2.800610095756604e-06, "loss": 0.4478808641433716, "step": 2267 }, { "epoch": 2.0432432432432432, "grad_norm": 8.865998347270581, "learning_rate": 2.7959043797266074e-06, "loss": 0.38251596689224243, "step": 2268 }, { "epoch": 2.0441441441441444, "grad_norm": 9.78253761748689, "learning_rate": 2.7912010855990845e-06, "loss": 0.333384245634079, "step": 2269 }, { "epoch": 2.045045045045045, "grad_norm": 10.835170947223752, "learning_rate": 2.786500218542111e-06, "loss": 0.3847394585609436, "step": 2270 }, { "epoch": 2.045945945945946, "grad_norm": 15.716206208288712, "learning_rate": 2.7818017837210842e-06, "loss": 0.4596790671348572, "step": 2271 }, { "epoch": 2.046846846846847, "grad_norm": 9.434308340402769, "learning_rate": 2.7771057862987384e-06, "loss": 0.42317095398902893, "step": 2272 }, { "epoch": 2.047747747747748, "grad_norm": 10.941664955305084, "learning_rate": 2.772412231435122e-06, "loss": 0.8684549331665039, "step": 2273 }, { "epoch": 2.0486486486486486, "grad_norm": 8.072982259982698, "learning_rate": 2.7677211242876064e-06, "loss": 0.3451632857322693, "step": 2274 }, { "epoch": 2.0495495495495497, "grad_norm": 24.927221655778474, "learning_rate": 2.7630324700108665e-06, "loss": 1.1120212078094482, "step": 2275 }, { "epoch": 2.0504504504504504, "grad_norm": 9.417859518363821, "learning_rate": 2.7583462737568866e-06, "loss": 0.31997615098953247, "step": 2276 }, { "epoch": 2.0513513513513515, "grad_norm": 8.542657946567818, "learning_rate": 2.753662540674952e-06, "loss": 0.29756706953048706, "step": 2277 }, { "epoch": 2.052252252252252, "grad_norm": 10.937563611004684, "learning_rate": 2.748981275911633e-06, "loss": 0.2215685397386551, "step": 2278 }, { "epoch": 2.0531531531531533, "grad_norm": 11.318570631865057, "learning_rate": 2.7443024846107987e-06, "loss": 0.3994358479976654, "step": 2279 }, { "epoch": 2.054054054054054, "grad_norm": 11.700050923938079, "learning_rate": 2.739626171913589e-06, "loss": 0.4751622974872589, "step": 2280 }, { "epoch": 2.054954954954955, "grad_norm": 8.401848897882775, "learning_rate": 2.7349523429584307e-06, "loss": 0.5750836730003357, "step": 2281 }, { "epoch": 2.0558558558558557, "grad_norm": 11.685087826628124, "learning_rate": 2.7302810028810124e-06, "loss": 0.5777537226676941, "step": 2282 }, { "epoch": 2.056756756756757, "grad_norm": 9.849935676447956, "learning_rate": 2.725612156814296e-06, "loss": 0.5181484222412109, "step": 2283 }, { "epoch": 2.0576576576576575, "grad_norm": 11.001241896604622, "learning_rate": 2.720945809888494e-06, "loss": 0.8288261294364929, "step": 2284 }, { "epoch": 2.0585585585585586, "grad_norm": 11.29434899953877, "learning_rate": 2.716281967231083e-06, "loss": 0.36716341972351074, "step": 2285 }, { "epoch": 2.0594594594594593, "grad_norm": 14.91314367439175, "learning_rate": 2.711620633966778e-06, "loss": 0.35981863737106323, "step": 2286 }, { "epoch": 2.0603603603603604, "grad_norm": 10.481206407261784, "learning_rate": 2.706961815217547e-06, "loss": 0.43312573432922363, "step": 2287 }, { "epoch": 2.061261261261261, "grad_norm": 18.697687947776096, "learning_rate": 2.7023055161025846e-06, "loss": 0.5223169922828674, "step": 2288 }, { "epoch": 2.062162162162162, "grad_norm": 12.203868038942481, "learning_rate": 2.6976517417383207e-06, "loss": 0.9200209379196167, "step": 2289 }, { "epoch": 2.063063063063063, "grad_norm": 22.045402305638788, "learning_rate": 2.693000497238416e-06, "loss": 0.523088812828064, "step": 2290 }, { "epoch": 2.063963963963964, "grad_norm": 12.213912299040938, "learning_rate": 2.6883517877137405e-06, "loss": 0.6578672528266907, "step": 2291 }, { "epoch": 2.064864864864865, "grad_norm": 10.268307278686262, "learning_rate": 2.683705618272393e-06, "loss": 0.5833814144134521, "step": 2292 }, { "epoch": 2.0657657657657658, "grad_norm": 11.559777703090857, "learning_rate": 2.679061994019669e-06, "loss": 0.5228210687637329, "step": 2293 }, { "epoch": 2.066666666666667, "grad_norm": 10.769271588186752, "learning_rate": 2.674420920058074e-06, "loss": 0.6461083292961121, "step": 2294 }, { "epoch": 2.0675675675675675, "grad_norm": 9.509527098309293, "learning_rate": 2.6697824014873076e-06, "loss": 0.43987417221069336, "step": 2295 }, { "epoch": 2.0684684684684687, "grad_norm": 13.96428443975603, "learning_rate": 2.66514644340426e-06, "loss": 0.8247560262680054, "step": 2296 }, { "epoch": 2.0693693693693693, "grad_norm": 8.226336945205592, "learning_rate": 2.660513050903016e-06, "loss": 0.2795591354370117, "step": 2297 }, { "epoch": 2.0702702702702704, "grad_norm": 13.102258837455198, "learning_rate": 2.655882229074832e-06, "loss": 0.37356728315353394, "step": 2298 }, { "epoch": 2.071171171171171, "grad_norm": 12.490435999993819, "learning_rate": 2.6512539830081476e-06, "loss": 0.5807406306266785, "step": 2299 }, { "epoch": 2.0720720720720722, "grad_norm": 6.102017937500685, "learning_rate": 2.646628317788563e-06, "loss": 0.19707541167736053, "step": 2300 }, { "epoch": 2.072972972972973, "grad_norm": 8.814493990300647, "learning_rate": 2.6420052384988524e-06, "loss": 0.2348020076751709, "step": 2301 }, { "epoch": 2.073873873873874, "grad_norm": 10.314523249976816, "learning_rate": 2.637384750218941e-06, "loss": 0.7949020266532898, "step": 2302 }, { "epoch": 2.0747747747747747, "grad_norm": 6.35103823999349, "learning_rate": 2.6327668580259123e-06, "loss": 0.29874366521835327, "step": 2303 }, { "epoch": 2.075675675675676, "grad_norm": 8.725783477885596, "learning_rate": 2.628151566993991e-06, "loss": 0.31836336851119995, "step": 2304 }, { "epoch": 2.0765765765765765, "grad_norm": 12.662520675582003, "learning_rate": 2.6235388821945497e-06, "loss": 0.6175304055213928, "step": 2305 }, { "epoch": 2.0774774774774776, "grad_norm": 15.111180942349304, "learning_rate": 2.6189288086960967e-06, "loss": 0.29739707708358765, "step": 2306 }, { "epoch": 2.0783783783783782, "grad_norm": 8.652008298695993, "learning_rate": 2.614321351564265e-06, "loss": 0.5761679410934448, "step": 2307 }, { "epoch": 2.0792792792792794, "grad_norm": 8.931196997633322, "learning_rate": 2.6097165158618205e-06, "loss": 0.6768096685409546, "step": 2308 }, { "epoch": 2.08018018018018, "grad_norm": 8.950653312806862, "learning_rate": 2.6051143066486407e-06, "loss": 0.5083034634590149, "step": 2309 }, { "epoch": 2.081081081081081, "grad_norm": 9.7095512236923, "learning_rate": 2.6005147289817256e-06, "loss": 0.3268805146217346, "step": 2310 }, { "epoch": 2.081981981981982, "grad_norm": 10.69210399126161, "learning_rate": 2.5959177879151757e-06, "loss": 0.8243169784545898, "step": 2311 }, { "epoch": 2.082882882882883, "grad_norm": 8.184778880258797, "learning_rate": 2.5913234885002015e-06, "loss": 0.1818707138299942, "step": 2312 }, { "epoch": 2.0837837837837836, "grad_norm": 12.379077514532058, "learning_rate": 2.5867318357851023e-06, "loss": 0.38324978947639465, "step": 2313 }, { "epoch": 2.0846846846846847, "grad_norm": 9.331217811685404, "learning_rate": 2.582142834815279e-06, "loss": 0.3204289376735687, "step": 2314 }, { "epoch": 2.0855855855855854, "grad_norm": 8.261765748612266, "learning_rate": 2.5775564906332114e-06, "loss": 0.35750338435173035, "step": 2315 }, { "epoch": 2.0864864864864865, "grad_norm": 15.324060478489404, "learning_rate": 2.5729728082784606e-06, "loss": 0.3415447771549225, "step": 2316 }, { "epoch": 2.0873873873873876, "grad_norm": 13.385202003955543, "learning_rate": 2.568391792787668e-06, "loss": 1.0079660415649414, "step": 2317 }, { "epoch": 2.0882882882882883, "grad_norm": 8.717041160105445, "learning_rate": 2.5638134491945375e-06, "loss": 0.38941991329193115, "step": 2318 }, { "epoch": 2.0891891891891894, "grad_norm": 9.207171337601402, "learning_rate": 2.5592377825298454e-06, "loss": 0.4124867916107178, "step": 2319 }, { "epoch": 2.09009009009009, "grad_norm": 10.13164632452567, "learning_rate": 2.5546647978214144e-06, "loss": 0.2182263880968094, "step": 2320 }, { "epoch": 2.090990990990991, "grad_norm": 16.035761249089337, "learning_rate": 2.550094500094137e-06, "loss": 0.9349597692489624, "step": 2321 }, { "epoch": 2.091891891891892, "grad_norm": 9.86133341431888, "learning_rate": 2.545526894369939e-06, "loss": 0.2269417643547058, "step": 2322 }, { "epoch": 2.092792792792793, "grad_norm": 18.080941203665855, "learning_rate": 2.5409619856677914e-06, "loss": 0.5008091926574707, "step": 2323 }, { "epoch": 2.0936936936936936, "grad_norm": 12.781689854501156, "learning_rate": 2.5363997790037076e-06, "loss": 0.37303346395492554, "step": 2324 }, { "epoch": 2.0945945945945947, "grad_norm": 10.169949813847717, "learning_rate": 2.5318402793907225e-06, "loss": 0.2436217963695526, "step": 2325 }, { "epoch": 2.0954954954954954, "grad_norm": 9.588900664652744, "learning_rate": 2.5272834918389072e-06, "loss": 0.5392529964447021, "step": 2326 }, { "epoch": 2.0963963963963965, "grad_norm": 12.280038732928787, "learning_rate": 2.522729421355342e-06, "loss": 0.5733280181884766, "step": 2327 }, { "epoch": 2.097297297297297, "grad_norm": 9.180285847143764, "learning_rate": 2.5181780729441313e-06, "loss": 0.42448052763938904, "step": 2328 }, { "epoch": 2.0981981981981983, "grad_norm": 10.287915105781291, "learning_rate": 2.5136294516063796e-06, "loss": 0.46256181597709656, "step": 2329 }, { "epoch": 2.099099099099099, "grad_norm": 12.602324598544971, "learning_rate": 2.5090835623402033e-06, "loss": 0.5333715677261353, "step": 2330 }, { "epoch": 2.1, "grad_norm": 8.603536738831417, "learning_rate": 2.504540410140708e-06, "loss": 0.46317631006240845, "step": 2331 }, { "epoch": 2.1009009009009008, "grad_norm": 15.252614102903824, "learning_rate": 2.5000000000000015e-06, "loss": 0.2843453884124756, "step": 2332 }, { "epoch": 2.101801801801802, "grad_norm": 11.515043773530387, "learning_rate": 2.495462336907168e-06, "loss": 0.28911393880844116, "step": 2333 }, { "epoch": 2.1027027027027025, "grad_norm": 7.601978267814978, "learning_rate": 2.490927425848284e-06, "loss": 0.2602662444114685, "step": 2334 }, { "epoch": 2.1036036036036037, "grad_norm": 17.107273943657816, "learning_rate": 2.486395271806392e-06, "loss": 0.8394905924797058, "step": 2335 }, { "epoch": 2.1045045045045043, "grad_norm": 11.895873989178591, "learning_rate": 2.481865879761511e-06, "loss": 0.6935086250305176, "step": 2336 }, { "epoch": 2.1054054054054054, "grad_norm": 15.929921426123189, "learning_rate": 2.4773392546906265e-06, "loss": 0.45318153500556946, "step": 2337 }, { "epoch": 2.106306306306306, "grad_norm": 12.85090334195187, "learning_rate": 2.472815401567677e-06, "loss": 0.6440577507019043, "step": 2338 }, { "epoch": 2.1072072072072072, "grad_norm": 13.249989711629635, "learning_rate": 2.468294325363562e-06, "loss": 1.3613526821136475, "step": 2339 }, { "epoch": 2.108108108108108, "grad_norm": 9.83621689947193, "learning_rate": 2.463776031046124e-06, "loss": 0.7230061292648315, "step": 2340 }, { "epoch": 2.109009009009009, "grad_norm": 8.95457234014136, "learning_rate": 2.4592605235801544e-06, "loss": 0.49418580532073975, "step": 2341 }, { "epoch": 2.10990990990991, "grad_norm": 9.937013635860467, "learning_rate": 2.454747807927377e-06, "loss": 0.23637472093105316, "step": 2342 }, { "epoch": 2.110810810810811, "grad_norm": 8.702918528271233, "learning_rate": 2.4502378890464483e-06, "loss": 0.18265977501869202, "step": 2343 }, { "epoch": 2.111711711711712, "grad_norm": 10.920778573170063, "learning_rate": 2.4457307718929583e-06, "loss": 0.46716073155403137, "step": 2344 }, { "epoch": 2.1126126126126126, "grad_norm": 13.178911538717738, "learning_rate": 2.4412264614194094e-06, "loss": 0.37622249126434326, "step": 2345 }, { "epoch": 2.1135135135135137, "grad_norm": 9.556232375847543, "learning_rate": 2.4367249625752277e-06, "loss": 0.3927081227302551, "step": 2346 }, { "epoch": 2.1144144144144144, "grad_norm": 10.204043001488875, "learning_rate": 2.4322262803067426e-06, "loss": 0.43625408411026, "step": 2347 }, { "epoch": 2.1153153153153155, "grad_norm": 9.301710826830018, "learning_rate": 2.427730419557196e-06, "loss": 0.19002637267112732, "step": 2348 }, { "epoch": 2.116216216216216, "grad_norm": 11.528515744715996, "learning_rate": 2.423237385266723e-06, "loss": 0.4054466485977173, "step": 2349 }, { "epoch": 2.1171171171171173, "grad_norm": 6.451910594086666, "learning_rate": 2.4187471823723558e-06, "loss": 0.26523804664611816, "step": 2350 }, { "epoch": 2.118018018018018, "grad_norm": 8.509331181666335, "learning_rate": 2.414259815808019e-06, "loss": 0.24853050708770752, "step": 2351 }, { "epoch": 2.118918918918919, "grad_norm": 10.073616000957662, "learning_rate": 2.4097752905045124e-06, "loss": 0.4062986969947815, "step": 2352 }, { "epoch": 2.1198198198198197, "grad_norm": 14.787379079784657, "learning_rate": 2.4052936113895215e-06, "loss": 0.603924572467804, "step": 2353 }, { "epoch": 2.120720720720721, "grad_norm": 10.804554462370408, "learning_rate": 2.4008147833875984e-06, "loss": 0.1669190526008606, "step": 2354 }, { "epoch": 2.1216216216216215, "grad_norm": 22.251089583499112, "learning_rate": 2.396338811420168e-06, "loss": 0.5073699951171875, "step": 2355 }, { "epoch": 2.1225225225225226, "grad_norm": 8.348405715573369, "learning_rate": 2.391865700405511e-06, "loss": 0.36968138813972473, "step": 2356 }, { "epoch": 2.1234234234234233, "grad_norm": 12.40229749896702, "learning_rate": 2.3873954552587706e-06, "loss": 0.3594987392425537, "step": 2357 }, { "epoch": 2.1243243243243244, "grad_norm": 11.519223455557784, "learning_rate": 2.382928080891934e-06, "loss": 0.7377095222473145, "step": 2358 }, { "epoch": 2.125225225225225, "grad_norm": 8.814689830976269, "learning_rate": 2.3784635822138424e-06, "loss": 0.40884578227996826, "step": 2359 }, { "epoch": 2.126126126126126, "grad_norm": 8.118374260597792, "learning_rate": 2.3740019641301678e-06, "loss": 0.7081960439682007, "step": 2360 }, { "epoch": 2.127027027027027, "grad_norm": 8.206424945616865, "learning_rate": 2.369543231543425e-06, "loss": 0.39920416474342346, "step": 2361 }, { "epoch": 2.127927927927928, "grad_norm": 9.865017992158648, "learning_rate": 2.3650873893529543e-06, "loss": 0.6542052030563354, "step": 2362 }, { "epoch": 2.1288288288288286, "grad_norm": 9.835055140204027, "learning_rate": 2.3606344424549165e-06, "loss": 0.324113130569458, "step": 2363 }, { "epoch": 2.1297297297297297, "grad_norm": 14.045904630933688, "learning_rate": 2.356184395742299e-06, "loss": 0.39475977420806885, "step": 2364 }, { "epoch": 2.1306306306306304, "grad_norm": 8.889340169827602, "learning_rate": 2.3517372541048967e-06, "loss": 0.38433074951171875, "step": 2365 }, { "epoch": 2.1315315315315315, "grad_norm": 10.743076400389564, "learning_rate": 2.347293022429317e-06, "loss": 0.33674681186676025, "step": 2366 }, { "epoch": 2.1324324324324326, "grad_norm": 10.001442027595337, "learning_rate": 2.342851705598962e-06, "loss": 0.28496724367141724, "step": 2367 }, { "epoch": 2.1333333333333333, "grad_norm": 11.78723436159302, "learning_rate": 2.3384133084940404e-06, "loss": 0.23608997464179993, "step": 2368 }, { "epoch": 2.1342342342342344, "grad_norm": 8.147787293186816, "learning_rate": 2.333977835991545e-06, "loss": 0.4251982569694519, "step": 2369 }, { "epoch": 2.135135135135135, "grad_norm": 37.97208785539425, "learning_rate": 2.3295452929652566e-06, "loss": 1.9654648303985596, "step": 2370 }, { "epoch": 2.136036036036036, "grad_norm": 7.966149846560465, "learning_rate": 2.325115684285743e-06, "loss": 0.26858100295066833, "step": 2371 }, { "epoch": 2.136936936936937, "grad_norm": 9.07230141009553, "learning_rate": 2.320689014820338e-06, "loss": 0.48784181475639343, "step": 2372 }, { "epoch": 2.137837837837838, "grad_norm": 14.146368634764396, "learning_rate": 2.316265289433155e-06, "loss": 0.5792821049690247, "step": 2373 }, { "epoch": 2.1387387387387387, "grad_norm": 17.39659726184654, "learning_rate": 2.3118445129850643e-06, "loss": 0.9232209920883179, "step": 2374 }, { "epoch": 2.1396396396396398, "grad_norm": 9.256364427624968, "learning_rate": 2.307426690333704e-06, "loss": 0.34143272042274475, "step": 2375 }, { "epoch": 2.1405405405405404, "grad_norm": 9.824285456445423, "learning_rate": 2.303011826333458e-06, "loss": 0.4122055470943451, "step": 2376 }, { "epoch": 2.1414414414414416, "grad_norm": 7.0748215703635795, "learning_rate": 2.2985999258354662e-06, "loss": 0.11493399739265442, "step": 2377 }, { "epoch": 2.142342342342342, "grad_norm": 9.884795345718247, "learning_rate": 2.2941909936876076e-06, "loss": 0.7939780950546265, "step": 2378 }, { "epoch": 2.1432432432432433, "grad_norm": 17.440534655010104, "learning_rate": 2.2897850347345023e-06, "loss": 0.5796934366226196, "step": 2379 }, { "epoch": 2.144144144144144, "grad_norm": 22.7012418504902, "learning_rate": 2.285382053817504e-06, "loss": 2.958590507507324, "step": 2380 }, { "epoch": 2.145045045045045, "grad_norm": 11.634567805869871, "learning_rate": 2.2809820557746888e-06, "loss": 0.31657877564430237, "step": 2381 }, { "epoch": 2.145945945945946, "grad_norm": 11.937121610028802, "learning_rate": 2.2765850454408622e-06, "loss": 0.471710741519928, "step": 2382 }, { "epoch": 2.146846846846847, "grad_norm": 10.086431126572682, "learning_rate": 2.27219102764754e-06, "loss": 0.4611928164958954, "step": 2383 }, { "epoch": 2.1477477477477476, "grad_norm": 12.466178277328451, "learning_rate": 2.267800007222957e-06, "loss": 0.8834566473960876, "step": 2384 }, { "epoch": 2.1486486486486487, "grad_norm": 13.903990390370952, "learning_rate": 2.2634119889920468e-06, "loss": 0.31093859672546387, "step": 2385 }, { "epoch": 2.1495495495495494, "grad_norm": 9.985430338321182, "learning_rate": 2.2590269777764516e-06, "loss": 0.4155240058898926, "step": 2386 }, { "epoch": 2.1504504504504505, "grad_norm": 8.882716847913825, "learning_rate": 2.2546449783945017e-06, "loss": 1.355088472366333, "step": 2387 }, { "epoch": 2.1513513513513516, "grad_norm": 7.520588110764399, "learning_rate": 2.2502659956612255e-06, "loss": 0.5796226263046265, "step": 2388 }, { "epoch": 2.1522522522522523, "grad_norm": 9.52188001072593, "learning_rate": 2.2458900343883316e-06, "loss": 0.500851571559906, "step": 2389 }, { "epoch": 2.153153153153153, "grad_norm": 28.729014518090274, "learning_rate": 2.2415170993842086e-06, "loss": 2.38733172416687, "step": 2390 }, { "epoch": 2.154054054054054, "grad_norm": 10.034144108723407, "learning_rate": 2.2371471954539236e-06, "loss": 0.6553759574890137, "step": 2391 }, { "epoch": 2.154954954954955, "grad_norm": 15.19632098007346, "learning_rate": 2.2327803273992083e-06, "loss": 0.6325008273124695, "step": 2392 }, { "epoch": 2.155855855855856, "grad_norm": 7.364135092643524, "learning_rate": 2.2284165000184643e-06, "loss": 0.1823403388261795, "step": 2393 }, { "epoch": 2.156756756756757, "grad_norm": 13.138761298931799, "learning_rate": 2.2240557181067428e-06, "loss": 0.4858241379261017, "step": 2394 }, { "epoch": 2.1576576576576576, "grad_norm": 6.964027184356961, "learning_rate": 2.2196979864557624e-06, "loss": 0.20069685578346252, "step": 2395 }, { "epoch": 2.1585585585585587, "grad_norm": 6.45942914944898, "learning_rate": 2.2153433098538775e-06, "loss": 0.17389008402824402, "step": 2396 }, { "epoch": 2.1594594594594594, "grad_norm": 15.783068952470174, "learning_rate": 2.2109916930860894e-06, "loss": 0.6951287388801575, "step": 2397 }, { "epoch": 2.1603603603603605, "grad_norm": 13.483242899827614, "learning_rate": 2.2066431409340406e-06, "loss": 1.5758002996444702, "step": 2398 }, { "epoch": 2.161261261261261, "grad_norm": 7.773860546466419, "learning_rate": 2.202297658176001e-06, "loss": 0.2947731614112854, "step": 2399 }, { "epoch": 2.1621621621621623, "grad_norm": 10.921931898275625, "learning_rate": 2.197955249586873e-06, "loss": 0.29223906993865967, "step": 2400 }, { "epoch": 2.163063063063063, "grad_norm": 11.433754138024282, "learning_rate": 2.1936159199381744e-06, "loss": 0.3557126224040985, "step": 2401 }, { "epoch": 2.163963963963964, "grad_norm": 12.581255246756, "learning_rate": 2.189279673998048e-06, "loss": 0.3648565411567688, "step": 2402 }, { "epoch": 2.1648648648648647, "grad_norm": 8.992064868829692, "learning_rate": 2.1849465165312406e-06, "loss": 0.576091468334198, "step": 2403 }, { "epoch": 2.165765765765766, "grad_norm": 9.631203893299503, "learning_rate": 2.1806164522991118e-06, "loss": 1.0459250211715698, "step": 2404 }, { "epoch": 2.1666666666666665, "grad_norm": 11.29732154517533, "learning_rate": 2.176289486059615e-06, "loss": 0.18195028603076935, "step": 2405 }, { "epoch": 2.1675675675675676, "grad_norm": 10.942530797224242, "learning_rate": 2.171965622567308e-06, "loss": 0.20159509778022766, "step": 2406 }, { "epoch": 2.1684684684684683, "grad_norm": 10.717915076320711, "learning_rate": 2.1676448665733326e-06, "loss": 0.20560519397258759, "step": 2407 }, { "epoch": 2.1693693693693694, "grad_norm": 10.880165426298795, "learning_rate": 2.1633272228254166e-06, "loss": 1.1594582796096802, "step": 2408 }, { "epoch": 2.17027027027027, "grad_norm": 10.20619152670396, "learning_rate": 2.159012696067871e-06, "loss": 0.9054312109947205, "step": 2409 }, { "epoch": 2.171171171171171, "grad_norm": 12.232009152826985, "learning_rate": 2.1547012910415804e-06, "loss": 0.3813351094722748, "step": 2410 }, { "epoch": 2.172072072072072, "grad_norm": 9.682627987639773, "learning_rate": 2.1503930124840017e-06, "loss": 0.43170493841171265, "step": 2411 }, { "epoch": 2.172972972972973, "grad_norm": 10.393687565580198, "learning_rate": 2.14608786512915e-06, "loss": 0.3984493017196655, "step": 2412 }, { "epoch": 2.173873873873874, "grad_norm": 7.4258436830740795, "learning_rate": 2.141785853707607e-06, "loss": 0.35718607902526855, "step": 2413 }, { "epoch": 2.1747747747747748, "grad_norm": 9.237375290222118, "learning_rate": 2.1374869829465016e-06, "loss": 0.6362316608428955, "step": 2414 }, { "epoch": 2.175675675675676, "grad_norm": 36.668523947173654, "learning_rate": 2.1331912575695197e-06, "loss": 2.187713384628296, "step": 2415 }, { "epoch": 2.1765765765765765, "grad_norm": 17.290448536633644, "learning_rate": 2.128898682296884e-06, "loss": 1.1021785736083984, "step": 2416 }, { "epoch": 2.1774774774774777, "grad_norm": 10.308633095025803, "learning_rate": 2.1246092618453562e-06, "loss": 0.6403588056564331, "step": 2417 }, { "epoch": 2.1783783783783783, "grad_norm": 8.007542686060276, "learning_rate": 2.1203230009282383e-06, "loss": 0.3731953501701355, "step": 2418 }, { "epoch": 2.1792792792792794, "grad_norm": 17.30734817093238, "learning_rate": 2.116039904255352e-06, "loss": 0.32193267345428467, "step": 2419 }, { "epoch": 2.18018018018018, "grad_norm": 34.71799983037904, "learning_rate": 2.111759976533049e-06, "loss": 0.6704785823822021, "step": 2420 }, { "epoch": 2.1810810810810812, "grad_norm": 8.985538491621071, "learning_rate": 2.107483222464193e-06, "loss": 0.456855833530426, "step": 2421 }, { "epoch": 2.181981981981982, "grad_norm": 9.028039853273526, "learning_rate": 2.1032096467481665e-06, "loss": 0.3212411403656006, "step": 2422 }, { "epoch": 2.182882882882883, "grad_norm": 7.9969597536234724, "learning_rate": 2.098939254080853e-06, "loss": 0.2175375372171402, "step": 2423 }, { "epoch": 2.1837837837837837, "grad_norm": 8.233360129183177, "learning_rate": 2.094672049154643e-06, "loss": 0.40003734827041626, "step": 2424 }, { "epoch": 2.184684684684685, "grad_norm": 10.031265273306468, "learning_rate": 2.0904080366584252e-06, "loss": 0.6713770031929016, "step": 2425 }, { "epoch": 2.1855855855855855, "grad_norm": 8.430225178171233, "learning_rate": 2.086147221277574e-06, "loss": 0.5254551768302917, "step": 2426 }, { "epoch": 2.1864864864864866, "grad_norm": 8.961104515774558, "learning_rate": 2.0818896076939597e-06, "loss": 0.3879295587539673, "step": 2427 }, { "epoch": 2.1873873873873872, "grad_norm": 17.770233303825282, "learning_rate": 2.0776352005859253e-06, "loss": 1.496819019317627, "step": 2428 }, { "epoch": 2.1882882882882884, "grad_norm": 8.795502706383143, "learning_rate": 2.0733840046282976e-06, "loss": 0.17569570243358612, "step": 2429 }, { "epoch": 2.189189189189189, "grad_norm": 9.95446266550367, "learning_rate": 2.06913602449237e-06, "loss": 0.5640667676925659, "step": 2430 }, { "epoch": 2.19009009009009, "grad_norm": 7.81021561141362, "learning_rate": 2.0648912648459072e-06, "loss": 0.3175254762172699, "step": 2431 }, { "epoch": 2.190990990990991, "grad_norm": 15.207805329851729, "learning_rate": 2.0606497303531297e-06, "loss": 0.40925803780555725, "step": 2432 }, { "epoch": 2.191891891891892, "grad_norm": 7.485286956715557, "learning_rate": 2.056411425674719e-06, "loss": 0.16778123378753662, "step": 2433 }, { "epoch": 2.1927927927927926, "grad_norm": 8.262220768520656, "learning_rate": 2.0521763554678048e-06, "loss": 0.7765547633171082, "step": 2434 }, { "epoch": 2.1936936936936937, "grad_norm": 10.882030688721331, "learning_rate": 2.0479445243859608e-06, "loss": 0.33397090435028076, "step": 2435 }, { "epoch": 2.1945945945945944, "grad_norm": 14.131737948130377, "learning_rate": 2.0437159370792083e-06, "loss": 0.49660491943359375, "step": 2436 }, { "epoch": 2.1954954954954955, "grad_norm": 12.712321066414388, "learning_rate": 2.0394905981939956e-06, "loss": 0.31156596541404724, "step": 2437 }, { "epoch": 2.1963963963963966, "grad_norm": 9.8263724714149, "learning_rate": 2.035268512373208e-06, "loss": 0.25942644476890564, "step": 2438 }, { "epoch": 2.1972972972972973, "grad_norm": 13.133466660245679, "learning_rate": 2.031049684256155e-06, "loss": 0.3258337378501892, "step": 2439 }, { "epoch": 2.1981981981981984, "grad_norm": 10.77065756877913, "learning_rate": 2.0268341184785674e-06, "loss": 0.4338672161102295, "step": 2440 }, { "epoch": 2.199099099099099, "grad_norm": 14.090799988470666, "learning_rate": 2.0226218196725865e-06, "loss": 0.865012526512146, "step": 2441 }, { "epoch": 2.2, "grad_norm": 7.940566930932142, "learning_rate": 2.0184127924667667e-06, "loss": 0.2968350350856781, "step": 2442 }, { "epoch": 2.200900900900901, "grad_norm": 13.81979852268089, "learning_rate": 2.0142070414860704e-06, "loss": 0.9437194466590881, "step": 2443 }, { "epoch": 2.201801801801802, "grad_norm": 7.361096080603361, "learning_rate": 2.010004571351854e-06, "loss": 0.6113213896751404, "step": 2444 }, { "epoch": 2.2027027027027026, "grad_norm": 15.194571416317675, "learning_rate": 2.0058053866818757e-06, "loss": 0.6607286930084229, "step": 2445 }, { "epoch": 2.2036036036036037, "grad_norm": 17.15512647139195, "learning_rate": 2.001609492090276e-06, "loss": 0.37218135595321655, "step": 2446 }, { "epoch": 2.2045045045045044, "grad_norm": 13.243856861488625, "learning_rate": 1.9974168921875886e-06, "loss": 0.5706183910369873, "step": 2447 }, { "epoch": 2.2054054054054055, "grad_norm": 9.72539519842245, "learning_rate": 1.9932275915807187e-06, "loss": 0.37227556109428406, "step": 2448 }, { "epoch": 2.206306306306306, "grad_norm": 8.531348273807625, "learning_rate": 1.9890415948729537e-06, "loss": 0.4036311209201813, "step": 2449 }, { "epoch": 2.2072072072072073, "grad_norm": 10.99128499408585, "learning_rate": 1.984858906663943e-06, "loss": 0.612502932548523, "step": 2450 }, { "epoch": 2.208108108108108, "grad_norm": 12.58053319709825, "learning_rate": 1.9806795315497078e-06, "loss": 0.931442379951477, "step": 2451 }, { "epoch": 2.209009009009009, "grad_norm": 11.15221590061521, "learning_rate": 1.9765034741226234e-06, "loss": 1.0826082229614258, "step": 2452 }, { "epoch": 2.2099099099099098, "grad_norm": 9.658617398693465, "learning_rate": 1.972330738971422e-06, "loss": 0.4539331793785095, "step": 2453 }, { "epoch": 2.210810810810811, "grad_norm": 10.455885997282982, "learning_rate": 1.968161330681187e-06, "loss": 0.7638237476348877, "step": 2454 }, { "epoch": 2.2117117117117115, "grad_norm": 9.098237441832358, "learning_rate": 1.9639952538333413e-06, "loss": 0.4162856340408325, "step": 2455 }, { "epoch": 2.2126126126126127, "grad_norm": 16.96984265892915, "learning_rate": 1.959832513005652e-06, "loss": 0.4629131555557251, "step": 2456 }, { "epoch": 2.2135135135135133, "grad_norm": 9.889359503482789, "learning_rate": 1.9556731127722167e-06, "loss": 0.4515780806541443, "step": 2457 }, { "epoch": 2.2144144144144144, "grad_norm": 14.105699173442542, "learning_rate": 1.9515170577034657e-06, "loss": 0.4987364411354065, "step": 2458 }, { "epoch": 2.215315315315315, "grad_norm": 9.125740656362229, "learning_rate": 1.94736435236615e-06, "loss": 0.3197804093360901, "step": 2459 }, { "epoch": 2.2162162162162162, "grad_norm": 13.964508580925338, "learning_rate": 1.9432150013233442e-06, "loss": 0.40308842062950134, "step": 2460 }, { "epoch": 2.217117117117117, "grad_norm": 8.975587496434065, "learning_rate": 1.9390690091344334e-06, "loss": 0.3318456709384918, "step": 2461 }, { "epoch": 2.218018018018018, "grad_norm": 12.20975301109764, "learning_rate": 1.9349263803551112e-06, "loss": 0.43268558382987976, "step": 2462 }, { "epoch": 2.218918918918919, "grad_norm": 10.080429017858357, "learning_rate": 1.930787119537381e-06, "loss": 0.6931536793708801, "step": 2463 }, { "epoch": 2.21981981981982, "grad_norm": 9.745510024467253, "learning_rate": 1.9266512312295387e-06, "loss": 0.27146437764167786, "step": 2464 }, { "epoch": 2.220720720720721, "grad_norm": 8.48493869634266, "learning_rate": 1.922518719976181e-06, "loss": 0.4015350937843323, "step": 2465 }, { "epoch": 2.2216216216216216, "grad_norm": 14.834625904722294, "learning_rate": 1.9183895903181865e-06, "loss": 0.6845652461051941, "step": 2466 }, { "epoch": 2.2225225225225227, "grad_norm": 7.359554443415203, "learning_rate": 1.9142638467927254e-06, "loss": 0.2753450870513916, "step": 2467 }, { "epoch": 2.2234234234234234, "grad_norm": 17.578800532609876, "learning_rate": 1.9101414939332408e-06, "loss": 0.5036535263061523, "step": 2468 }, { "epoch": 2.2243243243243245, "grad_norm": 14.711787094540576, "learning_rate": 1.9060225362694546e-06, "loss": 0.5625418424606323, "step": 2469 }, { "epoch": 2.225225225225225, "grad_norm": 16.115394126319416, "learning_rate": 1.9019069783273575e-06, "loss": 0.6296632289886475, "step": 2470 }, { "epoch": 2.2261261261261263, "grad_norm": 6.62237801067677, "learning_rate": 1.8977948246292e-06, "loss": 0.5151411294937134, "step": 2471 }, { "epoch": 2.227027027027027, "grad_norm": 10.63023502401902, "learning_rate": 1.8936860796934997e-06, "loss": 0.5274820923805237, "step": 2472 }, { "epoch": 2.227927927927928, "grad_norm": 13.538455346956445, "learning_rate": 1.8895807480350199e-06, "loss": 0.23670229315757751, "step": 2473 }, { "epoch": 2.2288288288288287, "grad_norm": 17.069500581244135, "learning_rate": 1.8854788341647812e-06, "loss": 0.9304606914520264, "step": 2474 }, { "epoch": 2.22972972972973, "grad_norm": 8.322299236674336, "learning_rate": 1.8813803425900418e-06, "loss": 0.3520248234272003, "step": 2475 }, { "epoch": 2.2306306306306305, "grad_norm": 14.283858524338202, "learning_rate": 1.8772852778143064e-06, "loss": 0.6242378950119019, "step": 2476 }, { "epoch": 2.2315315315315316, "grad_norm": 13.279737764878936, "learning_rate": 1.8731936443373067e-06, "loss": 0.545856773853302, "step": 2477 }, { "epoch": 2.2324324324324323, "grad_norm": 8.937322561874446, "learning_rate": 1.8691054466550117e-06, "loss": 0.38435298204421997, "step": 2478 }, { "epoch": 2.2333333333333334, "grad_norm": 11.025142158736418, "learning_rate": 1.8650206892596079e-06, "loss": 0.5997890830039978, "step": 2479 }, { "epoch": 2.234234234234234, "grad_norm": 9.921913825754329, "learning_rate": 1.8609393766395083e-06, "loss": 0.48749783635139465, "step": 2480 }, { "epoch": 2.235135135135135, "grad_norm": 6.977825819869559, "learning_rate": 1.8568615132793356e-06, "loss": 0.22039154171943665, "step": 2481 }, { "epoch": 2.236036036036036, "grad_norm": 13.279582479188377, "learning_rate": 1.852787103659922e-06, "loss": 0.3317052125930786, "step": 2482 }, { "epoch": 2.236936936936937, "grad_norm": 27.894743328137633, "learning_rate": 1.8487161522583092e-06, "loss": 1.0018060207366943, "step": 2483 }, { "epoch": 2.237837837837838, "grad_norm": 9.609542752211466, "learning_rate": 1.844648663547736e-06, "loss": 0.4543810784816742, "step": 2484 }, { "epoch": 2.2387387387387387, "grad_norm": 11.633358601966219, "learning_rate": 1.8405846419976397e-06, "loss": 0.2692357301712036, "step": 2485 }, { "epoch": 2.2396396396396394, "grad_norm": 10.667005608032856, "learning_rate": 1.8365240920736405e-06, "loss": 0.380027711391449, "step": 2486 }, { "epoch": 2.2405405405405405, "grad_norm": 9.950352591242341, "learning_rate": 1.8324670182375521e-06, "loss": 0.3946274518966675, "step": 2487 }, { "epoch": 2.2414414414414416, "grad_norm": 10.512109195252133, "learning_rate": 1.8284134249473629e-06, "loss": 0.5828955769538879, "step": 2488 }, { "epoch": 2.2423423423423423, "grad_norm": 10.123779883062127, "learning_rate": 1.8243633166572378e-06, "loss": 0.9141032695770264, "step": 2489 }, { "epoch": 2.2432432432432434, "grad_norm": 9.318644467876203, "learning_rate": 1.8203166978175163e-06, "loss": 0.4999552369117737, "step": 2490 }, { "epoch": 2.244144144144144, "grad_norm": 14.020312178464735, "learning_rate": 1.8162735728746978e-06, "loss": 0.4719406068325043, "step": 2491 }, { "epoch": 2.245045045045045, "grad_norm": 6.028506071219212, "learning_rate": 1.8122339462714477e-06, "loss": 0.23761707544326782, "step": 2492 }, { "epoch": 2.245945945945946, "grad_norm": 11.139492296855032, "learning_rate": 1.808197822446583e-06, "loss": 1.1314280033111572, "step": 2493 }, { "epoch": 2.246846846846847, "grad_norm": 13.521041085334653, "learning_rate": 1.8041652058350768e-06, "loss": 0.4234413504600525, "step": 2494 }, { "epoch": 2.2477477477477477, "grad_norm": 10.977329354311351, "learning_rate": 1.8001361008680413e-06, "loss": 0.28913938999176025, "step": 2495 }, { "epoch": 2.2486486486486488, "grad_norm": 8.074083512402291, "learning_rate": 1.7961105119727385e-06, "loss": 0.6953212022781372, "step": 2496 }, { "epoch": 2.2495495495495494, "grad_norm": 12.551896231855528, "learning_rate": 1.7920884435725594e-06, "loss": 0.5405001044273376, "step": 2497 }, { "epoch": 2.2504504504504506, "grad_norm": 13.524228680295014, "learning_rate": 1.78806990008703e-06, "loss": 0.3291250169277191, "step": 2498 }, { "epoch": 2.2513513513513512, "grad_norm": 16.330699738586897, "learning_rate": 1.7840548859318063e-06, "loss": 0.6878585815429688, "step": 2499 }, { "epoch": 2.2522522522522523, "grad_norm": 13.457245126255158, "learning_rate": 1.7800434055186583e-06, "loss": 0.419866681098938, "step": 2500 }, { "epoch": 2.253153153153153, "grad_norm": 9.657317852558684, "learning_rate": 1.776035463255481e-06, "loss": 0.5788836479187012, "step": 2501 }, { "epoch": 2.254054054054054, "grad_norm": 9.96528767047478, "learning_rate": 1.7720310635462751e-06, "loss": 0.3604414463043213, "step": 2502 }, { "epoch": 2.254954954954955, "grad_norm": 8.826596167069177, "learning_rate": 1.7680302107911546e-06, "loss": 0.182212233543396, "step": 2503 }, { "epoch": 2.255855855855856, "grad_norm": 11.671071130251336, "learning_rate": 1.76403290938633e-06, "loss": 1.0697684288024902, "step": 2504 }, { "epoch": 2.2567567567567566, "grad_norm": 8.512611959151466, "learning_rate": 1.760039163724116e-06, "loss": 0.6069210171699524, "step": 2505 }, { "epoch": 2.2576576576576577, "grad_norm": 8.335018038957672, "learning_rate": 1.7560489781929135e-06, "loss": 0.3821689486503601, "step": 2506 }, { "epoch": 2.2585585585585584, "grad_norm": 9.372113231197151, "learning_rate": 1.7520623571772182e-06, "loss": 0.8013373017311096, "step": 2507 }, { "epoch": 2.2594594594594595, "grad_norm": 8.966361160398138, "learning_rate": 1.7480793050576034e-06, "loss": 0.3552350699901581, "step": 2508 }, { "epoch": 2.2603603603603606, "grad_norm": 17.49916424185179, "learning_rate": 1.7440998262107223e-06, "loss": 1.0693227052688599, "step": 2509 }, { "epoch": 2.2612612612612613, "grad_norm": 13.105480325359768, "learning_rate": 1.740123925009305e-06, "loss": 1.0125956535339355, "step": 2510 }, { "epoch": 2.262162162162162, "grad_norm": 13.524544588888908, "learning_rate": 1.7361516058221445e-06, "loss": 0.38627469539642334, "step": 2511 }, { "epoch": 2.263063063063063, "grad_norm": 6.906671819286857, "learning_rate": 1.7321828730141039e-06, "loss": 0.3798505663871765, "step": 2512 }, { "epoch": 2.263963963963964, "grad_norm": 6.949145715764155, "learning_rate": 1.7282177309461019e-06, "loss": 0.27201440930366516, "step": 2513 }, { "epoch": 2.264864864864865, "grad_norm": 9.674123290954732, "learning_rate": 1.7242561839751138e-06, "loss": 0.8238488435745239, "step": 2514 }, { "epoch": 2.265765765765766, "grad_norm": 11.49045505197047, "learning_rate": 1.7202982364541626e-06, "loss": 0.5696356296539307, "step": 2515 }, { "epoch": 2.2666666666666666, "grad_norm": 12.344978291788385, "learning_rate": 1.716343892732315e-06, "loss": 0.5812622308731079, "step": 2516 }, { "epoch": 2.2675675675675677, "grad_norm": 11.180196352054196, "learning_rate": 1.7123931571546826e-06, "loss": 0.522317111492157, "step": 2517 }, { "epoch": 2.2684684684684684, "grad_norm": 11.142128684527036, "learning_rate": 1.708446034062406e-06, "loss": 0.23921099305152893, "step": 2518 }, { "epoch": 2.2693693693693695, "grad_norm": 12.743258867382897, "learning_rate": 1.7045025277926635e-06, "loss": 0.5311107635498047, "step": 2519 }, { "epoch": 2.27027027027027, "grad_norm": 8.283767953358945, "learning_rate": 1.700562642678651e-06, "loss": 0.43987730145454407, "step": 2520 }, { "epoch": 2.2711711711711713, "grad_norm": 10.346603653226152, "learning_rate": 1.6966263830495939e-06, "loss": 0.7479614615440369, "step": 2521 }, { "epoch": 2.272072072072072, "grad_norm": 8.60648393048622, "learning_rate": 1.6926937532307259e-06, "loss": 0.251346230506897, "step": 2522 }, { "epoch": 2.272972972972973, "grad_norm": 6.609265596451475, "learning_rate": 1.6887647575432992e-06, "loss": 0.1455269753932953, "step": 2523 }, { "epoch": 2.2738738738738737, "grad_norm": 13.412711990056366, "learning_rate": 1.6848394003045671e-06, "loss": 0.28844141960144043, "step": 2524 }, { "epoch": 2.274774774774775, "grad_norm": 10.126437662126426, "learning_rate": 1.6809176858277892e-06, "loss": 0.6623111963272095, "step": 2525 }, { "epoch": 2.2756756756756755, "grad_norm": 9.88890973645228, "learning_rate": 1.676999618422218e-06, "loss": 0.3788198232650757, "step": 2526 }, { "epoch": 2.2765765765765766, "grad_norm": 10.061979823159149, "learning_rate": 1.6730852023931027e-06, "loss": 0.2634226083755493, "step": 2527 }, { "epoch": 2.2774774774774773, "grad_norm": 11.74720974812679, "learning_rate": 1.66917444204168e-06, "loss": 0.5780117511749268, "step": 2528 }, { "epoch": 2.2783783783783784, "grad_norm": 12.010340649651225, "learning_rate": 1.665267341665165e-06, "loss": 0.42022332549095154, "step": 2529 }, { "epoch": 2.279279279279279, "grad_norm": 14.171202814097644, "learning_rate": 1.6613639055567583e-06, "loss": 0.5564107298851013, "step": 2530 }, { "epoch": 2.28018018018018, "grad_norm": 10.818458942171763, "learning_rate": 1.6574641380056255e-06, "loss": 0.5242595672607422, "step": 2531 }, { "epoch": 2.281081081081081, "grad_norm": 13.884314958956862, "learning_rate": 1.6535680432969104e-06, "loss": 0.39503562450408936, "step": 2532 }, { "epoch": 2.281981981981982, "grad_norm": 8.675564235796271, "learning_rate": 1.649675625711713e-06, "loss": 0.49121540784835815, "step": 2533 }, { "epoch": 2.282882882882883, "grad_norm": 9.267858274942872, "learning_rate": 1.6457868895270995e-06, "loss": 0.3930075466632843, "step": 2534 }, { "epoch": 2.2837837837837838, "grad_norm": 15.418742829045566, "learning_rate": 1.6419018390160857e-06, "loss": 0.4331180453300476, "step": 2535 }, { "epoch": 2.2846846846846844, "grad_norm": 12.741874420570895, "learning_rate": 1.6380204784476383e-06, "loss": 0.44317787885665894, "step": 2536 }, { "epoch": 2.2855855855855856, "grad_norm": 13.591914075283237, "learning_rate": 1.6341428120866737e-06, "loss": 0.330650269985199, "step": 2537 }, { "epoch": 2.2864864864864867, "grad_norm": 13.321801721382402, "learning_rate": 1.630268844194043e-06, "loss": 0.244010791182518, "step": 2538 }, { "epoch": 2.2873873873873873, "grad_norm": 12.7555133729587, "learning_rate": 1.6263985790265384e-06, "loss": 0.6554090976715088, "step": 2539 }, { "epoch": 2.2882882882882885, "grad_norm": 9.937192086504613, "learning_rate": 1.62253202083688e-06, "loss": 0.45852115750312805, "step": 2540 }, { "epoch": 2.289189189189189, "grad_norm": 18.11730435228161, "learning_rate": 1.6186691738737176e-06, "loss": 0.8002996444702148, "step": 2541 }, { "epoch": 2.2900900900900902, "grad_norm": 13.036139193085996, "learning_rate": 1.6148100423816188e-06, "loss": 0.39820799231529236, "step": 2542 }, { "epoch": 2.290990990990991, "grad_norm": 13.64142883016874, "learning_rate": 1.610954630601073e-06, "loss": 0.3235979676246643, "step": 2543 }, { "epoch": 2.291891891891892, "grad_norm": 8.455559823756069, "learning_rate": 1.6071029427684826e-06, "loss": 0.6476882696151733, "step": 2544 }, { "epoch": 2.2927927927927927, "grad_norm": 10.194436555749911, "learning_rate": 1.603254983116151e-06, "loss": 0.45526450872421265, "step": 2545 }, { "epoch": 2.293693693693694, "grad_norm": 22.08213491216057, "learning_rate": 1.599410755872295e-06, "loss": 1.0749083757400513, "step": 2546 }, { "epoch": 2.2945945945945945, "grad_norm": 8.341265281719512, "learning_rate": 1.5955702652610205e-06, "loss": 0.35646599531173706, "step": 2547 }, { "epoch": 2.2954954954954956, "grad_norm": 9.919880900975684, "learning_rate": 1.5917335155023368e-06, "loss": 0.47308945655822754, "step": 2548 }, { "epoch": 2.2963963963963963, "grad_norm": 10.866637431216345, "learning_rate": 1.587900510812133e-06, "loss": 0.41832196712493896, "step": 2549 }, { "epoch": 2.2972972972972974, "grad_norm": 8.53550263179792, "learning_rate": 1.584071255402193e-06, "loss": 0.4713675081729889, "step": 2550 }, { "epoch": 2.298198198198198, "grad_norm": 22.36561781790583, "learning_rate": 1.580245753480172e-06, "loss": 0.6573231220245361, "step": 2551 }, { "epoch": 2.299099099099099, "grad_norm": 13.552639250933119, "learning_rate": 1.5764240092496075e-06, "loss": 0.8595783710479736, "step": 2552 }, { "epoch": 2.3, "grad_norm": 12.396325659130758, "learning_rate": 1.572606026909903e-06, "loss": 0.3619859516620636, "step": 2553 }, { "epoch": 2.300900900900901, "grad_norm": 11.930996683693635, "learning_rate": 1.5687918106563326e-06, "loss": 0.3102957606315613, "step": 2554 }, { "epoch": 2.301801801801802, "grad_norm": 9.093928475670925, "learning_rate": 1.56498136468003e-06, "loss": 0.31744280457496643, "step": 2555 }, { "epoch": 2.3027027027027027, "grad_norm": 9.498696613955307, "learning_rate": 1.5611746931679823e-06, "loss": 0.2954990565776825, "step": 2556 }, { "epoch": 2.3036036036036034, "grad_norm": 10.863801651131498, "learning_rate": 1.557371800303039e-06, "loss": 0.2831108868122101, "step": 2557 }, { "epoch": 2.3045045045045045, "grad_norm": 6.796188005580747, "learning_rate": 1.5535726902638881e-06, "loss": 0.2826022505760193, "step": 2558 }, { "epoch": 2.3054054054054056, "grad_norm": 10.086167587831634, "learning_rate": 1.5497773672250665e-06, "loss": 0.9375733137130737, "step": 2559 }, { "epoch": 2.3063063063063063, "grad_norm": 7.873436154423617, "learning_rate": 1.5459858353569446e-06, "loss": 0.6757045984268188, "step": 2560 }, { "epoch": 2.3072072072072074, "grad_norm": 9.980499820582452, "learning_rate": 1.542198098825734e-06, "loss": 0.27272501587867737, "step": 2561 }, { "epoch": 2.308108108108108, "grad_norm": 12.057179868709511, "learning_rate": 1.5384141617934706e-06, "loss": 0.3203151226043701, "step": 2562 }, { "epoch": 2.309009009009009, "grad_norm": 10.549196804146696, "learning_rate": 1.534634028418015e-06, "loss": 0.3926043212413788, "step": 2563 }, { "epoch": 2.30990990990991, "grad_norm": 12.370776973760751, "learning_rate": 1.530857702853053e-06, "loss": 0.35921239852905273, "step": 2564 }, { "epoch": 2.310810810810811, "grad_norm": 12.012741041762762, "learning_rate": 1.5270851892480808e-06, "loss": 1.428120493888855, "step": 2565 }, { "epoch": 2.3117117117117116, "grad_norm": 10.442543731233824, "learning_rate": 1.5233164917484117e-06, "loss": 0.6229287385940552, "step": 2566 }, { "epoch": 2.3126126126126128, "grad_norm": 10.440185732887818, "learning_rate": 1.5195516144951594e-06, "loss": 0.5179519653320312, "step": 2567 }, { "epoch": 2.3135135135135134, "grad_norm": 9.965937114605188, "learning_rate": 1.515790561625246e-06, "loss": 0.4289321303367615, "step": 2568 }, { "epoch": 2.3144144144144145, "grad_norm": 13.707154743862104, "learning_rate": 1.5120333372713858e-06, "loss": 0.8441285490989685, "step": 2569 }, { "epoch": 2.315315315315315, "grad_norm": 9.826595994238435, "learning_rate": 1.5082799455620917e-06, "loss": 0.38468870520591736, "step": 2570 }, { "epoch": 2.3162162162162163, "grad_norm": 8.11546553655381, "learning_rate": 1.5045303906216596e-06, "loss": 0.2075853943824768, "step": 2571 }, { "epoch": 2.317117117117117, "grad_norm": 9.693915192091673, "learning_rate": 1.5007846765701734e-06, "loss": 0.9084070324897766, "step": 2572 }, { "epoch": 2.318018018018018, "grad_norm": 13.014854980149707, "learning_rate": 1.4970428075234966e-06, "loss": 0.46323809027671814, "step": 2573 }, { "epoch": 2.3189189189189188, "grad_norm": 10.01190665467692, "learning_rate": 1.493304787593265e-06, "loss": 0.48933979868888855, "step": 2574 }, { "epoch": 2.31981981981982, "grad_norm": 11.709740034571546, "learning_rate": 1.4895706208868876e-06, "loss": 0.284282386302948, "step": 2575 }, { "epoch": 2.3207207207207206, "grad_norm": 9.884925859468567, "learning_rate": 1.485840311507537e-06, "loss": 0.41380974650382996, "step": 2576 }, { "epoch": 2.3216216216216217, "grad_norm": 8.315359936098643, "learning_rate": 1.4821138635541505e-06, "loss": 0.537804901599884, "step": 2577 }, { "epoch": 2.3225225225225223, "grad_norm": 14.054287179309345, "learning_rate": 1.4783912811214179e-06, "loss": 0.37012577056884766, "step": 2578 }, { "epoch": 2.3234234234234235, "grad_norm": 15.041886225994705, "learning_rate": 1.474672568299787e-06, "loss": 0.7804893851280212, "step": 2579 }, { "epoch": 2.3243243243243246, "grad_norm": 12.958133604832721, "learning_rate": 1.470957729175449e-06, "loss": 0.8584966063499451, "step": 2580 }, { "epoch": 2.3252252252252252, "grad_norm": 12.234597497681884, "learning_rate": 1.4672467678303386e-06, "loss": 1.5729799270629883, "step": 2581 }, { "epoch": 2.326126126126126, "grad_norm": 12.290000307843565, "learning_rate": 1.4635396883421348e-06, "loss": 0.6885578036308289, "step": 2582 }, { "epoch": 2.327027027027027, "grad_norm": 9.643336005698746, "learning_rate": 1.459836494784244e-06, "loss": 0.2617989182472229, "step": 2583 }, { "epoch": 2.327927927927928, "grad_norm": 13.168974770375483, "learning_rate": 1.45613719122581e-06, "loss": 0.24258741736412048, "step": 2584 }, { "epoch": 2.328828828828829, "grad_norm": 7.904785567343135, "learning_rate": 1.4524417817316943e-06, "loss": 0.31178921461105347, "step": 2585 }, { "epoch": 2.32972972972973, "grad_norm": 10.8750654487972, "learning_rate": 1.448750270362486e-06, "loss": 0.8041453957557678, "step": 2586 }, { "epoch": 2.3306306306306306, "grad_norm": 12.17876529259977, "learning_rate": 1.4450626611744878e-06, "loss": 1.0193281173706055, "step": 2587 }, { "epoch": 2.3315315315315317, "grad_norm": 12.944920777841014, "learning_rate": 1.4413789582197174e-06, "loss": 0.7654247879981995, "step": 2588 }, { "epoch": 2.3324324324324324, "grad_norm": 13.56035559244477, "learning_rate": 1.437699165545896e-06, "loss": 0.27251487970352173, "step": 2589 }, { "epoch": 2.3333333333333335, "grad_norm": 7.364928049136203, "learning_rate": 1.4340232871964494e-06, "loss": 0.5466756224632263, "step": 2590 }, { "epoch": 2.334234234234234, "grad_norm": 16.054428607299386, "learning_rate": 1.4303513272105057e-06, "loss": 0.7691991329193115, "step": 2591 }, { "epoch": 2.3351351351351353, "grad_norm": 10.12932624025978, "learning_rate": 1.426683289622882e-06, "loss": 0.5401148796081543, "step": 2592 }, { "epoch": 2.336036036036036, "grad_norm": 11.568363376502854, "learning_rate": 1.4230191784640911e-06, "loss": 0.434628427028656, "step": 2593 }, { "epoch": 2.336936936936937, "grad_norm": 12.54318580685461, "learning_rate": 1.4193589977603261e-06, "loss": 1.3618683815002441, "step": 2594 }, { "epoch": 2.3378378378378377, "grad_norm": 9.090731008706989, "learning_rate": 1.415702751533466e-06, "loss": 0.3896636366844177, "step": 2595 }, { "epoch": 2.338738738738739, "grad_norm": 12.430186836267136, "learning_rate": 1.4120504438010608e-06, "loss": 0.7276833653450012, "step": 2596 }, { "epoch": 2.3396396396396395, "grad_norm": 20.635176549077414, "learning_rate": 1.4084020785763403e-06, "loss": 1.3383803367614746, "step": 2597 }, { "epoch": 2.3405405405405406, "grad_norm": 8.024414170177936, "learning_rate": 1.4047576598681945e-06, "loss": 0.5920839309692383, "step": 2598 }, { "epoch": 2.3414414414414413, "grad_norm": 10.529672228076668, "learning_rate": 1.4011171916811833e-06, "loss": 0.7410644292831421, "step": 2599 }, { "epoch": 2.3423423423423424, "grad_norm": 16.447855063474016, "learning_rate": 1.397480678015522e-06, "loss": 1.6644569635391235, "step": 2600 }, { "epoch": 2.343243243243243, "grad_norm": 18.214932581055802, "learning_rate": 1.3938481228670775e-06, "loss": 0.43077927827835083, "step": 2601 }, { "epoch": 2.344144144144144, "grad_norm": 19.84624411854799, "learning_rate": 1.390219530227378e-06, "loss": 0.26516780257225037, "step": 2602 }, { "epoch": 2.345045045045045, "grad_norm": 7.62250931529997, "learning_rate": 1.3865949040835851e-06, "loss": 0.8737815618515015, "step": 2603 }, { "epoch": 2.345945945945946, "grad_norm": 17.033630577429417, "learning_rate": 1.382974248418511e-06, "loss": 0.43705296516418457, "step": 2604 }, { "epoch": 2.346846846846847, "grad_norm": 8.45217583005999, "learning_rate": 1.3793575672105986e-06, "loss": 0.24107986688613892, "step": 2605 }, { "epoch": 2.3477477477477477, "grad_norm": 17.71263020332587, "learning_rate": 1.3757448644339288e-06, "loss": 0.3925956189632416, "step": 2606 }, { "epoch": 2.3486486486486484, "grad_norm": 8.510473014540176, "learning_rate": 1.372136144058208e-06, "loss": 0.36841726303100586, "step": 2607 }, { "epoch": 2.3495495495495495, "grad_norm": 11.87396863630054, "learning_rate": 1.368531410048765e-06, "loss": 0.9469922780990601, "step": 2608 }, { "epoch": 2.3504504504504506, "grad_norm": 12.730564315864262, "learning_rate": 1.364930666366554e-06, "loss": 0.6587448120117188, "step": 2609 }, { "epoch": 2.3513513513513513, "grad_norm": 11.508251706554734, "learning_rate": 1.3613339169681377e-06, "loss": 0.5975847840309143, "step": 2610 }, { "epoch": 2.3522522522522524, "grad_norm": 12.133444595845356, "learning_rate": 1.3577411658056965e-06, "loss": 0.3198261857032776, "step": 2611 }, { "epoch": 2.353153153153153, "grad_norm": 14.218660516269727, "learning_rate": 1.3541524168270115e-06, "loss": 0.30245745182037354, "step": 2612 }, { "epoch": 2.354054054054054, "grad_norm": 13.287873155532413, "learning_rate": 1.3505676739754715e-06, "loss": 0.5945942401885986, "step": 2613 }, { "epoch": 2.354954954954955, "grad_norm": 11.50578039641458, "learning_rate": 1.3469869411900572e-06, "loss": 0.33322668075561523, "step": 2614 }, { "epoch": 2.355855855855856, "grad_norm": 7.968826858711949, "learning_rate": 1.343410222405348e-06, "loss": 0.9494017362594604, "step": 2615 }, { "epoch": 2.3567567567567567, "grad_norm": 10.911644363403369, "learning_rate": 1.339837521551513e-06, "loss": 0.6252211928367615, "step": 2616 }, { "epoch": 2.357657657657658, "grad_norm": 11.911146566416196, "learning_rate": 1.3362688425543014e-06, "loss": 0.6049370169639587, "step": 2617 }, { "epoch": 2.3585585585585584, "grad_norm": 11.090868085078775, "learning_rate": 1.332704189335048e-06, "loss": 1.1241182088851929, "step": 2618 }, { "epoch": 2.3594594594594596, "grad_norm": 15.578730816301656, "learning_rate": 1.32914356581066e-06, "loss": 0.2764025926589966, "step": 2619 }, { "epoch": 2.3603603603603602, "grad_norm": 8.021171358061203, "learning_rate": 1.3255869758936214e-06, "loss": 0.45589882135391235, "step": 2620 }, { "epoch": 2.3612612612612613, "grad_norm": 6.953521470957379, "learning_rate": 1.322034423491978e-06, "loss": 0.16293835639953613, "step": 2621 }, { "epoch": 2.362162162162162, "grad_norm": 12.559698917511975, "learning_rate": 1.3184859125093458e-06, "loss": 1.8385086059570312, "step": 2622 }, { "epoch": 2.363063063063063, "grad_norm": 11.707425053406114, "learning_rate": 1.3149414468448934e-06, "loss": 0.46540600061416626, "step": 2623 }, { "epoch": 2.363963963963964, "grad_norm": 12.447645842698154, "learning_rate": 1.311401030393351e-06, "loss": 0.5401040315628052, "step": 2624 }, { "epoch": 2.364864864864865, "grad_norm": 11.019357377234464, "learning_rate": 1.307864667044993e-06, "loss": 0.8212162256240845, "step": 2625 }, { "epoch": 2.3657657657657656, "grad_norm": 10.156610792104589, "learning_rate": 1.304332360685645e-06, "loss": 0.20631080865859985, "step": 2626 }, { "epoch": 2.3666666666666667, "grad_norm": 8.326111524727171, "learning_rate": 1.3008041151966727e-06, "loss": 0.23290196061134338, "step": 2627 }, { "epoch": 2.3675675675675674, "grad_norm": 9.268162081954008, "learning_rate": 1.297279934454978e-06, "loss": 0.18848250806331635, "step": 2628 }, { "epoch": 2.3684684684684685, "grad_norm": 10.031325843891333, "learning_rate": 1.2937598223330006e-06, "loss": 0.4422239065170288, "step": 2629 }, { "epoch": 2.3693693693693696, "grad_norm": 11.211182341662486, "learning_rate": 1.290243782698703e-06, "loss": 0.7510225176811218, "step": 2630 }, { "epoch": 2.3702702702702703, "grad_norm": 13.481796126843822, "learning_rate": 1.2867318194155832e-06, "loss": 0.4507480263710022, "step": 2631 }, { "epoch": 2.371171171171171, "grad_norm": 12.15068939344174, "learning_rate": 1.283223936342649e-06, "loss": 0.5214511156082153, "step": 2632 }, { "epoch": 2.372072072072072, "grad_norm": 10.949746757845631, "learning_rate": 1.279720137334432e-06, "loss": 0.24835021793842316, "step": 2633 }, { "epoch": 2.372972972972973, "grad_norm": 12.252618985064036, "learning_rate": 1.2762204262409728e-06, "loss": 0.7337615489959717, "step": 2634 }, { "epoch": 2.373873873873874, "grad_norm": 9.277229362366574, "learning_rate": 1.2727248069078197e-06, "loss": 0.5765893459320068, "step": 2635 }, { "epoch": 2.374774774774775, "grad_norm": 10.925858500937643, "learning_rate": 1.2692332831760278e-06, "loss": 0.40767624974250793, "step": 2636 }, { "epoch": 2.3756756756756756, "grad_norm": 10.538126233935106, "learning_rate": 1.2657458588821485e-06, "loss": 0.29305821657180786, "step": 2637 }, { "epoch": 2.3765765765765767, "grad_norm": 8.258931318940817, "learning_rate": 1.2622625378582332e-06, "loss": 0.19992494583129883, "step": 2638 }, { "epoch": 2.3774774774774774, "grad_norm": 7.827790881705758, "learning_rate": 1.2587833239318187e-06, "loss": 0.27060243487358093, "step": 2639 }, { "epoch": 2.3783783783783785, "grad_norm": 23.502798506925522, "learning_rate": 1.2553082209259343e-06, "loss": 1.5089101791381836, "step": 2640 }, { "epoch": 2.379279279279279, "grad_norm": 19.300407883234428, "learning_rate": 1.2518372326590878e-06, "loss": 0.9108871817588806, "step": 2641 }, { "epoch": 2.3801801801801803, "grad_norm": 19.059018043051722, "learning_rate": 1.2483703629452693e-06, "loss": 0.7407408952713013, "step": 2642 }, { "epoch": 2.381081081081081, "grad_norm": 13.05390812421338, "learning_rate": 1.2449076155939398e-06, "loss": 0.6685821413993835, "step": 2643 }, { "epoch": 2.381981981981982, "grad_norm": 9.377555362163923, "learning_rate": 1.241448994410035e-06, "loss": 0.7205036878585815, "step": 2644 }, { "epoch": 2.3828828828828827, "grad_norm": 12.141375001759368, "learning_rate": 1.2379945031939505e-06, "loss": 0.7298334836959839, "step": 2645 }, { "epoch": 2.383783783783784, "grad_norm": 8.749528904058588, "learning_rate": 1.2345441457415502e-06, "loss": 0.613317608833313, "step": 2646 }, { "epoch": 2.3846846846846845, "grad_norm": 10.962029488738938, "learning_rate": 1.231097925844153e-06, "loss": 0.26030468940734863, "step": 2647 }, { "epoch": 2.3855855855855856, "grad_norm": 18.964485987080764, "learning_rate": 1.2276558472885292e-06, "loss": 0.3774060010910034, "step": 2648 }, { "epoch": 2.3864864864864863, "grad_norm": 14.988039416352846, "learning_rate": 1.2242179138569034e-06, "loss": 1.1203635931015015, "step": 2649 }, { "epoch": 2.3873873873873874, "grad_norm": 9.620256470059601, "learning_rate": 1.2207841293269396e-06, "loss": 0.40332576632499695, "step": 2650 }, { "epoch": 2.388288288288288, "grad_norm": 10.415620579539851, "learning_rate": 1.2173544974717495e-06, "loss": 0.2817927896976471, "step": 2651 }, { "epoch": 2.389189189189189, "grad_norm": 14.655852119082983, "learning_rate": 1.2139290220598742e-06, "loss": 0.28211984038352966, "step": 2652 }, { "epoch": 2.39009009009009, "grad_norm": 14.218803553584383, "learning_rate": 1.2105077068552956e-06, "loss": 0.5699129700660706, "step": 2653 }, { "epoch": 2.390990990990991, "grad_norm": 12.564004231447765, "learning_rate": 1.207090555617419e-06, "loss": 0.6292193531990051, "step": 2654 }, { "epoch": 2.391891891891892, "grad_norm": 15.224793030912462, "learning_rate": 1.2036775721010734e-06, "loss": 0.6220686435699463, "step": 2655 }, { "epoch": 2.3927927927927928, "grad_norm": 12.57642723926645, "learning_rate": 1.2002687600565138e-06, "loss": 0.4795664846897125, "step": 2656 }, { "epoch": 2.3936936936936934, "grad_norm": 13.192313397119912, "learning_rate": 1.1968641232294054e-06, "loss": 0.4635518789291382, "step": 2657 }, { "epoch": 2.3945945945945946, "grad_norm": 8.527948504974212, "learning_rate": 1.1934636653608306e-06, "loss": 0.2009531855583191, "step": 2658 }, { "epoch": 2.3954954954954957, "grad_norm": 7.423618142413011, "learning_rate": 1.1900673901872755e-06, "loss": 0.30849915742874146, "step": 2659 }, { "epoch": 2.3963963963963963, "grad_norm": 10.824429456393483, "learning_rate": 1.186675301440633e-06, "loss": 0.42854294180870056, "step": 2660 }, { "epoch": 2.3972972972972975, "grad_norm": 10.154317664268815, "learning_rate": 1.1832874028481978e-06, "loss": 0.29515066742897034, "step": 2661 }, { "epoch": 2.398198198198198, "grad_norm": 11.387489762098907, "learning_rate": 1.1799036981326534e-06, "loss": 0.4566013813018799, "step": 2662 }, { "epoch": 2.3990990990990992, "grad_norm": 10.031786853252997, "learning_rate": 1.1765241910120828e-06, "loss": 0.2901766002178192, "step": 2663 }, { "epoch": 2.4, "grad_norm": 10.77154888790233, "learning_rate": 1.1731488851999513e-06, "loss": 0.4467163681983948, "step": 2664 }, { "epoch": 2.400900900900901, "grad_norm": 10.077876677732808, "learning_rate": 1.1697777844051105e-06, "loss": 0.316597044467926, "step": 2665 }, { "epoch": 2.4018018018018017, "grad_norm": 8.624959871842753, "learning_rate": 1.166410892331789e-06, "loss": 0.49726009368896484, "step": 2666 }, { "epoch": 2.402702702702703, "grad_norm": 11.80420666971692, "learning_rate": 1.163048212679595e-06, "loss": 0.8117353320121765, "step": 2667 }, { "epoch": 2.4036036036036035, "grad_norm": 12.29945813337615, "learning_rate": 1.1596897491435023e-06, "loss": 0.3540371060371399, "step": 2668 }, { "epoch": 2.4045045045045046, "grad_norm": 13.554583419112339, "learning_rate": 1.1563355054138575e-06, "loss": 0.9296278953552246, "step": 2669 }, { "epoch": 2.4054054054054053, "grad_norm": 10.2306958358243, "learning_rate": 1.152985485176365e-06, "loss": 0.2750551700592041, "step": 2670 }, { "epoch": 2.4063063063063064, "grad_norm": 12.343688400189615, "learning_rate": 1.149639692112095e-06, "loss": 0.5621803998947144, "step": 2671 }, { "epoch": 2.407207207207207, "grad_norm": 8.874695614140608, "learning_rate": 1.1462981298974651e-06, "loss": 0.3487135171890259, "step": 2672 }, { "epoch": 2.408108108108108, "grad_norm": 12.80302754130961, "learning_rate": 1.142960802204251e-06, "loss": 0.9279593229293823, "step": 2673 }, { "epoch": 2.409009009009009, "grad_norm": 11.9350463131864, "learning_rate": 1.1396277126995709e-06, "loss": 0.3281620144844055, "step": 2674 }, { "epoch": 2.40990990990991, "grad_norm": 13.01048361599438, "learning_rate": 1.1362988650458845e-06, "loss": 0.44163113832473755, "step": 2675 }, { "epoch": 2.410810810810811, "grad_norm": 15.599219513642298, "learning_rate": 1.1329742629009987e-06, "loss": 0.6006256341934204, "step": 2676 }, { "epoch": 2.4117117117117117, "grad_norm": 7.713236398177726, "learning_rate": 1.1296539099180464e-06, "loss": 0.25887003540992737, "step": 2677 }, { "epoch": 2.4126126126126124, "grad_norm": 12.457655498223152, "learning_rate": 1.126337809745498e-06, "loss": 0.7419452667236328, "step": 2678 }, { "epoch": 2.4135135135135135, "grad_norm": 20.065914991776893, "learning_rate": 1.1230259660271443e-06, "loss": 0.9115269184112549, "step": 2679 }, { "epoch": 2.4144144144144146, "grad_norm": 18.767064528646536, "learning_rate": 1.1197183824021052e-06, "loss": 2.413602352142334, "step": 2680 }, { "epoch": 2.4153153153153153, "grad_norm": 5.151645980162742, "learning_rate": 1.1164150625048164e-06, "loss": 0.21344856917858124, "step": 2681 }, { "epoch": 2.4162162162162164, "grad_norm": 16.2651109314574, "learning_rate": 1.113116009965028e-06, "loss": 0.23965248465538025, "step": 2682 }, { "epoch": 2.417117117117117, "grad_norm": 12.271976718031095, "learning_rate": 1.1098212284078037e-06, "loss": 0.47895270586013794, "step": 2683 }, { "epoch": 2.418018018018018, "grad_norm": 13.982284238144109, "learning_rate": 1.1065307214535104e-06, "loss": 0.3592027425765991, "step": 2684 }, { "epoch": 2.418918918918919, "grad_norm": 14.500962983533217, "learning_rate": 1.1032444927178226e-06, "loss": 1.103131651878357, "step": 2685 }, { "epoch": 2.41981981981982, "grad_norm": 12.395740423285796, "learning_rate": 1.0999625458117092e-06, "loss": 0.6390451192855835, "step": 2686 }, { "epoch": 2.4207207207207206, "grad_norm": 7.942341981552817, "learning_rate": 1.0966848843414386e-06, "loss": 0.24363839626312256, "step": 2687 }, { "epoch": 2.4216216216216218, "grad_norm": 20.957466451914467, "learning_rate": 1.0934115119085647e-06, "loss": 0.8040348291397095, "step": 2688 }, { "epoch": 2.4225225225225224, "grad_norm": 13.423648051815215, "learning_rate": 1.0901424321099346e-06, "loss": 0.47693151235580444, "step": 2689 }, { "epoch": 2.4234234234234235, "grad_norm": 12.691923620714201, "learning_rate": 1.0868776485376763e-06, "loss": 0.7961431741714478, "step": 2690 }, { "epoch": 2.424324324324324, "grad_norm": 8.736337475525781, "learning_rate": 1.0836171647791938e-06, "loss": 0.21177604794502258, "step": 2691 }, { "epoch": 2.4252252252252253, "grad_norm": 9.383564788527963, "learning_rate": 1.080360984417172e-06, "loss": 0.18495744466781616, "step": 2692 }, { "epoch": 2.426126126126126, "grad_norm": 12.149946860892875, "learning_rate": 1.0771091110295612e-06, "loss": 0.4451485276222229, "step": 2693 }, { "epoch": 2.427027027027027, "grad_norm": 11.67811862675532, "learning_rate": 1.0738615481895853e-06, "loss": 0.5266054272651672, "step": 2694 }, { "epoch": 2.4279279279279278, "grad_norm": 14.849514912812584, "learning_rate": 1.0706182994657256e-06, "loss": 0.7762129306793213, "step": 2695 }, { "epoch": 2.428828828828829, "grad_norm": 11.60867834650254, "learning_rate": 1.0673793684217287e-06, "loss": 0.5322248339653015, "step": 2696 }, { "epoch": 2.4297297297297296, "grad_norm": 11.84218162284018, "learning_rate": 1.064144758616591e-06, "loss": 0.3062615990638733, "step": 2697 }, { "epoch": 2.4306306306306307, "grad_norm": 10.724465424230159, "learning_rate": 1.0609144736045668e-06, "loss": 0.8998652100563049, "step": 2698 }, { "epoch": 2.4315315315315313, "grad_norm": 9.500583181441455, "learning_rate": 1.0576885169351524e-06, "loss": 0.49134063720703125, "step": 2699 }, { "epoch": 2.4324324324324325, "grad_norm": 9.765274644223428, "learning_rate": 1.0544668921530932e-06, "loss": 0.40656042098999023, "step": 2700 }, { "epoch": 2.4333333333333336, "grad_norm": 14.180153492991261, "learning_rate": 1.0512496027983715e-06, "loss": 0.3051440417766571, "step": 2701 }, { "epoch": 2.4342342342342342, "grad_norm": 8.5215752388365, "learning_rate": 1.0480366524062041e-06, "loss": 0.3623253405094147, "step": 2702 }, { "epoch": 2.435135135135135, "grad_norm": 12.102979273045491, "learning_rate": 1.0448280445070458e-06, "loss": 0.4855523407459259, "step": 2703 }, { "epoch": 2.436036036036036, "grad_norm": 10.951566476655008, "learning_rate": 1.0416237826265723e-06, "loss": 0.3727246820926666, "step": 2704 }, { "epoch": 2.436936936936937, "grad_norm": 16.248462865581125, "learning_rate": 1.0384238702856935e-06, "loss": 0.33596110343933105, "step": 2705 }, { "epoch": 2.437837837837838, "grad_norm": 9.516995403990514, "learning_rate": 1.0352283110005296e-06, "loss": 0.21068738400936127, "step": 2706 }, { "epoch": 2.438738738738739, "grad_norm": 13.136869192821923, "learning_rate": 1.032037108282426e-06, "loss": 0.636005163192749, "step": 2707 }, { "epoch": 2.4396396396396396, "grad_norm": 13.883140995571818, "learning_rate": 1.0288502656379351e-06, "loss": 0.2538459897041321, "step": 2708 }, { "epoch": 2.4405405405405407, "grad_norm": 9.42431338236059, "learning_rate": 1.0256677865688197e-06, "loss": 0.4209163188934326, "step": 2709 }, { "epoch": 2.4414414414414414, "grad_norm": 11.444282007328898, "learning_rate": 1.0224896745720513e-06, "loss": 0.2533435821533203, "step": 2710 }, { "epoch": 2.4423423423423425, "grad_norm": 9.01683150092805, "learning_rate": 1.0193159331397977e-06, "loss": 0.7453408241271973, "step": 2711 }, { "epoch": 2.443243243243243, "grad_norm": 7.270793979304956, "learning_rate": 1.0161465657594293e-06, "loss": 0.2630300521850586, "step": 2712 }, { "epoch": 2.4441441441441443, "grad_norm": 11.058542885628082, "learning_rate": 1.0129815759135054e-06, "loss": 0.5038114190101624, "step": 2713 }, { "epoch": 2.445045045045045, "grad_norm": 11.35858670184428, "learning_rate": 1.00982096707978e-06, "loss": 0.5896291732788086, "step": 2714 }, { "epoch": 2.445945945945946, "grad_norm": 13.015325694641724, "learning_rate": 1.006664742731187e-06, "loss": 0.466558575630188, "step": 2715 }, { "epoch": 2.4468468468468467, "grad_norm": 13.1867455313214, "learning_rate": 1.00351290633585e-06, "loss": 0.5940424203872681, "step": 2716 }, { "epoch": 2.447747747747748, "grad_norm": 10.949615598143545, "learning_rate": 1.000365461357064e-06, "loss": 0.3536610007286072, "step": 2717 }, { "epoch": 2.4486486486486485, "grad_norm": 11.424826670440734, "learning_rate": 9.972224112533046e-07, "loss": 0.2909022867679596, "step": 2718 }, { "epoch": 2.4495495495495496, "grad_norm": 11.051438094626672, "learning_rate": 9.940837594782128e-07, "loss": 0.6904971599578857, "step": 2719 }, { "epoch": 2.4504504504504503, "grad_norm": 12.78015404531044, "learning_rate": 9.90949509480601e-07, "loss": 0.3737218976020813, "step": 2720 }, { "epoch": 2.4513513513513514, "grad_norm": 8.983036615223945, "learning_rate": 9.878196647044435e-07, "loss": 0.2659727931022644, "step": 2721 }, { "epoch": 2.452252252252252, "grad_norm": 9.558765853040109, "learning_rate": 9.846942285888716e-07, "loss": 0.9288277626037598, "step": 2722 }, { "epoch": 2.453153153153153, "grad_norm": 8.456022072720478, "learning_rate": 9.81573204568177e-07, "loss": 0.5387973189353943, "step": 2723 }, { "epoch": 2.454054054054054, "grad_norm": 11.695104597533478, "learning_rate": 9.784565960717978e-07, "loss": 0.20687323808670044, "step": 2724 }, { "epoch": 2.454954954954955, "grad_norm": 10.83457751472315, "learning_rate": 9.753444065243263e-07, "loss": 0.29569950699806213, "step": 2725 }, { "epoch": 2.455855855855856, "grad_norm": 11.602520717645529, "learning_rate": 9.722366393454929e-07, "loss": 0.41959670186042786, "step": 2726 }, { "epoch": 2.4567567567567568, "grad_norm": 9.845789746597728, "learning_rate": 9.691332979501738e-07, "loss": 0.8155454397201538, "step": 2727 }, { "epoch": 2.4576576576576574, "grad_norm": 12.3988207562276, "learning_rate": 9.660343857483801e-07, "loss": 0.25288331508636475, "step": 2728 }, { "epoch": 2.4585585585585585, "grad_norm": 10.14823444046449, "learning_rate": 9.629399061452533e-07, "loss": 0.5517836809158325, "step": 2729 }, { "epoch": 2.4594594594594597, "grad_norm": 8.837155572379947, "learning_rate": 9.598498625410695e-07, "loss": 0.310319185256958, "step": 2730 }, { "epoch": 2.4603603603603603, "grad_norm": 7.122769074595653, "learning_rate": 9.56764258331226e-07, "loss": 0.13699156045913696, "step": 2731 }, { "epoch": 2.4612612612612614, "grad_norm": 16.731880366793966, "learning_rate": 9.536830969062456e-07, "loss": 0.6171733140945435, "step": 2732 }, { "epoch": 2.462162162162162, "grad_norm": 6.928365489035181, "learning_rate": 9.506063816517652e-07, "loss": 0.24341315031051636, "step": 2733 }, { "epoch": 2.463063063063063, "grad_norm": 9.694267931522264, "learning_rate": 9.475341159485396e-07, "loss": 0.2506348490715027, "step": 2734 }, { "epoch": 2.463963963963964, "grad_norm": 20.161890960014716, "learning_rate": 9.444663031724349e-07, "loss": 0.6052705645561218, "step": 2735 }, { "epoch": 2.464864864864865, "grad_norm": 9.816328966461741, "learning_rate": 9.414029466944196e-07, "loss": 0.5910078883171082, "step": 2736 }, { "epoch": 2.4657657657657657, "grad_norm": 13.501478278364509, "learning_rate": 9.383440498805712e-07, "loss": 0.4284709095954895, "step": 2737 }, { "epoch": 2.466666666666667, "grad_norm": 14.078045085556997, "learning_rate": 9.35289616092061e-07, "loss": 0.4995902478694916, "step": 2738 }, { "epoch": 2.4675675675675675, "grad_norm": 10.095295580834787, "learning_rate": 9.322396486851626e-07, "loss": 0.282894492149353, "step": 2739 }, { "epoch": 2.4684684684684686, "grad_norm": 9.508335843182428, "learning_rate": 9.291941510112362e-07, "loss": 0.4157346785068512, "step": 2740 }, { "epoch": 2.4693693693693692, "grad_norm": 12.648189574783698, "learning_rate": 9.261531264167345e-07, "loss": 0.28353965282440186, "step": 2741 }, { "epoch": 2.4702702702702704, "grad_norm": 12.457127800370698, "learning_rate": 9.231165782431916e-07, "loss": 1.0120548009872437, "step": 2742 }, { "epoch": 2.471171171171171, "grad_norm": 26.724546681563147, "learning_rate": 9.200845098272276e-07, "loss": 0.886185884475708, "step": 2743 }, { "epoch": 2.472072072072072, "grad_norm": 12.488642280138567, "learning_rate": 9.170569245005345e-07, "loss": 0.41486501693725586, "step": 2744 }, { "epoch": 2.472972972972973, "grad_norm": 9.79116171773031, "learning_rate": 9.140338255898834e-07, "loss": 0.2732367515563965, "step": 2745 }, { "epoch": 2.473873873873874, "grad_norm": 15.016233353913933, "learning_rate": 9.110152164171127e-07, "loss": 0.9172552824020386, "step": 2746 }, { "epoch": 2.4747747747747746, "grad_norm": 8.961640954986999, "learning_rate": 9.080011002991257e-07, "loss": 0.29976850748062134, "step": 2747 }, { "epoch": 2.4756756756756757, "grad_norm": 13.086013721976759, "learning_rate": 9.049914805478932e-07, "loss": 0.8649406433105469, "step": 2748 }, { "epoch": 2.4765765765765764, "grad_norm": 16.055503602676147, "learning_rate": 9.019863604704421e-07, "loss": 0.8113471865653992, "step": 2749 }, { "epoch": 2.4774774774774775, "grad_norm": 9.10005654812221, "learning_rate": 8.989857433688576e-07, "loss": 0.3861403465270996, "step": 2750 }, { "epoch": 2.4783783783783786, "grad_norm": 12.038481207473978, "learning_rate": 8.959896325402728e-07, "loss": 0.3215363025665283, "step": 2751 }, { "epoch": 2.4792792792792793, "grad_norm": 10.720590684546965, "learning_rate": 8.929980312768738e-07, "loss": 0.5568872094154358, "step": 2752 }, { "epoch": 2.48018018018018, "grad_norm": 12.123947318739067, "learning_rate": 8.900109428658871e-07, "loss": 0.44781431555747986, "step": 2753 }, { "epoch": 2.481081081081081, "grad_norm": 9.79153280344517, "learning_rate": 8.870283705895855e-07, "loss": 0.3352649211883545, "step": 2754 }, { "epoch": 2.481981981981982, "grad_norm": 10.474425766680636, "learning_rate": 8.840503177252746e-07, "loss": 0.9820655584335327, "step": 2755 }, { "epoch": 2.482882882882883, "grad_norm": 13.449977009017616, "learning_rate": 8.810767875452952e-07, "loss": 0.8978846073150635, "step": 2756 }, { "epoch": 2.483783783783784, "grad_norm": 6.85527356723595, "learning_rate": 8.781077833170215e-07, "loss": 0.2883230447769165, "step": 2757 }, { "epoch": 2.4846846846846846, "grad_norm": 11.788202231972665, "learning_rate": 8.751433083028493e-07, "loss": 0.8722458481788635, "step": 2758 }, { "epoch": 2.4855855855855857, "grad_norm": 7.477429404941512, "learning_rate": 8.721833657602041e-07, "loss": 0.446788489818573, "step": 2759 }, { "epoch": 2.4864864864864864, "grad_norm": 10.823394975872683, "learning_rate": 8.692279589415237e-07, "loss": 0.6068504452705383, "step": 2760 }, { "epoch": 2.4873873873873875, "grad_norm": 12.683265359244906, "learning_rate": 8.662770910942691e-07, "loss": 0.5869661569595337, "step": 2761 }, { "epoch": 2.488288288288288, "grad_norm": 20.22563639414899, "learning_rate": 8.633307654609074e-07, "loss": 0.6758477687835693, "step": 2762 }, { "epoch": 2.4891891891891893, "grad_norm": 8.320916830477627, "learning_rate": 8.603889852789188e-07, "loss": 0.4772491455078125, "step": 2763 }, { "epoch": 2.49009009009009, "grad_norm": 15.250771793999402, "learning_rate": 8.574517537807897e-07, "loss": 0.7708054780960083, "step": 2764 }, { "epoch": 2.490990990990991, "grad_norm": 10.554761116145748, "learning_rate": 8.545190741940035e-07, "loss": 0.5152315497398376, "step": 2765 }, { "epoch": 2.4918918918918918, "grad_norm": 13.876390538075732, "learning_rate": 8.515909497410463e-07, "loss": 0.4591796398162842, "step": 2766 }, { "epoch": 2.492792792792793, "grad_norm": 8.38313064008368, "learning_rate": 8.48667383639396e-07, "loss": 0.29356512427330017, "step": 2767 }, { "epoch": 2.4936936936936935, "grad_norm": 12.98832797562063, "learning_rate": 8.457483791015247e-07, "loss": 0.4864840805530548, "step": 2768 }, { "epoch": 2.4945945945945946, "grad_norm": 11.411779432463732, "learning_rate": 8.428339393348889e-07, "loss": 0.45541438460350037, "step": 2769 }, { "epoch": 2.4954954954954953, "grad_norm": 7.802160511790263, "learning_rate": 8.399240675419324e-07, "loss": 0.5177165865898132, "step": 2770 }, { "epoch": 2.4963963963963964, "grad_norm": 10.767724983891256, "learning_rate": 8.370187669200763e-07, "loss": 0.24317491054534912, "step": 2771 }, { "epoch": 2.4972972972972975, "grad_norm": 8.393203214778845, "learning_rate": 8.341180406617222e-07, "loss": 0.3023187518119812, "step": 2772 }, { "epoch": 2.498198198198198, "grad_norm": 20.047387496417723, "learning_rate": 8.31221891954243e-07, "loss": 0.5361258387565613, "step": 2773 }, { "epoch": 2.499099099099099, "grad_norm": 12.704626402333194, "learning_rate": 8.283303239799812e-07, "loss": 0.41863399744033813, "step": 2774 }, { "epoch": 2.5, "grad_norm": 12.384659381413668, "learning_rate": 8.254433399162493e-07, "loss": 0.3420434594154358, "step": 2775 }, { "epoch": 2.500900900900901, "grad_norm": 12.849105785920203, "learning_rate": 8.225609429353187e-07, "loss": 0.26572927832603455, "step": 2776 }, { "epoch": 2.501801801801802, "grad_norm": 10.502186478945909, "learning_rate": 8.196831362044239e-07, "loss": 0.5782580971717834, "step": 2777 }, { "epoch": 2.5027027027027025, "grad_norm": 10.156006592062825, "learning_rate": 8.168099228857507e-07, "loss": 0.36796340346336365, "step": 2778 }, { "epoch": 2.5036036036036036, "grad_norm": 7.704635180486914, "learning_rate": 8.139413061364465e-07, "loss": 0.3754027485847473, "step": 2779 }, { "epoch": 2.5045045045045047, "grad_norm": 18.06017832752059, "learning_rate": 8.110772891085994e-07, "loss": 1.1590102910995483, "step": 2780 }, { "epoch": 2.5054054054054054, "grad_norm": 16.818311897507186, "learning_rate": 8.082178749492447e-07, "loss": 0.3621762692928314, "step": 2781 }, { "epoch": 2.5063063063063065, "grad_norm": 13.018408238805641, "learning_rate": 8.053630668003642e-07, "loss": 0.5368507504463196, "step": 2782 }, { "epoch": 2.507207207207207, "grad_norm": 9.370777511282409, "learning_rate": 8.02512867798873e-07, "loss": 0.6472790837287903, "step": 2783 }, { "epoch": 2.5081081081081082, "grad_norm": 14.778353065331993, "learning_rate": 7.996672810766271e-07, "loss": 0.42966964840888977, "step": 2784 }, { "epoch": 2.509009009009009, "grad_norm": 8.272991275208621, "learning_rate": 7.968263097604095e-07, "loss": 0.23913973569869995, "step": 2785 }, { "epoch": 2.50990990990991, "grad_norm": 8.85106374765664, "learning_rate": 7.939899569719356e-07, "loss": 0.7383207678794861, "step": 2786 }, { "epoch": 2.5108108108108107, "grad_norm": 12.949472758039786, "learning_rate": 7.911582258278422e-07, "loss": 0.7164812088012695, "step": 2787 }, { "epoch": 2.511711711711712, "grad_norm": 10.196875911406964, "learning_rate": 7.88331119439692e-07, "loss": 0.5378541946411133, "step": 2788 }, { "epoch": 2.5126126126126125, "grad_norm": 7.234108685746755, "learning_rate": 7.855086409139612e-07, "loss": 0.39583760499954224, "step": 2789 }, { "epoch": 2.5135135135135136, "grad_norm": 11.629473673891297, "learning_rate": 7.826907933520462e-07, "loss": 0.23455587029457092, "step": 2790 }, { "epoch": 2.5144144144144143, "grad_norm": 13.185689086343592, "learning_rate": 7.798775798502484e-07, "loss": 0.5355321764945984, "step": 2791 }, { "epoch": 2.5153153153153154, "grad_norm": 8.547138181766927, "learning_rate": 7.770690034997841e-07, "loss": 0.24441684782505035, "step": 2792 }, { "epoch": 2.516216216216216, "grad_norm": 6.980235224218154, "learning_rate": 7.742650673867675e-07, "loss": 0.22758543491363525, "step": 2793 }, { "epoch": 2.517117117117117, "grad_norm": 13.01033463435246, "learning_rate": 7.714657745922194e-07, "loss": 1.046520709991455, "step": 2794 }, { "epoch": 2.518018018018018, "grad_norm": 12.054871166483865, "learning_rate": 7.686711281920567e-07, "loss": 0.3230191767215729, "step": 2795 }, { "epoch": 2.518918918918919, "grad_norm": 10.624917173731054, "learning_rate": 7.658811312570885e-07, "loss": 0.25653040409088135, "step": 2796 }, { "epoch": 2.51981981981982, "grad_norm": 10.269390116412083, "learning_rate": 7.630957868530193e-07, "loss": 0.2573622763156891, "step": 2797 }, { "epoch": 2.5207207207207207, "grad_norm": 7.930807764459055, "learning_rate": 7.603150980404362e-07, "loss": 0.3527355194091797, "step": 2798 }, { "epoch": 2.5216216216216214, "grad_norm": 9.467168878249831, "learning_rate": 7.575390678748157e-07, "loss": 0.38093167543411255, "step": 2799 }, { "epoch": 2.5225225225225225, "grad_norm": 10.378097052204929, "learning_rate": 7.547676994065118e-07, "loss": 0.2759009897708893, "step": 2800 }, { "epoch": 2.5234234234234236, "grad_norm": 9.870493095046264, "learning_rate": 7.520009956807561e-07, "loss": 0.5271565914154053, "step": 2801 }, { "epoch": 2.5243243243243243, "grad_norm": 9.136099438325527, "learning_rate": 7.492389597376576e-07, "loss": 0.29306572675704956, "step": 2802 }, { "epoch": 2.525225225225225, "grad_norm": 28.319250158420076, "learning_rate": 7.464815946121929e-07, "loss": 0.5735443234443665, "step": 2803 }, { "epoch": 2.526126126126126, "grad_norm": 16.142434433625084, "learning_rate": 7.437289033342093e-07, "loss": 0.533798098564148, "step": 2804 }, { "epoch": 2.527027027027027, "grad_norm": 11.832880177795374, "learning_rate": 7.409808889284143e-07, "loss": 0.41538727283477783, "step": 2805 }, { "epoch": 2.527927927927928, "grad_norm": 6.876405183638195, "learning_rate": 7.382375544143811e-07, "loss": 0.32754501700401306, "step": 2806 }, { "epoch": 2.528828828828829, "grad_norm": 12.662910697067751, "learning_rate": 7.354989028065357e-07, "loss": 0.48083919286727905, "step": 2807 }, { "epoch": 2.5297297297297296, "grad_norm": 8.905363883940831, "learning_rate": 7.327649371141626e-07, "loss": 0.22961552441120148, "step": 2808 }, { "epoch": 2.5306306306306308, "grad_norm": 12.711584661950091, "learning_rate": 7.300356603413966e-07, "loss": 0.32955026626586914, "step": 2809 }, { "epoch": 2.5315315315315314, "grad_norm": 11.819959369245261, "learning_rate": 7.27311075487217e-07, "loss": 0.3045678436756134, "step": 2810 }, { "epoch": 2.5324324324324325, "grad_norm": 9.582711622109226, "learning_rate": 7.245911855454524e-07, "loss": 0.26437145471572876, "step": 2811 }, { "epoch": 2.533333333333333, "grad_norm": 10.159482289942703, "learning_rate": 7.218759935047665e-07, "loss": 0.4841468334197998, "step": 2812 }, { "epoch": 2.5342342342342343, "grad_norm": 13.78607407495029, "learning_rate": 7.191655023486682e-07, "loss": 0.5130227208137512, "step": 2813 }, { "epoch": 2.535135135135135, "grad_norm": 17.69403701962981, "learning_rate": 7.164597150554936e-07, "loss": 0.3569299876689911, "step": 2814 }, { "epoch": 2.536036036036036, "grad_norm": 12.797714777488547, "learning_rate": 7.137586345984165e-07, "loss": 0.454569548368454, "step": 2815 }, { "epoch": 2.536936936936937, "grad_norm": 15.035730903363023, "learning_rate": 7.110622639454335e-07, "loss": 0.8996487259864807, "step": 2816 }, { "epoch": 2.537837837837838, "grad_norm": 9.225561013089449, "learning_rate": 7.083706060593704e-07, "loss": 0.316983699798584, "step": 2817 }, { "epoch": 2.538738738738739, "grad_norm": 6.9607596463470385, "learning_rate": 7.056836638978698e-07, "loss": 0.29841530323028564, "step": 2818 }, { "epoch": 2.5396396396396397, "grad_norm": 10.171800807078554, "learning_rate": 7.030014404133984e-07, "loss": 0.263772189617157, "step": 2819 }, { "epoch": 2.5405405405405403, "grad_norm": 13.953416938579434, "learning_rate": 7.003239385532324e-07, "loss": 0.643886148929596, "step": 2820 }, { "epoch": 2.5414414414414415, "grad_norm": 12.628443446584198, "learning_rate": 6.976511612594622e-07, "loss": 0.7477445006370544, "step": 2821 }, { "epoch": 2.5423423423423426, "grad_norm": 7.342700406531725, "learning_rate": 6.94983111468987e-07, "loss": 0.20469197630882263, "step": 2822 }, { "epoch": 2.5432432432432432, "grad_norm": 10.276771013603613, "learning_rate": 6.923197921135117e-07, "loss": 0.4767210781574249, "step": 2823 }, { "epoch": 2.544144144144144, "grad_norm": 16.75752980705756, "learning_rate": 6.89661206119543e-07, "loss": 0.38303235173225403, "step": 2824 }, { "epoch": 2.545045045045045, "grad_norm": 12.627027923019437, "learning_rate": 6.87007356408384e-07, "loss": 0.7219441533088684, "step": 2825 }, { "epoch": 2.545945945945946, "grad_norm": 16.181118085524965, "learning_rate": 6.843582458961384e-07, "loss": 0.30149808526039124, "step": 2826 }, { "epoch": 2.546846846846847, "grad_norm": 12.478379524267625, "learning_rate": 6.817138774936976e-07, "loss": 0.4125371277332306, "step": 2827 }, { "epoch": 2.5477477477477475, "grad_norm": 11.586766250938542, "learning_rate": 6.790742541067441e-07, "loss": 0.36932528018951416, "step": 2828 }, { "epoch": 2.5486486486486486, "grad_norm": 8.164706687612089, "learning_rate": 6.764393786357476e-07, "loss": 0.44537636637687683, "step": 2829 }, { "epoch": 2.5495495495495497, "grad_norm": 11.391787633194824, "learning_rate": 6.738092539759589e-07, "loss": 0.3408915400505066, "step": 2830 }, { "epoch": 2.5504504504504504, "grad_norm": 12.578359600083925, "learning_rate": 6.711838830174106e-07, "loss": 0.316663920879364, "step": 2831 }, { "epoch": 2.5513513513513515, "grad_norm": 9.511460694028267, "learning_rate": 6.685632686449084e-07, "loss": 0.25527459383010864, "step": 2832 }, { "epoch": 2.552252252252252, "grad_norm": 7.891829504280663, "learning_rate": 6.659474137380367e-07, "loss": 0.2744280695915222, "step": 2833 }, { "epoch": 2.5531531531531533, "grad_norm": 6.566574909703896, "learning_rate": 6.633363211711435e-07, "loss": 0.28016623854637146, "step": 2834 }, { "epoch": 2.554054054054054, "grad_norm": 19.180555503186014, "learning_rate": 6.607299938133499e-07, "loss": 0.4957726299762726, "step": 2835 }, { "epoch": 2.554954954954955, "grad_norm": 12.288713341044215, "learning_rate": 6.581284345285371e-07, "loss": 0.7940866351127625, "step": 2836 }, { "epoch": 2.5558558558558557, "grad_norm": 8.82895512056585, "learning_rate": 6.55531646175348e-07, "loss": 0.45401430130004883, "step": 2837 }, { "epoch": 2.556756756756757, "grad_norm": 11.756629631873064, "learning_rate": 6.529396316071851e-07, "loss": 0.5431941151618958, "step": 2838 }, { "epoch": 2.5576576576576575, "grad_norm": 11.969502445434662, "learning_rate": 6.503523936722017e-07, "loss": 0.4053301215171814, "step": 2839 }, { "epoch": 2.5585585585585586, "grad_norm": 9.058846918218157, "learning_rate": 6.47769935213306e-07, "loss": 0.16752083599567413, "step": 2840 }, { "epoch": 2.5594594594594593, "grad_norm": 12.549421008138633, "learning_rate": 6.451922590681509e-07, "loss": 0.5353535413742065, "step": 2841 }, { "epoch": 2.5603603603603604, "grad_norm": 9.233954840547739, "learning_rate": 6.426193680691384e-07, "loss": 0.6705703139305115, "step": 2842 }, { "epoch": 2.5612612612612615, "grad_norm": 10.078066790506593, "learning_rate": 6.400512650434082e-07, "loss": 0.39536216855049133, "step": 2843 }, { "epoch": 2.562162162162162, "grad_norm": 11.840546024997153, "learning_rate": 6.374879528128441e-07, "loss": 0.5658566355705261, "step": 2844 }, { "epoch": 2.563063063063063, "grad_norm": 5.645452605399901, "learning_rate": 6.349294341940593e-07, "loss": 0.1515130251646042, "step": 2845 }, { "epoch": 2.563963963963964, "grad_norm": 12.147964576938012, "learning_rate": 6.323757119984053e-07, "loss": 0.6295697093009949, "step": 2846 }, { "epoch": 2.564864864864865, "grad_norm": 9.706790522003507, "learning_rate": 6.29826789031961e-07, "loss": 0.4312012791633606, "step": 2847 }, { "epoch": 2.5657657657657658, "grad_norm": 9.446993445594122, "learning_rate": 6.272826680955296e-07, "loss": 0.43167930841445923, "step": 2848 }, { "epoch": 2.5666666666666664, "grad_norm": 15.33390394523994, "learning_rate": 6.247433519846424e-07, "loss": 0.6086018085479736, "step": 2849 }, { "epoch": 2.5675675675675675, "grad_norm": 11.20570689593158, "learning_rate": 6.222088434895462e-07, "loss": 0.7297212481498718, "step": 2850 }, { "epoch": 2.5684684684684687, "grad_norm": 9.884279470721058, "learning_rate": 6.1967914539521e-07, "loss": 0.667428731918335, "step": 2851 }, { "epoch": 2.5693693693693693, "grad_norm": 12.952756668389576, "learning_rate": 6.171542604813113e-07, "loss": 0.4452500343322754, "step": 2852 }, { "epoch": 2.57027027027027, "grad_norm": 11.905149875696582, "learning_rate": 6.146341915222459e-07, "loss": 0.6105617880821228, "step": 2853 }, { "epoch": 2.571171171171171, "grad_norm": 11.820755527856914, "learning_rate": 6.12118941287112e-07, "loss": 0.5909096002578735, "step": 2854 }, { "epoch": 2.5720720720720722, "grad_norm": 12.628751523129457, "learning_rate": 6.096085125397138e-07, "loss": 0.7401934862136841, "step": 2855 }, { "epoch": 2.572972972972973, "grad_norm": 8.191129931939585, "learning_rate": 6.071029080385604e-07, "loss": 0.4276942014694214, "step": 2856 }, { "epoch": 2.573873873873874, "grad_norm": 8.703721858390045, "learning_rate": 6.046021305368554e-07, "loss": 0.30004197359085083, "step": 2857 }, { "epoch": 2.5747747747747747, "grad_norm": 8.553950762759087, "learning_rate": 6.021061827825042e-07, "loss": 0.5719914436340332, "step": 2858 }, { "epoch": 2.575675675675676, "grad_norm": 14.224655598442842, "learning_rate": 5.99615067518099e-07, "loss": 0.43226099014282227, "step": 2859 }, { "epoch": 2.5765765765765765, "grad_norm": 14.871823642636507, "learning_rate": 5.971287874809273e-07, "loss": 0.5740262269973755, "step": 2860 }, { "epoch": 2.5774774774774776, "grad_norm": 8.099573713687539, "learning_rate": 5.946473454029594e-07, "loss": 0.2974473237991333, "step": 2861 }, { "epoch": 2.5783783783783782, "grad_norm": 12.63806722855833, "learning_rate": 5.921707440108526e-07, "loss": 1.7352497577667236, "step": 2862 }, { "epoch": 2.5792792792792794, "grad_norm": 6.300345224350456, "learning_rate": 5.896989860259433e-07, "loss": 0.3095148205757141, "step": 2863 }, { "epoch": 2.58018018018018, "grad_norm": 15.20115945583773, "learning_rate": 5.872320741642474e-07, "loss": 0.36338961124420166, "step": 2864 }, { "epoch": 2.581081081081081, "grad_norm": 14.771699131375906, "learning_rate": 5.847700111364529e-07, "loss": 0.4657498300075531, "step": 2865 }, { "epoch": 2.581981981981982, "grad_norm": 9.717327746058759, "learning_rate": 5.823127996479233e-07, "loss": 0.5524632930755615, "step": 2866 }, { "epoch": 2.582882882882883, "grad_norm": 13.80679440350738, "learning_rate": 5.798604423986909e-07, "loss": 0.7932208180427551, "step": 2867 }, { "epoch": 2.583783783783784, "grad_norm": 13.23848968111488, "learning_rate": 5.774129420834501e-07, "loss": 0.547808051109314, "step": 2868 }, { "epoch": 2.5846846846846847, "grad_norm": 11.49878569960777, "learning_rate": 5.749703013915631e-07, "loss": 0.23563973605632782, "step": 2869 }, { "epoch": 2.5855855855855854, "grad_norm": 15.194198862846598, "learning_rate": 5.725325230070488e-07, "loss": 0.247900128364563, "step": 2870 }, { "epoch": 2.5864864864864865, "grad_norm": 13.752781534722335, "learning_rate": 5.70099609608587e-07, "loss": 0.8496720194816589, "step": 2871 }, { "epoch": 2.5873873873873876, "grad_norm": 10.879228284947253, "learning_rate": 5.676715638695063e-07, "loss": 0.3789510726928711, "step": 2872 }, { "epoch": 2.5882882882882883, "grad_norm": 9.108727839012266, "learning_rate": 5.65248388457793e-07, "loss": 0.5383734107017517, "step": 2873 }, { "epoch": 2.589189189189189, "grad_norm": 18.22308185678221, "learning_rate": 5.628300860360775e-07, "loss": 0.888434112071991, "step": 2874 }, { "epoch": 2.59009009009009, "grad_norm": 16.69431853086682, "learning_rate": 5.604166592616356e-07, "loss": 0.8922913670539856, "step": 2875 }, { "epoch": 2.590990990990991, "grad_norm": 9.724091768622573, "learning_rate": 5.580081107863883e-07, "loss": 0.29118427634239197, "step": 2876 }, { "epoch": 2.591891891891892, "grad_norm": 11.289451576422394, "learning_rate": 5.556044432568936e-07, "loss": 0.47553423047065735, "step": 2877 }, { "epoch": 2.592792792792793, "grad_norm": 7.330711679101707, "learning_rate": 5.532056593143492e-07, "loss": 0.12005828320980072, "step": 2878 }, { "epoch": 2.5936936936936936, "grad_norm": 10.72402962786972, "learning_rate": 5.508117615945829e-07, "loss": 0.29493868350982666, "step": 2879 }, { "epoch": 2.5945945945945947, "grad_norm": 11.130972531541053, "learning_rate": 5.484227527280572e-07, "loss": 0.5804529190063477, "step": 2880 }, { "epoch": 2.5954954954954954, "grad_norm": 9.753123234270712, "learning_rate": 5.460386353398583e-07, "loss": 0.3161405026912689, "step": 2881 }, { "epoch": 2.5963963963963965, "grad_norm": 10.072435072281797, "learning_rate": 5.436594120497024e-07, "loss": 0.6472926735877991, "step": 2882 }, { "epoch": 2.597297297297297, "grad_norm": 13.20651579999806, "learning_rate": 5.412850854719254e-07, "loss": 0.3501310348510742, "step": 2883 }, { "epoch": 2.5981981981981983, "grad_norm": 11.833623558460557, "learning_rate": 5.389156582154808e-07, "loss": 0.6468371748924255, "step": 2884 }, { "epoch": 2.599099099099099, "grad_norm": 17.387581772165536, "learning_rate": 5.365511328839434e-07, "loss": 0.7932006120681763, "step": 2885 }, { "epoch": 2.6, "grad_norm": 6.793615575187419, "learning_rate": 5.34191512075497e-07, "loss": 0.6898642778396606, "step": 2886 }, { "epoch": 2.6009009009009008, "grad_norm": 18.576504492980916, "learning_rate": 5.318367983829393e-07, "loss": 1.0162630081176758, "step": 2887 }, { "epoch": 2.601801801801802, "grad_norm": 5.625984073417594, "learning_rate": 5.294869943936731e-07, "loss": 0.19983269274234772, "step": 2888 }, { "epoch": 2.6027027027027025, "grad_norm": 7.496120354594705, "learning_rate": 5.2714210268971e-07, "loss": 0.36101967096328735, "step": 2889 }, { "epoch": 2.6036036036036037, "grad_norm": 7.873168721581297, "learning_rate": 5.248021258476604e-07, "loss": 0.5061078667640686, "step": 2890 }, { "epoch": 2.6045045045045043, "grad_norm": 11.624768233702522, "learning_rate": 5.224670664387372e-07, "loss": 0.3834763765335083, "step": 2891 }, { "epoch": 2.6054054054054054, "grad_norm": 9.911151099128082, "learning_rate": 5.201369270287465e-07, "loss": 0.5326143503189087, "step": 2892 }, { "epoch": 2.6063063063063066, "grad_norm": 7.820904391932334, "learning_rate": 5.178117101780916e-07, "loss": 0.3475406765937805, "step": 2893 }, { "epoch": 2.6072072072072072, "grad_norm": 16.50750543368978, "learning_rate": 5.154914184417653e-07, "loss": 0.5961655974388123, "step": 2894 }, { "epoch": 2.608108108108108, "grad_norm": 12.496019832747427, "learning_rate": 5.13176054369347e-07, "loss": 0.40796059370040894, "step": 2895 }, { "epoch": 2.609009009009009, "grad_norm": 22.26962261514545, "learning_rate": 5.108656205050044e-07, "loss": 1.1192822456359863, "step": 2896 }, { "epoch": 2.60990990990991, "grad_norm": 13.324383607685071, "learning_rate": 5.085601193874862e-07, "loss": 0.4706074595451355, "step": 2897 }, { "epoch": 2.610810810810811, "grad_norm": 15.002986098962861, "learning_rate": 5.062595535501219e-07, "loss": 0.3952459990978241, "step": 2898 }, { "epoch": 2.6117117117117115, "grad_norm": 10.50665689705243, "learning_rate": 5.039639255208156e-07, "loss": 0.5983648300170898, "step": 2899 }, { "epoch": 2.6126126126126126, "grad_norm": 9.743087865938486, "learning_rate": 5.016732378220496e-07, "loss": 0.3142485022544861, "step": 2900 }, { "epoch": 2.6135135135135137, "grad_norm": 10.900750755523788, "learning_rate": 4.993874929708742e-07, "loss": 0.6305020451545715, "step": 2901 }, { "epoch": 2.6144144144144144, "grad_norm": 11.325325838620994, "learning_rate": 4.971066934789082e-07, "loss": 0.39126256108283997, "step": 2902 }, { "epoch": 2.6153153153153155, "grad_norm": 9.195635130603092, "learning_rate": 4.948308418523406e-07, "loss": 0.6813149452209473, "step": 2903 }, { "epoch": 2.616216216216216, "grad_norm": 7.970211430341625, "learning_rate": 4.925599405919185e-07, "loss": 0.20607680082321167, "step": 2904 }, { "epoch": 2.6171171171171173, "grad_norm": 14.374879080672446, "learning_rate": 4.902939921929528e-07, "loss": 1.5283896923065186, "step": 2905 }, { "epoch": 2.618018018018018, "grad_norm": 12.673012709285532, "learning_rate": 4.880329991453103e-07, "loss": 0.6433181166648865, "step": 2906 }, { "epoch": 2.618918918918919, "grad_norm": 6.616142823623153, "learning_rate": 4.857769639334143e-07, "loss": 0.31344592571258545, "step": 2907 }, { "epoch": 2.6198198198198197, "grad_norm": 11.189778461418907, "learning_rate": 4.835258890362387e-07, "loss": 0.17635640501976013, "step": 2908 }, { "epoch": 2.620720720720721, "grad_norm": 10.10800041744702, "learning_rate": 4.812797769273087e-07, "loss": 0.450918048620224, "step": 2909 }, { "epoch": 2.6216216216216215, "grad_norm": 11.70342646609071, "learning_rate": 4.790386300746935e-07, "loss": 0.6094970107078552, "step": 2910 }, { "epoch": 2.6225225225225226, "grad_norm": 10.22947360088361, "learning_rate": 4.7680245094100964e-07, "loss": 0.6853034496307373, "step": 2911 }, { "epoch": 2.6234234234234233, "grad_norm": 12.839932056774703, "learning_rate": 4.7457124198341366e-07, "loss": 0.3796372413635254, "step": 2912 }, { "epoch": 2.6243243243243244, "grad_norm": 13.780762966229505, "learning_rate": 4.7234500565359995e-07, "loss": 0.5920535326004028, "step": 2913 }, { "epoch": 2.6252252252252255, "grad_norm": 10.240242886503744, "learning_rate": 4.701237443978007e-07, "loss": 0.5691888928413391, "step": 2914 }, { "epoch": 2.626126126126126, "grad_norm": 12.114205411324416, "learning_rate": 4.679074606567785e-07, "loss": 0.5729956030845642, "step": 2915 }, { "epoch": 2.627027027027027, "grad_norm": 11.1676128406666, "learning_rate": 4.656961568658308e-07, "loss": 0.7371394038200378, "step": 2916 }, { "epoch": 2.627927927927928, "grad_norm": 13.772638871336252, "learning_rate": 4.634898354547779e-07, "loss": 0.78377366065979, "step": 2917 }, { "epoch": 2.628828828828829, "grad_norm": 17.795301472662988, "learning_rate": 4.6128849884797043e-07, "loss": 0.3983089029788971, "step": 2918 }, { "epoch": 2.6297297297297297, "grad_norm": 9.779365655263382, "learning_rate": 4.5909214946427806e-07, "loss": 0.3358728885650635, "step": 2919 }, { "epoch": 2.6306306306306304, "grad_norm": 11.477954073981119, "learning_rate": 4.569007897170907e-07, "loss": 0.6650623679161072, "step": 2920 }, { "epoch": 2.6315315315315315, "grad_norm": 11.574699250695376, "learning_rate": 4.547144220143185e-07, "loss": 0.31445077061653137, "step": 2921 }, { "epoch": 2.6324324324324326, "grad_norm": 23.500686708610274, "learning_rate": 4.5253304875838177e-07, "loss": 0.9121463298797607, "step": 2922 }, { "epoch": 2.6333333333333333, "grad_norm": 11.436808050811093, "learning_rate": 4.5035667234621716e-07, "loss": 0.45456385612487793, "step": 2923 }, { "epoch": 2.634234234234234, "grad_norm": 13.467816478645442, "learning_rate": 4.481852951692672e-07, "loss": 0.40558144450187683, "step": 2924 }, { "epoch": 2.635135135135135, "grad_norm": 15.985868983989086, "learning_rate": 4.4601891961348454e-07, "loss": 0.8153447508811951, "step": 2925 }, { "epoch": 2.636036036036036, "grad_norm": 20.91003276858005, "learning_rate": 4.43857548059321e-07, "loss": 0.4715864062309265, "step": 2926 }, { "epoch": 2.636936936936937, "grad_norm": 11.525057441277907, "learning_rate": 4.4170118288173694e-07, "loss": 0.753389835357666, "step": 2927 }, { "epoch": 2.637837837837838, "grad_norm": 10.303849555361593, "learning_rate": 4.395498264501863e-07, "loss": 0.4432196021080017, "step": 2928 }, { "epoch": 2.6387387387387387, "grad_norm": 9.203487422445022, "learning_rate": 4.374034811286193e-07, "loss": 0.7571563720703125, "step": 2929 }, { "epoch": 2.6396396396396398, "grad_norm": 14.315069407156413, "learning_rate": 4.352621492754833e-07, "loss": 0.29805177450180054, "step": 2930 }, { "epoch": 2.6405405405405404, "grad_norm": 9.050108256681167, "learning_rate": 4.331258332437127e-07, "loss": 0.6004000902175903, "step": 2931 }, { "epoch": 2.6414414414414416, "grad_norm": 10.959545782986904, "learning_rate": 4.3099453538073433e-07, "loss": 0.3260193467140198, "step": 2932 }, { "epoch": 2.642342342342342, "grad_norm": 14.515970073269813, "learning_rate": 4.2886825802845754e-07, "loss": 0.5870916247367859, "step": 2933 }, { "epoch": 2.6432432432432433, "grad_norm": 13.144185376047663, "learning_rate": 4.267470035232785e-07, "loss": 0.678016185760498, "step": 2934 }, { "epoch": 2.644144144144144, "grad_norm": 9.631680019422237, "learning_rate": 4.2463077419606977e-07, "loss": 0.31092676520347595, "step": 2935 }, { "epoch": 2.645045045045045, "grad_norm": 9.079015859517357, "learning_rate": 4.22519572372187e-07, "loss": 0.24442800879478455, "step": 2936 }, { "epoch": 2.645945945945946, "grad_norm": 14.122301869657322, "learning_rate": 4.204134003714577e-07, "loss": 0.9151749610900879, "step": 2937 }, { "epoch": 2.646846846846847, "grad_norm": 19.03327722407373, "learning_rate": 4.183122605081852e-07, "loss": 0.5897693037986755, "step": 2938 }, { "epoch": 2.647747747747748, "grad_norm": 16.441082240151502, "learning_rate": 4.162161550911414e-07, "loss": 0.4502699077129364, "step": 2939 }, { "epoch": 2.6486486486486487, "grad_norm": 22.301106051469862, "learning_rate": 4.1412508642356574e-07, "loss": 0.4282999634742737, "step": 2940 }, { "epoch": 2.6495495495495494, "grad_norm": 7.801578059242751, "learning_rate": 4.120390568031674e-07, "loss": 0.44312307238578796, "step": 2941 }, { "epoch": 2.6504504504504505, "grad_norm": 14.570842876967822, "learning_rate": 4.0995806852211384e-07, "loss": 0.5986523032188416, "step": 2942 }, { "epoch": 2.6513513513513516, "grad_norm": 27.648638258523697, "learning_rate": 4.078821238670355e-07, "loss": 0.642958402633667, "step": 2943 }, { "epoch": 2.6522522522522523, "grad_norm": 9.833657333706556, "learning_rate": 4.0581122511901935e-07, "loss": 0.24922549724578857, "step": 2944 }, { "epoch": 2.653153153153153, "grad_norm": 11.07686680706659, "learning_rate": 4.037453745536102e-07, "loss": 0.5340144038200378, "step": 2945 }, { "epoch": 2.654054054054054, "grad_norm": 12.06915123034913, "learning_rate": 4.0168457444080267e-07, "loss": 0.5442618727684021, "step": 2946 }, { "epoch": 2.654954954954955, "grad_norm": 16.12520800633065, "learning_rate": 3.996288270450438e-07, "loss": 0.4101296365261078, "step": 2947 }, { "epoch": 2.655855855855856, "grad_norm": 40.56171989302629, "learning_rate": 3.975781346252283e-07, "loss": 0.5544003844261169, "step": 2948 }, { "epoch": 2.6567567567567565, "grad_norm": 10.706278418644738, "learning_rate": 3.955324994346954e-07, "loss": 0.2974967062473297, "step": 2949 }, { "epoch": 2.6576576576576576, "grad_norm": 9.782226221682107, "learning_rate": 3.9349192372123034e-07, "loss": 0.8130560517311096, "step": 2950 }, { "epoch": 2.6585585585585587, "grad_norm": 9.37768439068461, "learning_rate": 3.914564097270546e-07, "loss": 0.3785146474838257, "step": 2951 }, { "epoch": 2.6594594594594594, "grad_norm": 17.206005574217432, "learning_rate": 3.8942595968883167e-07, "loss": 1.1182104349136353, "step": 2952 }, { "epoch": 2.6603603603603605, "grad_norm": 20.771815609474533, "learning_rate": 3.87400575837657e-07, "loss": 0.4129304587841034, "step": 2953 }, { "epoch": 2.661261261261261, "grad_norm": 11.277415853457976, "learning_rate": 3.8538026039906307e-07, "loss": 0.2300529181957245, "step": 2954 }, { "epoch": 2.6621621621621623, "grad_norm": 9.654022712563158, "learning_rate": 3.8336501559300967e-07, "loss": 0.8957171440124512, "step": 2955 }, { "epoch": 2.663063063063063, "grad_norm": 12.797325451540969, "learning_rate": 3.8135484363388706e-07, "loss": 0.6313111782073975, "step": 2956 }, { "epoch": 2.663963963963964, "grad_norm": 15.339600675956936, "learning_rate": 3.793497467305113e-07, "loss": 0.7425114512443542, "step": 2957 }, { "epoch": 2.6648648648648647, "grad_norm": 9.97783864849821, "learning_rate": 3.7734972708612037e-07, "loss": 0.5404260158538818, "step": 2958 }, { "epoch": 2.665765765765766, "grad_norm": 9.59150288921644, "learning_rate": 3.7535478689837477e-07, "loss": 0.2231852412223816, "step": 2959 }, { "epoch": 2.6666666666666665, "grad_norm": 8.70809862679508, "learning_rate": 3.733649283593521e-07, "loss": 0.18978221714496613, "step": 2960 }, { "epoch": 2.6675675675675676, "grad_norm": 11.210466186650416, "learning_rate": 3.7138015365554834e-07, "loss": 1.2090866565704346, "step": 2961 }, { "epoch": 2.6684684684684683, "grad_norm": 13.860488258268921, "learning_rate": 3.694004649678706e-07, "loss": 0.3982599377632141, "step": 2962 }, { "epoch": 2.6693693693693694, "grad_norm": 12.25531176790016, "learning_rate": 3.6742586447164e-07, "loss": 0.6313212513923645, "step": 2963 }, { "epoch": 2.6702702702702705, "grad_norm": 10.876511676239046, "learning_rate": 3.654563543365836e-07, "loss": 0.2990618348121643, "step": 2964 }, { "epoch": 2.671171171171171, "grad_norm": 6.791956414843501, "learning_rate": 3.6349193672683856e-07, "loss": 0.2878454923629761, "step": 2965 }, { "epoch": 2.672072072072072, "grad_norm": 11.29455290884701, "learning_rate": 3.615326138009445e-07, "loss": 0.639564573764801, "step": 2966 }, { "epoch": 2.672972972972973, "grad_norm": 12.46761612333896, "learning_rate": 3.5957838771184137e-07, "loss": 0.274543434381485, "step": 2967 }, { "epoch": 2.673873873873874, "grad_norm": 14.973325720741578, "learning_rate": 3.576292606068721e-07, "loss": 0.330290824174881, "step": 2968 }, { "epoch": 2.6747747747747748, "grad_norm": 11.74660551912347, "learning_rate": 3.556852346277734e-07, "loss": 0.5368125438690186, "step": 2969 }, { "epoch": 2.6756756756756754, "grad_norm": 9.198014550280483, "learning_rate": 3.5374631191067875e-07, "loss": 0.4176695942878723, "step": 2970 }, { "epoch": 2.6765765765765765, "grad_norm": 7.897810927600474, "learning_rate": 3.51812494586114e-07, "loss": 0.2113216370344162, "step": 2971 }, { "epoch": 2.6774774774774777, "grad_norm": 9.424805129671286, "learning_rate": 3.498837847789949e-07, "loss": 0.550119161605835, "step": 2972 }, { "epoch": 2.6783783783783783, "grad_norm": 7.847503068149666, "learning_rate": 3.4796018460862444e-07, "loss": 0.628257155418396, "step": 2973 }, { "epoch": 2.679279279279279, "grad_norm": 8.062500342777113, "learning_rate": 3.460416961886898e-07, "loss": 0.5177410840988159, "step": 2974 }, { "epoch": 2.68018018018018, "grad_norm": 11.597691603877141, "learning_rate": 3.4412832162726506e-07, "loss": 0.4858509302139282, "step": 2975 }, { "epoch": 2.6810810810810812, "grad_norm": 19.929904139237365, "learning_rate": 3.422200630268013e-07, "loss": 0.7135697603225708, "step": 2976 }, { "epoch": 2.681981981981982, "grad_norm": 15.940704926590058, "learning_rate": 3.403169224841307e-07, "loss": 0.9417455792427063, "step": 2977 }, { "epoch": 2.682882882882883, "grad_norm": 12.173019301492538, "learning_rate": 3.3841890209045933e-07, "loss": 0.14253586530685425, "step": 2978 }, { "epoch": 2.6837837837837837, "grad_norm": 9.487205575782996, "learning_rate": 3.3652600393137e-07, "loss": 1.2050479650497437, "step": 2979 }, { "epoch": 2.684684684684685, "grad_norm": 19.108920534477047, "learning_rate": 3.346382300868134e-07, "loss": 0.6063281297683716, "step": 2980 }, { "epoch": 2.6855855855855855, "grad_norm": 12.042226841213884, "learning_rate": 3.3275558263111354e-07, "loss": 0.4792482852935791, "step": 2981 }, { "epoch": 2.6864864864864866, "grad_norm": 15.098029466037605, "learning_rate": 3.3087806363295783e-07, "loss": 0.40536242723464966, "step": 2982 }, { "epoch": 2.6873873873873872, "grad_norm": 8.906903068783857, "learning_rate": 3.2900567515540163e-07, "loss": 0.5243141055107117, "step": 2983 }, { "epoch": 2.6882882882882884, "grad_norm": 18.434403089069676, "learning_rate": 3.2713841925585963e-07, "loss": 0.4433700144290924, "step": 2984 }, { "epoch": 2.689189189189189, "grad_norm": 11.302154592289067, "learning_rate": 3.2527629798610906e-07, "loss": 0.2879910469055176, "step": 2985 }, { "epoch": 2.69009009009009, "grad_norm": 12.569851224235085, "learning_rate": 3.234193133922858e-07, "loss": 0.5801317691802979, "step": 2986 }, { "epoch": 2.690990990990991, "grad_norm": 9.530874455846417, "learning_rate": 3.215674675148778e-07, "loss": 0.68318110704422, "step": 2987 }, { "epoch": 2.691891891891892, "grad_norm": 8.053056998882271, "learning_rate": 3.1972076238873107e-07, "loss": 0.4892258644104004, "step": 2988 }, { "epoch": 2.692792792792793, "grad_norm": 13.914923960879545, "learning_rate": 3.1787920004303806e-07, "loss": 0.5541627407073975, "step": 2989 }, { "epoch": 2.6936936936936937, "grad_norm": 21.508437543644284, "learning_rate": 3.1604278250134514e-07, "loss": 1.266975998878479, "step": 2990 }, { "epoch": 2.6945945945945944, "grad_norm": 12.186755003576165, "learning_rate": 3.142115117815414e-07, "loss": 0.31859084963798523, "step": 2991 }, { "epoch": 2.6954954954954955, "grad_norm": 15.816992189418007, "learning_rate": 3.1238538989586287e-07, "loss": 1.1375571489334106, "step": 2992 }, { "epoch": 2.6963963963963966, "grad_norm": 12.044527763238884, "learning_rate": 3.105644188508877e-07, "loss": 0.32023024559020996, "step": 2993 }, { "epoch": 2.6972972972972973, "grad_norm": 10.930969892517487, "learning_rate": 3.087486006475321e-07, "loss": 0.5635913610458374, "step": 2994 }, { "epoch": 2.698198198198198, "grad_norm": 11.19352392870248, "learning_rate": 3.069379372810544e-07, "loss": 0.2701873779296875, "step": 2995 }, { "epoch": 2.699099099099099, "grad_norm": 12.458216828465993, "learning_rate": 3.0513243074104303e-07, "loss": 0.41960519552230835, "step": 2996 }, { "epoch": 2.7, "grad_norm": 14.87043863301729, "learning_rate": 3.0333208301142615e-07, "loss": 0.29608380794525146, "step": 2997 }, { "epoch": 2.700900900900901, "grad_norm": 14.340352075710008, "learning_rate": 3.015368960704584e-07, "loss": 0.4806203544139862, "step": 2998 }, { "epoch": 2.701801801801802, "grad_norm": 11.118175080345315, "learning_rate": 2.997468718907259e-07, "loss": 0.2992958426475525, "step": 2999 }, { "epoch": 2.7027027027027026, "grad_norm": 9.049822187405406, "learning_rate": 2.97962012439143e-07, "loss": 0.25708407163619995, "step": 3000 }, { "epoch": 2.7036036036036037, "grad_norm": 8.944507727197477, "learning_rate": 2.9618231967694533e-07, "loss": 0.4808061122894287, "step": 3001 }, { "epoch": 2.7045045045045044, "grad_norm": 9.718239368961745, "learning_rate": 2.944077955596947e-07, "loss": 0.5168577432632446, "step": 3002 }, { "epoch": 2.7054054054054055, "grad_norm": 12.317380466134502, "learning_rate": 2.926384420372713e-07, "loss": 0.35036972165107727, "step": 3003 }, { "epoch": 2.706306306306306, "grad_norm": 8.014069226924516, "learning_rate": 2.908742610538762e-07, "loss": 0.3752833902835846, "step": 3004 }, { "epoch": 2.7072072072072073, "grad_norm": 12.7383092954234, "learning_rate": 2.8911525454802304e-07, "loss": 0.8151978850364685, "step": 3005 }, { "epoch": 2.708108108108108, "grad_norm": 9.209115269320733, "learning_rate": 2.873614244525436e-07, "loss": 0.22708743810653687, "step": 3006 }, { "epoch": 2.709009009009009, "grad_norm": 16.32057044884385, "learning_rate": 2.85612772694579e-07, "loss": 0.4021984338760376, "step": 3007 }, { "epoch": 2.7099099099099098, "grad_norm": 10.00615274011178, "learning_rate": 2.838693011955823e-07, "loss": 0.7005878686904907, "step": 3008 }, { "epoch": 2.710810810810811, "grad_norm": 10.060717699823652, "learning_rate": 2.821310118713122e-07, "loss": 0.6117244362831116, "step": 3009 }, { "epoch": 2.7117117117117115, "grad_norm": 9.917230152814122, "learning_rate": 2.8039790663183573e-07, "loss": 0.3839304447174072, "step": 3010 }, { "epoch": 2.7126126126126127, "grad_norm": 7.84963929859655, "learning_rate": 2.7866998738152016e-07, "loss": 0.3038249611854553, "step": 3011 }, { "epoch": 2.7135135135135133, "grad_norm": 11.237512908364222, "learning_rate": 2.769472560190384e-07, "loss": 0.8849109411239624, "step": 3012 }, { "epoch": 2.7144144144144144, "grad_norm": 11.635626435222667, "learning_rate": 2.7522971443735946e-07, "loss": 0.4839766025543213, "step": 3013 }, { "epoch": 2.7153153153153156, "grad_norm": 7.328928901326618, "learning_rate": 2.735173645237493e-07, "loss": 0.4564918279647827, "step": 3014 }, { "epoch": 2.7162162162162162, "grad_norm": 15.957568190247981, "learning_rate": 2.71810208159774e-07, "loss": 1.339203119277954, "step": 3015 }, { "epoch": 2.717117117117117, "grad_norm": 7.631599104932898, "learning_rate": 2.701082472212879e-07, "loss": 0.36641496419906616, "step": 3016 }, { "epoch": 2.718018018018018, "grad_norm": 11.386224175739468, "learning_rate": 2.6841148357843905e-07, "loss": 0.8879693150520325, "step": 3017 }, { "epoch": 2.718918918918919, "grad_norm": 8.528432535758734, "learning_rate": 2.6671991909566265e-07, "loss": 0.18705230951309204, "step": 3018 }, { "epoch": 2.71981981981982, "grad_norm": 18.3906209804001, "learning_rate": 2.650335556316835e-07, "loss": 0.4086385667324066, "step": 3019 }, { "epoch": 2.7207207207207205, "grad_norm": 9.512586159138896, "learning_rate": 2.6335239503951006e-07, "loss": 0.2790507674217224, "step": 3020 }, { "epoch": 2.7216216216216216, "grad_norm": 7.546803365202142, "learning_rate": 2.616764391664317e-07, "loss": 0.23840834200382233, "step": 3021 }, { "epoch": 2.7225225225225227, "grad_norm": 7.975201178673251, "learning_rate": 2.600056898540232e-07, "loss": 0.2528843879699707, "step": 3022 }, { "epoch": 2.7234234234234234, "grad_norm": 8.548652521131835, "learning_rate": 2.5834014893813486e-07, "loss": 0.35694175958633423, "step": 3023 }, { "epoch": 2.7243243243243245, "grad_norm": 12.896325190744951, "learning_rate": 2.56679818248895e-07, "loss": 0.35105961561203003, "step": 3024 }, { "epoch": 2.725225225225225, "grad_norm": 10.067003688832774, "learning_rate": 2.5502469961070643e-07, "loss": 0.338885098695755, "step": 3025 }, { "epoch": 2.7261261261261263, "grad_norm": 9.104931644949936, "learning_rate": 2.533747948422466e-07, "loss": 0.3766016364097595, "step": 3026 }, { "epoch": 2.727027027027027, "grad_norm": 10.795857671933486, "learning_rate": 2.517301057564603e-07, "loss": 0.2562897801399231, "step": 3027 }, { "epoch": 2.727927927927928, "grad_norm": 7.405613649365451, "learning_rate": 2.500906341605652e-07, "loss": 0.5181165933609009, "step": 3028 }, { "epoch": 2.7288288288288287, "grad_norm": 9.171006696531848, "learning_rate": 2.4845638185604314e-07, "loss": 0.3681427240371704, "step": 3029 }, { "epoch": 2.72972972972973, "grad_norm": 18.192644826948797, "learning_rate": 2.4682735063864205e-07, "loss": 0.40039005875587463, "step": 3030 }, { "epoch": 2.7306306306306305, "grad_norm": 7.724440284318915, "learning_rate": 2.452035422983734e-07, "loss": 0.37891077995300293, "step": 3031 }, { "epoch": 2.7315315315315316, "grad_norm": 11.685556863073284, "learning_rate": 2.435849586195077e-07, "loss": 0.2748810946941376, "step": 3032 }, { "epoch": 2.7324324324324323, "grad_norm": 9.146797818479726, "learning_rate": 2.4197160138057675e-07, "loss": 0.7849463820457458, "step": 3033 }, { "epoch": 2.7333333333333334, "grad_norm": 8.77265614001756, "learning_rate": 2.403634723543674e-07, "loss": 0.4742673337459564, "step": 3034 }, { "epoch": 2.7342342342342345, "grad_norm": 6.532636672073548, "learning_rate": 2.3876057330792344e-07, "loss": 0.1301393061876297, "step": 3035 }, { "epoch": 2.735135135135135, "grad_norm": 12.16544161167629, "learning_rate": 2.3716290600254043e-07, "loss": 0.1819266974925995, "step": 3036 }, { "epoch": 2.736036036036036, "grad_norm": 8.98831106384767, "learning_rate": 2.3557047219376628e-07, "loss": 0.17506417632102966, "step": 3037 }, { "epoch": 2.736936936936937, "grad_norm": 10.835020726650052, "learning_rate": 2.3398327363139739e-07, "loss": 0.23951488733291626, "step": 3038 }, { "epoch": 2.737837837837838, "grad_norm": 7.661414503867859, "learning_rate": 2.3240131205947814e-07, "loss": 0.3445315957069397, "step": 3039 }, { "epoch": 2.7387387387387387, "grad_norm": 6.431788788385182, "learning_rate": 2.3082458921629857e-07, "loss": 0.22054603695869446, "step": 3040 }, { "epoch": 2.7396396396396394, "grad_norm": 8.864951894862525, "learning_rate": 2.2925310683439062e-07, "loss": 0.18980057537555695, "step": 3041 }, { "epoch": 2.7405405405405405, "grad_norm": 8.253867273417365, "learning_rate": 2.2768686664053074e-07, "loss": 0.39419782161712646, "step": 3042 }, { "epoch": 2.7414414414414416, "grad_norm": 14.513183317428417, "learning_rate": 2.261258703557323e-07, "loss": 0.3947722315788269, "step": 3043 }, { "epoch": 2.7423423423423423, "grad_norm": 7.339659464691865, "learning_rate": 2.2457011969524879e-07, "loss": 0.3583824038505554, "step": 3044 }, { "epoch": 2.743243243243243, "grad_norm": 10.915913341596568, "learning_rate": 2.2301961636856884e-07, "loss": 0.4267234802246094, "step": 3045 }, { "epoch": 2.744144144144144, "grad_norm": 11.527013447134877, "learning_rate": 2.214743620794152e-07, "loss": 0.7158124446868896, "step": 3046 }, { "epoch": 2.745045045045045, "grad_norm": 20.27771534058546, "learning_rate": 2.1993435852574297e-07, "loss": 0.3993403911590576, "step": 3047 }, { "epoch": 2.745945945945946, "grad_norm": 9.160585240600767, "learning_rate": 2.1839960739973687e-07, "loss": 0.377150297164917, "step": 3048 }, { "epoch": 2.746846846846847, "grad_norm": 7.85243737608308, "learning_rate": 2.168701103878118e-07, "loss": 0.35824477672576904, "step": 3049 }, { "epoch": 2.7477477477477477, "grad_norm": 11.30277465334945, "learning_rate": 2.1534586917060673e-07, "loss": 0.48433351516723633, "step": 3050 }, { "epoch": 2.7486486486486488, "grad_norm": 11.06453851381117, "learning_rate": 2.1382688542298912e-07, "loss": 0.24319399893283844, "step": 3051 }, { "epoch": 2.7495495495495494, "grad_norm": 14.001568751247557, "learning_rate": 2.1231316081404552e-07, "loss": 0.7487995028495789, "step": 3052 }, { "epoch": 2.7504504504504506, "grad_norm": 13.81950197404454, "learning_rate": 2.108046970070876e-07, "loss": 0.6276721358299255, "step": 3053 }, { "epoch": 2.7513513513513512, "grad_norm": 7.978721613829254, "learning_rate": 2.093014956596423e-07, "loss": 0.3058184087276459, "step": 3054 }, { "epoch": 2.7522522522522523, "grad_norm": 5.464143163192235, "learning_rate": 2.0780355842345722e-07, "loss": 0.20870988070964813, "step": 3055 }, { "epoch": 2.753153153153153, "grad_norm": 11.223152122559126, "learning_rate": 2.0631088694449352e-07, "loss": 0.33823296427726746, "step": 3056 }, { "epoch": 2.754054054054054, "grad_norm": 9.854687827814896, "learning_rate": 2.0482348286292864e-07, "loss": 0.4668673574924469, "step": 3057 }, { "epoch": 2.754954954954955, "grad_norm": 9.436338510663326, "learning_rate": 2.0334134781314907e-07, "loss": 0.5126863121986389, "step": 3058 }, { "epoch": 2.755855855855856, "grad_norm": 18.530337066924343, "learning_rate": 2.018644834237543e-07, "loss": 0.6312925815582275, "step": 3059 }, { "epoch": 2.756756756756757, "grad_norm": 9.128267070096916, "learning_rate": 2.0039289131755124e-07, "loss": 0.350289523601532, "step": 3060 }, { "epoch": 2.7576576576576577, "grad_norm": 10.091471025669732, "learning_rate": 1.989265731115525e-07, "loss": 0.6063660383224487, "step": 3061 }, { "epoch": 2.7585585585585584, "grad_norm": 9.75504633193664, "learning_rate": 1.9746553041697758e-07, "loss": 0.3913068175315857, "step": 3062 }, { "epoch": 2.7594594594594595, "grad_norm": 8.800586125231272, "learning_rate": 1.9600976483924782e-07, "loss": 0.33195608854293823, "step": 3063 }, { "epoch": 2.7603603603603606, "grad_norm": 8.337439084226812, "learning_rate": 1.9455927797798645e-07, "loss": 0.4687657058238983, "step": 3064 }, { "epoch": 2.7612612612612613, "grad_norm": 10.79715324778288, "learning_rate": 1.9311407142701576e-07, "loss": 0.5919493436813354, "step": 3065 }, { "epoch": 2.762162162162162, "grad_norm": 9.985293943064102, "learning_rate": 1.916741467743566e-07, "loss": 0.28231510519981384, "step": 3066 }, { "epoch": 2.763063063063063, "grad_norm": 9.314398130051678, "learning_rate": 1.902395056022266e-07, "loss": 0.3249053955078125, "step": 3067 }, { "epoch": 2.763963963963964, "grad_norm": 9.18327770307208, "learning_rate": 1.8881014948703536e-07, "loss": 0.40570229291915894, "step": 3068 }, { "epoch": 2.764864864864865, "grad_norm": 8.896525489631102, "learning_rate": 1.8738607999938818e-07, "loss": 0.18493056297302246, "step": 3069 }, { "epoch": 2.7657657657657655, "grad_norm": 8.969722816637464, "learning_rate": 1.8596729870407836e-07, "loss": 0.7123985290527344, "step": 3070 }, { "epoch": 2.7666666666666666, "grad_norm": 10.679117370967804, "learning_rate": 1.8455380716009162e-07, "loss": 0.26059746742248535, "step": 3071 }, { "epoch": 2.7675675675675677, "grad_norm": 10.097263761653606, "learning_rate": 1.8314560692059836e-07, "loss": 0.6941015720367432, "step": 3072 }, { "epoch": 2.7684684684684684, "grad_norm": 6.585430257572345, "learning_rate": 1.8174269953295631e-07, "loss": 0.35611647367477417, "step": 3073 }, { "epoch": 2.7693693693693695, "grad_norm": 7.595614278486373, "learning_rate": 1.8034508653870796e-07, "loss": 0.6131374835968018, "step": 3074 }, { "epoch": 2.77027027027027, "grad_norm": 9.468174216358069, "learning_rate": 1.7895276947357542e-07, "loss": 0.5632179975509644, "step": 3075 }, { "epoch": 2.7711711711711713, "grad_norm": 8.469459034669931, "learning_rate": 1.775657498674649e-07, "loss": 0.39638811349868774, "step": 3076 }, { "epoch": 2.772072072072072, "grad_norm": 9.257610469982092, "learning_rate": 1.7618402924445944e-07, "loss": 0.3628331124782562, "step": 3077 }, { "epoch": 2.772972972972973, "grad_norm": 9.385884073498866, "learning_rate": 1.7480760912282015e-07, "loss": 0.8038603067398071, "step": 3078 }, { "epoch": 2.7738738738738737, "grad_norm": 9.072494003276503, "learning_rate": 1.7343649101498327e-07, "loss": 0.3333243131637573, "step": 3079 }, { "epoch": 2.774774774774775, "grad_norm": 11.359674102953054, "learning_rate": 1.7207067642756092e-07, "loss": 0.6656917333602905, "step": 3080 }, { "epoch": 2.7756756756756755, "grad_norm": 8.296166497045828, "learning_rate": 1.707101668613348e-07, "loss": 0.4607012867927551, "step": 3081 }, { "epoch": 2.7765765765765766, "grad_norm": 7.216294213414764, "learning_rate": 1.693549638112607e-07, "loss": 0.4505841135978699, "step": 3082 }, { "epoch": 2.7774774774774773, "grad_norm": 8.30811616772895, "learning_rate": 1.6800506876645972e-07, "loss": 0.317621648311615, "step": 3083 }, { "epoch": 2.7783783783783784, "grad_norm": 12.855704246507058, "learning_rate": 1.6666048321022367e-07, "loss": 0.5960338115692139, "step": 3084 }, { "epoch": 2.7792792792792795, "grad_norm": 14.831071328132793, "learning_rate": 1.65321208620009e-07, "loss": 1.1567168235778809, "step": 3085 }, { "epoch": 2.78018018018018, "grad_norm": 6.972786870459175, "learning_rate": 1.6398724646743525e-07, "loss": 0.4468737840652466, "step": 3086 }, { "epoch": 2.781081081081081, "grad_norm": 7.818059401640994, "learning_rate": 1.6265859821828656e-07, "loss": 0.5314226150512695, "step": 3087 }, { "epoch": 2.781981981981982, "grad_norm": 9.666906330053216, "learning_rate": 1.6133526533250566e-07, "loss": 0.18195509910583496, "step": 3088 }, { "epoch": 2.782882882882883, "grad_norm": 13.45187665837708, "learning_rate": 1.6001724926419826e-07, "loss": 0.3830515146255493, "step": 3089 }, { "epoch": 2.7837837837837838, "grad_norm": 18.07292873051059, "learning_rate": 1.5870455146162367e-07, "loss": 0.5667542815208435, "step": 3090 }, { "epoch": 2.7846846846846844, "grad_norm": 7.150461444462728, "learning_rate": 1.5739717336720084e-07, "loss": 0.1438254415988922, "step": 3091 }, { "epoch": 2.7855855855855856, "grad_norm": 9.567340392432493, "learning_rate": 1.5609511641750118e-07, "loss": 0.2774874269962311, "step": 3092 }, { "epoch": 2.7864864864864867, "grad_norm": 11.980751288504987, "learning_rate": 1.5479838204324915e-07, "loss": 0.3390207886695862, "step": 3093 }, { "epoch": 2.7873873873873873, "grad_norm": 11.66756062576352, "learning_rate": 1.535069716693227e-07, "loss": 0.5234507918357849, "step": 3094 }, { "epoch": 2.7882882882882885, "grad_norm": 9.71211202192653, "learning_rate": 1.5222088671474732e-07, "loss": 0.7736992239952087, "step": 3095 }, { "epoch": 2.789189189189189, "grad_norm": 10.281658736805602, "learning_rate": 1.5094012859269814e-07, "loss": 0.4092180132865906, "step": 3096 }, { "epoch": 2.7900900900900902, "grad_norm": 9.823131199230207, "learning_rate": 1.4966469871049605e-07, "loss": 0.3046525716781616, "step": 3097 }, { "epoch": 2.790990990990991, "grad_norm": 11.885545549588448, "learning_rate": 1.4839459846960946e-07, "loss": 0.31213539838790894, "step": 3098 }, { "epoch": 2.791891891891892, "grad_norm": 12.373019318273954, "learning_rate": 1.4712982926564646e-07, "loss": 0.3035721480846405, "step": 3099 }, { "epoch": 2.7927927927927927, "grad_norm": 9.854784746700393, "learning_rate": 1.45870392488362e-07, "loss": 0.533233106136322, "step": 3100 }, { "epoch": 2.793693693693694, "grad_norm": 13.07465983745449, "learning_rate": 1.446162895216474e-07, "loss": 0.49974358081817627, "step": 3101 }, { "epoch": 2.7945945945945945, "grad_norm": 9.560357311500448, "learning_rate": 1.4336752174353706e-07, "loss": 0.3893500864505768, "step": 3102 }, { "epoch": 2.7954954954954956, "grad_norm": 8.622650578947777, "learning_rate": 1.421240905261989e-07, "loss": 0.25431448221206665, "step": 3103 }, { "epoch": 2.7963963963963963, "grad_norm": 27.539509816072687, "learning_rate": 1.4088599723594e-07, "loss": 1.0530831813812256, "step": 3104 }, { "epoch": 2.7972972972972974, "grad_norm": 8.544234342637827, "learning_rate": 1.3965324323320095e-07, "loss": 0.3481456935405731, "step": 3105 }, { "epoch": 2.798198198198198, "grad_norm": 17.30398356731005, "learning_rate": 1.3842582987255494e-07, "loss": 0.36002570390701294, "step": 3106 }, { "epoch": 2.799099099099099, "grad_norm": 17.35792289018547, "learning_rate": 1.3720375850270806e-07, "loss": 0.8561835289001465, "step": 3107 }, { "epoch": 2.8, "grad_norm": 9.690736973795328, "learning_rate": 1.3598703046649507e-07, "loss": 0.5120982527732849, "step": 3108 }, { "epoch": 2.800900900900901, "grad_norm": 6.887583688878893, "learning_rate": 1.3477564710088097e-07, "loss": 0.7675758600234985, "step": 3109 }, { "epoch": 2.801801801801802, "grad_norm": 9.776529585323216, "learning_rate": 1.3356960973695544e-07, "loss": 0.3310229778289795, "step": 3110 }, { "epoch": 2.8027027027027027, "grad_norm": 8.184489373151052, "learning_rate": 1.3236891969993727e-07, "loss": 0.4742078185081482, "step": 3111 }, { "epoch": 2.8036036036036034, "grad_norm": 9.172924808590428, "learning_rate": 1.311735783091661e-07, "loss": 0.9775221943855286, "step": 3112 }, { "epoch": 2.8045045045045045, "grad_norm": 19.61258494026224, "learning_rate": 1.2998358687810687e-07, "loss": 0.722926139831543, "step": 3113 }, { "epoch": 2.8054054054054056, "grad_norm": 11.216368934493955, "learning_rate": 1.2879894671434468e-07, "loss": 0.3220384418964386, "step": 3114 }, { "epoch": 2.8063063063063063, "grad_norm": 14.424103129637146, "learning_rate": 1.2761965911958385e-07, "loss": 0.3698599338531494, "step": 3115 }, { "epoch": 2.807207207207207, "grad_norm": 10.036482837996969, "learning_rate": 1.2644572538965006e-07, "loss": 0.3106807768344879, "step": 3116 }, { "epoch": 2.808108108108108, "grad_norm": 11.541431598800086, "learning_rate": 1.2527714681448255e-07, "loss": 0.7433834075927734, "step": 3117 }, { "epoch": 2.809009009009009, "grad_norm": 10.832494397814555, "learning_rate": 1.2411392467813922e-07, "loss": 0.3309785723686218, "step": 3118 }, { "epoch": 2.80990990990991, "grad_norm": 17.445178429968113, "learning_rate": 1.229560602587898e-07, "loss": 0.4417427182197571, "step": 3119 }, { "epoch": 2.810810810810811, "grad_norm": 6.774341516784999, "learning_rate": 1.2180355482871831e-07, "loss": 0.1555105745792389, "step": 3120 }, { "epoch": 2.8117117117117116, "grad_norm": 9.737932385587456, "learning_rate": 1.2065640965432003e-07, "loss": 0.4465380012989044, "step": 3121 }, { "epoch": 2.8126126126126128, "grad_norm": 10.501743399687943, "learning_rate": 1.1951462599609887e-07, "loss": 0.2202143371105194, "step": 3122 }, { "epoch": 2.8135135135135134, "grad_norm": 11.45244426375706, "learning_rate": 1.1837820510867015e-07, "loss": 0.7149019837379456, "step": 3123 }, { "epoch": 2.8144144144144145, "grad_norm": 9.244790397934926, "learning_rate": 1.1724714824075334e-07, "loss": 0.2830231785774231, "step": 3124 }, { "epoch": 2.815315315315315, "grad_norm": 12.698000526181215, "learning_rate": 1.1612145663517705e-07, "loss": 0.8680525422096252, "step": 3125 }, { "epoch": 2.8162162162162163, "grad_norm": 10.379207079081237, "learning_rate": 1.1500113152887126e-07, "loss": 0.22102048993110657, "step": 3126 }, { "epoch": 2.817117117117117, "grad_norm": 14.270298170762834, "learning_rate": 1.138861741528724e-07, "loss": 0.9436591267585754, "step": 3127 }, { "epoch": 2.818018018018018, "grad_norm": 13.26213957882597, "learning_rate": 1.1277658573231488e-07, "loss": 0.3559871017932892, "step": 3128 }, { "epoch": 2.8189189189189188, "grad_norm": 12.621166444214905, "learning_rate": 1.1167236748643729e-07, "loss": 0.5587183237075806, "step": 3129 }, { "epoch": 2.81981981981982, "grad_norm": 6.363494593930729, "learning_rate": 1.1057352062857463e-07, "loss": 0.16196994483470917, "step": 3130 }, { "epoch": 2.820720720720721, "grad_norm": 16.66892027792195, "learning_rate": 1.0948004636616216e-07, "loss": 0.39959704875946045, "step": 3131 }, { "epoch": 2.8216216216216217, "grad_norm": 10.70662559558524, "learning_rate": 1.0839194590072932e-07, "loss": 0.6919280290603638, "step": 3132 }, { "epoch": 2.8225225225225223, "grad_norm": 16.56026046820519, "learning_rate": 1.0730922042790192e-07, "loss": 1.7580801248550415, "step": 3133 }, { "epoch": 2.8234234234234235, "grad_norm": 14.565322998042095, "learning_rate": 1.0623187113739997e-07, "loss": 0.5585309267044067, "step": 3134 }, { "epoch": 2.8243243243243246, "grad_norm": 11.67670268575054, "learning_rate": 1.0515989921303427e-07, "loss": 0.7268367409706116, "step": 3135 }, { "epoch": 2.8252252252252252, "grad_norm": 7.28452322565033, "learning_rate": 1.0409330583271037e-07, "loss": 0.3579501807689667, "step": 3136 }, { "epoch": 2.826126126126126, "grad_norm": 13.967372160846384, "learning_rate": 1.0303209216841914e-07, "loss": 0.3260246515274048, "step": 3137 }, { "epoch": 2.827027027027027, "grad_norm": 8.193213648838276, "learning_rate": 1.0197625938624389e-07, "loss": 0.4330894947052002, "step": 3138 }, { "epoch": 2.827927927927928, "grad_norm": 7.1412374000814705, "learning_rate": 1.0092580864635326e-07, "loss": 0.4103037714958191, "step": 3139 }, { "epoch": 2.828828828828829, "grad_norm": 11.615310464679258, "learning_rate": 9.988074110300228e-08, "loss": 0.48591896891593933, "step": 3140 }, { "epoch": 2.8297297297297295, "grad_norm": 10.031412041495873, "learning_rate": 9.884105790453236e-08, "loss": 0.4669106602668762, "step": 3141 }, { "epoch": 2.8306306306306306, "grad_norm": 14.658941383654822, "learning_rate": 9.780676019336632e-08, "loss": 0.6931451559066772, "step": 3142 }, { "epoch": 2.8315315315315317, "grad_norm": 9.597417748980858, "learning_rate": 9.677784910601118e-08, "loss": 0.3629925549030304, "step": 3143 }, { "epoch": 2.8324324324324324, "grad_norm": 8.526941774340733, "learning_rate": 9.57543257730531e-08, "loss": 0.2318694144487381, "step": 3144 }, { "epoch": 2.8333333333333335, "grad_norm": 14.408431177296317, "learning_rate": 9.473619131916023e-08, "loss": 0.23618540167808533, "step": 3145 }, { "epoch": 2.834234234234234, "grad_norm": 18.225393040529905, "learning_rate": 9.372344686307655e-08, "loss": 0.3627060055732727, "step": 3146 }, { "epoch": 2.8351351351351353, "grad_norm": 9.946450643827884, "learning_rate": 9.271609351762689e-08, "loss": 0.5038033127784729, "step": 3147 }, { "epoch": 2.836036036036036, "grad_norm": 17.986414138042953, "learning_rate": 9.171413238970972e-08, "loss": 0.7872669696807861, "step": 3148 }, { "epoch": 2.836936936936937, "grad_norm": 15.127977317489515, "learning_rate": 9.071756458029823e-08, "loss": 0.351871132850647, "step": 3149 }, { "epoch": 2.8378378378378377, "grad_norm": 11.755022226996932, "learning_rate": 8.972639118444204e-08, "loss": 0.5332342386245728, "step": 3150 }, { "epoch": 2.838738738738739, "grad_norm": 9.076796300001938, "learning_rate": 8.874061329125939e-08, "loss": 0.41797953844070435, "step": 3151 }, { "epoch": 2.8396396396396395, "grad_norm": 9.904316253921376, "learning_rate": 8.776023198394378e-08, "loss": 0.40943342447280884, "step": 3152 }, { "epoch": 2.8405405405405406, "grad_norm": 11.498471545204916, "learning_rate": 8.678524833975522e-08, "loss": 0.980539083480835, "step": 3153 }, { "epoch": 2.8414414414414413, "grad_norm": 12.455088370462938, "learning_rate": 8.581566343002612e-08, "loss": 0.31767308712005615, "step": 3154 }, { "epoch": 2.8423423423423424, "grad_norm": 19.403465996640033, "learning_rate": 8.485147832015373e-08, "loss": 0.49638694524765015, "step": 3155 }, { "epoch": 2.8432432432432435, "grad_norm": 10.368178731883894, "learning_rate": 8.389269406960387e-08, "loss": 0.43598970770835876, "step": 3156 }, { "epoch": 2.844144144144144, "grad_norm": 8.295404932278744, "learning_rate": 8.293931173190661e-08, "loss": 0.3116639256477356, "step": 3157 }, { "epoch": 2.845045045045045, "grad_norm": 6.306775114970428, "learning_rate": 8.199133235465673e-08, "loss": 0.4269426465034485, "step": 3158 }, { "epoch": 2.845945945945946, "grad_norm": 19.210554105758707, "learning_rate": 8.104875697951209e-08, "loss": 1.2830239534378052, "step": 3159 }, { "epoch": 2.846846846846847, "grad_norm": 8.454835654551928, "learning_rate": 8.011158664219254e-08, "loss": 0.45161306858062744, "step": 3160 }, { "epoch": 2.8477477477477477, "grad_norm": 14.186434196907499, "learning_rate": 7.917982237247934e-08, "loss": 0.6510294675827026, "step": 3161 }, { "epoch": 2.8486486486486484, "grad_norm": 11.205340813121374, "learning_rate": 7.825346519421184e-08, "loss": 0.6806632280349731, "step": 3162 }, { "epoch": 2.8495495495495495, "grad_norm": 9.945073864497756, "learning_rate": 7.733251612529024e-08, "loss": 0.5039267539978027, "step": 3163 }, { "epoch": 2.8504504504504506, "grad_norm": 11.927579615893958, "learning_rate": 7.641697617767008e-08, "loss": 0.295940101146698, "step": 3164 }, { "epoch": 2.8513513513513513, "grad_norm": 8.153347724856607, "learning_rate": 7.550684635736493e-08, "loss": 0.4894489645957947, "step": 3165 }, { "epoch": 2.852252252252252, "grad_norm": 7.8067446075246165, "learning_rate": 7.460212766444264e-08, "loss": 0.24146123230457306, "step": 3166 }, { "epoch": 2.853153153153153, "grad_norm": 11.148370226951927, "learning_rate": 7.370282109302518e-08, "loss": 0.5095897912979126, "step": 3167 }, { "epoch": 2.854054054054054, "grad_norm": 16.32831093514919, "learning_rate": 7.280892763128766e-08, "loss": 0.377160906791687, "step": 3168 }, { "epoch": 2.854954954954955, "grad_norm": 15.46169225419276, "learning_rate": 7.192044826145772e-08, "loss": 0.8302932381629944, "step": 3169 }, { "epoch": 2.855855855855856, "grad_norm": 6.0030695746135825, "learning_rate": 7.103738395981385e-08, "loss": 0.32825881242752075, "step": 3170 }, { "epoch": 2.8567567567567567, "grad_norm": 13.482668870269196, "learning_rate": 7.015973569668322e-08, "loss": 0.3736622631549835, "step": 3171 }, { "epoch": 2.857657657657658, "grad_norm": 11.395520680124969, "learning_rate": 6.928750443644272e-08, "loss": 0.5651583075523376, "step": 3172 }, { "epoch": 2.8585585585585584, "grad_norm": 14.640898599397087, "learning_rate": 6.842069113751737e-08, "loss": 0.6570605039596558, "step": 3173 }, { "epoch": 2.8594594594594596, "grad_norm": 10.251298650104035, "learning_rate": 6.75592967523775e-08, "loss": 0.7141177654266357, "step": 3174 }, { "epoch": 2.8603603603603602, "grad_norm": 9.714012028774386, "learning_rate": 6.670332222753984e-08, "loss": 0.2499598264694214, "step": 3175 }, { "epoch": 2.8612612612612613, "grad_norm": 12.450561908834207, "learning_rate": 6.585276850356648e-08, "loss": 0.3667965531349182, "step": 3176 }, { "epoch": 2.862162162162162, "grad_norm": 10.371921540417889, "learning_rate": 6.500763651506092e-08, "loss": 0.553224503993988, "step": 3177 }, { "epoch": 2.863063063063063, "grad_norm": 11.27330409284051, "learning_rate": 6.416792719067144e-08, "loss": 0.5520440340042114, "step": 3178 }, { "epoch": 2.863963963963964, "grad_norm": 8.474832985083667, "learning_rate": 6.333364145308607e-08, "loss": 0.2104385495185852, "step": 3179 }, { "epoch": 2.864864864864865, "grad_norm": 12.020900380707227, "learning_rate": 6.250478021903372e-08, "loss": 0.4151121973991394, "step": 3180 }, { "epoch": 2.865765765765766, "grad_norm": 15.056271582278818, "learning_rate": 6.168134439928364e-08, "loss": 0.3605121970176697, "step": 3181 }, { "epoch": 2.8666666666666667, "grad_norm": 9.830821405315397, "learning_rate": 6.08633348986426e-08, "loss": 0.40905314683914185, "step": 3182 }, { "epoch": 2.8675675675675674, "grad_norm": 9.854758247754067, "learning_rate": 6.005075261595495e-08, "loss": 0.6086558103561401, "step": 3183 }, { "epoch": 2.8684684684684685, "grad_norm": 7.051560198748801, "learning_rate": 5.924359844410199e-08, "loss": 0.28362664580345154, "step": 3184 }, { "epoch": 2.8693693693693696, "grad_norm": 10.133311638483319, "learning_rate": 5.844187327000039e-08, "loss": 0.6039377450942993, "step": 3185 }, { "epoch": 2.8702702702702703, "grad_norm": 15.78844002760415, "learning_rate": 5.764557797460046e-08, "loss": 0.29750943183898926, "step": 3186 }, { "epoch": 2.871171171171171, "grad_norm": 14.254120783836019, "learning_rate": 5.685471343288673e-08, "loss": 0.5924072861671448, "step": 3187 }, { "epoch": 2.872072072072072, "grad_norm": 14.95662317760345, "learning_rate": 5.606928051387683e-08, "loss": 0.813556432723999, "step": 3188 }, { "epoch": 2.872972972972973, "grad_norm": 10.654281488495068, "learning_rate": 5.528928008061929e-08, "loss": 0.3268616795539856, "step": 3189 }, { "epoch": 2.873873873873874, "grad_norm": 12.154146024935937, "learning_rate": 5.451471299019351e-08, "loss": 0.29414835572242737, "step": 3190 }, { "epoch": 2.8747747747747745, "grad_norm": 6.0078985583140225, "learning_rate": 5.374558009370812e-08, "loss": 0.32455912232398987, "step": 3191 }, { "epoch": 2.8756756756756756, "grad_norm": 14.454379249339306, "learning_rate": 5.2981882236302075e-08, "loss": 0.5916678309440613, "step": 3192 }, { "epoch": 2.8765765765765767, "grad_norm": 14.42246258374093, "learning_rate": 5.222362025714078e-08, "loss": 0.45917463302612305, "step": 3193 }, { "epoch": 2.8774774774774774, "grad_norm": 9.503167642063966, "learning_rate": 5.1470794989416094e-08, "loss": 0.37499675154685974, "step": 3194 }, { "epoch": 2.8783783783783785, "grad_norm": 12.330288370609928, "learning_rate": 5.0723407260348524e-08, "loss": 0.7628058791160583, "step": 3195 }, { "epoch": 2.879279279279279, "grad_norm": 10.40154826672831, "learning_rate": 4.998145789118114e-08, "loss": 0.886831521987915, "step": 3196 }, { "epoch": 2.8801801801801803, "grad_norm": 11.827194363221171, "learning_rate": 4.92449476971818e-08, "loss": 0.3936125636100769, "step": 3197 }, { "epoch": 2.881081081081081, "grad_norm": 9.102660423404513, "learning_rate": 4.851387748764258e-08, "loss": 0.5932949185371399, "step": 3198 }, { "epoch": 2.881981981981982, "grad_norm": 16.516755542278325, "learning_rate": 4.7788248065877005e-08, "loss": 0.39385271072387695, "step": 3199 }, { "epoch": 2.8828828828828827, "grad_norm": 8.202026157088671, "learning_rate": 4.7068060229221165e-08, "loss": 0.25490644574165344, "step": 3200 }, { "epoch": 2.883783783783784, "grad_norm": 14.390882359360264, "learning_rate": 4.635331476903093e-08, "loss": 0.6286637187004089, "step": 3201 }, { "epoch": 2.8846846846846845, "grad_norm": 7.805575653302741, "learning_rate": 4.56440124706814e-08, "loss": 0.3353833854198456, "step": 3202 }, { "epoch": 2.8855855855855856, "grad_norm": 10.56625474443465, "learning_rate": 4.494015411356911e-08, "loss": 0.6304891109466553, "step": 3203 }, { "epoch": 2.8864864864864863, "grad_norm": 17.0211055096181, "learning_rate": 4.4241740471105964e-08, "loss": 0.7264276742935181, "step": 3204 }, { "epoch": 2.8873873873873874, "grad_norm": 9.271331154934623, "learning_rate": 4.3548772310723073e-08, "loss": 0.3691805303096771, "step": 3205 }, { "epoch": 2.8882882882882885, "grad_norm": 9.185473061048938, "learning_rate": 4.2861250393866346e-08, "loss": 0.3754459321498871, "step": 3206 }, { "epoch": 2.889189189189189, "grad_norm": 11.97298458517804, "learning_rate": 4.217917547599815e-08, "loss": 0.47409266233444214, "step": 3207 }, { "epoch": 2.89009009009009, "grad_norm": 7.687386462005143, "learning_rate": 4.150254830659617e-08, "loss": 0.31540459394454956, "step": 3208 }, { "epoch": 2.890990990990991, "grad_norm": 11.90742204866117, "learning_rate": 4.083136962915069e-08, "loss": 0.3772083818912506, "step": 3209 }, { "epoch": 2.891891891891892, "grad_norm": 9.649776961840816, "learning_rate": 4.0165640181165645e-08, "loss": 0.2596743106842041, "step": 3210 }, { "epoch": 2.8927927927927928, "grad_norm": 17.218222829814035, "learning_rate": 3.950536069415756e-08, "loss": 0.3328412175178528, "step": 3211 }, { "epoch": 2.8936936936936934, "grad_norm": 12.705058714692216, "learning_rate": 3.8850531893654396e-08, "loss": 0.5901011228561401, "step": 3212 }, { "epoch": 2.8945945945945946, "grad_norm": 11.454782903587885, "learning_rate": 3.8201154499193926e-08, "loss": 0.8860027194023132, "step": 3213 }, { "epoch": 2.8954954954954957, "grad_norm": 14.386825711705143, "learning_rate": 3.755722922432481e-08, "loss": 0.5607985854148865, "step": 3214 }, { "epoch": 2.8963963963963963, "grad_norm": 16.020674920081223, "learning_rate": 3.6918756776604947e-08, "loss": 0.8824067115783691, "step": 3215 }, { "epoch": 2.8972972972972975, "grad_norm": 15.152606697875525, "learning_rate": 3.628573785759926e-08, "loss": 1.8139207363128662, "step": 3216 }, { "epoch": 2.898198198198198, "grad_norm": 10.61658714741093, "learning_rate": 3.565817316288134e-08, "loss": 0.7253017425537109, "step": 3217 }, { "epoch": 2.8990990990990992, "grad_norm": 10.366162346838676, "learning_rate": 3.503606338203125e-08, "loss": 0.6917277574539185, "step": 3218 }, { "epoch": 2.9, "grad_norm": 8.260677970686553, "learning_rate": 3.441940919863551e-08, "loss": 0.27983352541923523, "step": 3219 }, { "epoch": 2.900900900900901, "grad_norm": 11.270135771023659, "learning_rate": 3.3808211290284886e-08, "loss": 0.40894824266433716, "step": 3220 }, { "epoch": 2.9018018018018017, "grad_norm": 12.724887344162209, "learning_rate": 3.3202470328576044e-08, "loss": 0.42095816135406494, "step": 3221 }, { "epoch": 2.902702702702703, "grad_norm": 11.13563960252325, "learning_rate": 3.260218697910877e-08, "loss": 0.2819526791572571, "step": 3222 }, { "epoch": 2.9036036036036035, "grad_norm": 11.215550017411921, "learning_rate": 3.2007361901485455e-08, "loss": 0.7610554695129395, "step": 3223 }, { "epoch": 2.9045045045045046, "grad_norm": 12.903076807253631, "learning_rate": 3.141799574931104e-08, "loss": 0.3334919810295105, "step": 3224 }, { "epoch": 2.9054054054054053, "grad_norm": 10.354816353326154, "learning_rate": 3.0834089170193035e-08, "loss": 0.29180341958999634, "step": 3225 }, { "epoch": 2.9063063063063064, "grad_norm": 7.847481672257274, "learning_rate": 3.025564280573878e-08, "loss": 0.17764884233474731, "step": 3226 }, { "epoch": 2.907207207207207, "grad_norm": 11.399254349729706, "learning_rate": 2.968265729155595e-08, "loss": 0.4697573482990265, "step": 3227 }, { "epoch": 2.908108108108108, "grad_norm": 11.498447581785861, "learning_rate": 2.9115133257253127e-08, "loss": 0.28961923718452454, "step": 3228 }, { "epoch": 2.909009009009009, "grad_norm": 14.312939853206586, "learning_rate": 2.8553071326435368e-08, "loss": 0.7388343811035156, "step": 3229 }, { "epoch": 2.90990990990991, "grad_norm": 10.45193006078936, "learning_rate": 2.7996472116707528e-08, "loss": 0.42354726791381836, "step": 3230 }, { "epoch": 2.910810810810811, "grad_norm": 10.57611073120087, "learning_rate": 2.7445336239671493e-08, "loss": 0.22124624252319336, "step": 3231 }, { "epoch": 2.9117117117117117, "grad_norm": 13.386066094937002, "learning_rate": 2.689966430092561e-08, "loss": 0.37491780519485474, "step": 3232 }, { "epoch": 2.9126126126126124, "grad_norm": 6.936665276055443, "learning_rate": 2.6359456900065804e-08, "loss": 0.29225584864616394, "step": 3233 }, { "epoch": 2.9135135135135135, "grad_norm": 16.309786733297162, "learning_rate": 2.5824714630680592e-08, "loss": 0.532295823097229, "step": 3234 }, { "epoch": 2.9144144144144146, "grad_norm": 8.917212056104644, "learning_rate": 2.529543808035606e-08, "loss": 0.5746088624000549, "step": 3235 }, { "epoch": 2.9153153153153153, "grad_norm": 11.758951296263918, "learning_rate": 2.4771627830670887e-08, "loss": 0.7198326587677002, "step": 3236 }, { "epoch": 2.916216216216216, "grad_norm": 11.722109065623714, "learning_rate": 2.42532844571991e-08, "loss": 0.38108983635902405, "step": 3237 }, { "epoch": 2.917117117117117, "grad_norm": 8.778991805597002, "learning_rate": 2.3740408529504545e-08, "loss": 0.29683244228363037, "step": 3238 }, { "epoch": 2.918018018018018, "grad_norm": 11.340263744942852, "learning_rate": 2.3233000611146418e-08, "loss": 0.25810110569000244, "step": 3239 }, { "epoch": 2.918918918918919, "grad_norm": 20.343205243742325, "learning_rate": 2.2731061259673726e-08, "loss": 0.9457066059112549, "step": 3240 }, { "epoch": 2.91981981981982, "grad_norm": 9.440779790937915, "learning_rate": 2.223459102662695e-08, "loss": 0.4441452622413635, "step": 3241 }, { "epoch": 2.9207207207207206, "grad_norm": 8.627164592105776, "learning_rate": 2.174359045753749e-08, "loss": 0.3240242004394531, "step": 3242 }, { "epoch": 2.9216216216216218, "grad_norm": 13.32946293311854, "learning_rate": 2.1258060091925435e-08, "loss": 0.3010973632335663, "step": 3243 }, { "epoch": 2.9225225225225224, "grad_norm": 8.65777187208865, "learning_rate": 2.0778000463301806e-08, "loss": 0.29864582419395447, "step": 3244 }, { "epoch": 2.9234234234234235, "grad_norm": 11.767462048271236, "learning_rate": 2.0303412099164644e-08, "loss": 0.6927505731582642, "step": 3245 }, { "epoch": 2.924324324324324, "grad_norm": 12.306775084343114, "learning_rate": 1.9834295521001246e-08, "loss": 0.5736360549926758, "step": 3246 }, { "epoch": 2.9252252252252253, "grad_norm": 11.26548459845472, "learning_rate": 1.9370651244285387e-08, "loss": 0.2858881652355194, "step": 3247 }, { "epoch": 2.926126126126126, "grad_norm": 16.04319591603478, "learning_rate": 1.8912479778478433e-08, "loss": 0.9276719689369202, "step": 3248 }, { "epoch": 2.927027027027027, "grad_norm": 7.64346475088744, "learning_rate": 1.8459781627028217e-08, "loss": 0.12898045778274536, "step": 3249 }, { "epoch": 2.9279279279279278, "grad_norm": 11.383312259923118, "learning_rate": 1.8012557287367394e-08, "loss": 0.5170237421989441, "step": 3250 }, { "epoch": 2.928828828828829, "grad_norm": 6.685223208905962, "learning_rate": 1.7570807250915644e-08, "loss": 0.19171756505966187, "step": 3251 }, { "epoch": 2.92972972972973, "grad_norm": 6.508299625365426, "learning_rate": 1.71345320030758e-08, "loss": 0.33970847725868225, "step": 3252 }, { "epoch": 2.9306306306306307, "grad_norm": 10.187987487393286, "learning_rate": 1.6703732023235496e-08, "loss": 0.22915905714035034, "step": 3253 }, { "epoch": 2.9315315315315313, "grad_norm": 11.619803682327976, "learning_rate": 1.6278407784766082e-08, "loss": 0.9763340353965759, "step": 3254 }, { "epoch": 2.9324324324324325, "grad_norm": 12.667673207105299, "learning_rate": 1.585855975502204e-08, "loss": 0.3878988027572632, "step": 3255 }, { "epoch": 2.9333333333333336, "grad_norm": 9.631707717548007, "learning_rate": 1.54441883953399e-08, "loss": 0.4277806282043457, "step": 3256 }, { "epoch": 2.9342342342342342, "grad_norm": 17.748096278629962, "learning_rate": 1.5035294161039882e-08, "loss": 0.5856724381446838, "step": 3257 }, { "epoch": 2.935135135135135, "grad_norm": 12.035952523628472, "learning_rate": 1.4631877501422032e-08, "loss": 0.4578258991241455, "step": 3258 }, { "epoch": 2.936036036036036, "grad_norm": 8.457963427946522, "learning_rate": 1.4233938859767871e-08, "loss": 0.6646236777305603, "step": 3259 }, { "epoch": 2.936936936936937, "grad_norm": 10.507764583532897, "learning_rate": 1.3841478673341512e-08, "loss": 0.3159530758857727, "step": 3260 }, { "epoch": 2.937837837837838, "grad_norm": 11.62267773905207, "learning_rate": 1.3454497373384113e-08, "loss": 0.3774804472923279, "step": 3261 }, { "epoch": 2.9387387387387385, "grad_norm": 13.679348849536279, "learning_rate": 1.3072995385119414e-08, "loss": 0.6660423874855042, "step": 3262 }, { "epoch": 2.9396396396396396, "grad_norm": 11.23337226179496, "learning_rate": 1.2696973127747647e-08, "loss": 0.28214752674102783, "step": 3263 }, { "epoch": 2.9405405405405407, "grad_norm": 9.58744730143738, "learning_rate": 1.232643101445108e-08, "loss": 0.4660099744796753, "step": 3264 }, { "epoch": 2.9414414414414414, "grad_norm": 14.04262320731674, "learning_rate": 1.1961369452386795e-08, "loss": 0.21129421889781952, "step": 3265 }, { "epoch": 2.9423423423423425, "grad_norm": 19.16431899563648, "learning_rate": 1.1601788842692807e-08, "loss": 0.33819282054901123, "step": 3266 }, { "epoch": 2.943243243243243, "grad_norm": 9.506793879772301, "learning_rate": 1.1247689580481947e-08, "loss": 0.5624006986618042, "step": 3267 }, { "epoch": 2.9441441441441443, "grad_norm": 10.162640460978432, "learning_rate": 1.0899072054846305e-08, "loss": 0.8094921708106995, "step": 3268 }, { "epoch": 2.945045045045045, "grad_norm": 14.152942052843835, "learning_rate": 1.0555936648853348e-08, "loss": 0.3222825527191162, "step": 3269 }, { "epoch": 2.945945945945946, "grad_norm": 15.300664485221406, "learning_rate": 1.021828373954592e-08, "loss": 0.6321902275085449, "step": 3270 }, { "epoch": 2.9468468468468467, "grad_norm": 12.215805986837733, "learning_rate": 9.886113697944454e-09, "loss": 0.2645990252494812, "step": 3271 }, { "epoch": 2.947747747747748, "grad_norm": 16.822664069918225, "learning_rate": 9.5594268890431e-09, "loss": 0.49241942167282104, "step": 3272 }, { "epoch": 2.9486486486486485, "grad_norm": 13.855780315823067, "learning_rate": 9.238223671812485e-09, "loss": 0.7209813594818115, "step": 3273 }, { "epoch": 2.9495495495495496, "grad_norm": 23.393334692473292, "learning_rate": 8.922504399195842e-09, "loss": 0.6558884382247925, "step": 3274 }, { "epoch": 2.9504504504504503, "grad_norm": 15.308223380421072, "learning_rate": 8.612269418111774e-09, "loss": 0.4492531418800354, "step": 3275 }, { "epoch": 2.9513513513513514, "grad_norm": 16.769733893507475, "learning_rate": 8.307519069453151e-09, "loss": 0.410381942987442, "step": 3276 }, { "epoch": 2.9522522522522525, "grad_norm": 11.516254837208239, "learning_rate": 8.008253688084888e-09, "loss": 0.2751805782318115, "step": 3277 }, { "epoch": 2.953153153153153, "grad_norm": 7.851398966518017, "learning_rate": 7.714473602845052e-09, "loss": 0.3193674683570862, "step": 3278 }, { "epoch": 2.954054054054054, "grad_norm": 10.429980279549909, "learning_rate": 7.426179136545974e-09, "loss": 0.38420000672340393, "step": 3279 }, { "epoch": 2.954954954954955, "grad_norm": 12.064185011961586, "learning_rate": 7.143370605970368e-09, "loss": 1.0094915628433228, "step": 3280 }, { "epoch": 2.955855855855856, "grad_norm": 13.338177731424542, "learning_rate": 6.866048321873542e-09, "loss": 0.2732114791870117, "step": 3281 }, { "epoch": 2.9567567567567568, "grad_norm": 20.053017422890512, "learning_rate": 6.594212588983406e-09, "loss": 0.7617955803871155, "step": 3282 }, { "epoch": 2.9576576576576574, "grad_norm": 12.413155284763997, "learning_rate": 6.327863705997139e-09, "loss": 0.27530044317245483, "step": 3283 }, { "epoch": 2.9585585585585585, "grad_norm": 12.599218730023678, "learning_rate": 6.067001965584518e-09, "loss": 0.7731039524078369, "step": 3284 }, { "epoch": 2.9594594594594597, "grad_norm": 13.009718545098739, "learning_rate": 5.811627654386254e-09, "loss": 0.6429943442344666, "step": 3285 }, { "epoch": 2.9603603603603603, "grad_norm": 9.607637414599234, "learning_rate": 5.561741053010661e-09, "loss": 0.5738593935966492, "step": 3286 }, { "epoch": 2.961261261261261, "grad_norm": 32.66930142798836, "learning_rate": 5.317342436039763e-09, "loss": 0.9971611499786377, "step": 3287 }, { "epoch": 2.962162162162162, "grad_norm": 12.894644868161095, "learning_rate": 5.078432072022077e-09, "loss": 0.5130572319030762, "step": 3288 }, { "epoch": 2.963063063063063, "grad_norm": 8.937462133502917, "learning_rate": 4.845010223478164e-09, "loss": 0.275386244058609, "step": 3289 }, { "epoch": 2.963963963963964, "grad_norm": 11.189473389354694, "learning_rate": 4.617077146895077e-09, "loss": 0.30980658531188965, "step": 3290 }, { "epoch": 2.964864864864865, "grad_norm": 15.211661756409315, "learning_rate": 4.394633092730805e-09, "loss": 0.43425253033638, "step": 3291 }, { "epoch": 2.9657657657657657, "grad_norm": 10.47742440838181, "learning_rate": 4.177678305411492e-09, "loss": 0.31434789299964905, "step": 3292 }, { "epoch": 2.966666666666667, "grad_norm": 9.206209319652455, "learning_rate": 3.9662130233303345e-09, "loss": 0.7355430722236633, "step": 3293 }, { "epoch": 2.9675675675675675, "grad_norm": 15.258599512761888, "learning_rate": 3.760237478849793e-09, "loss": 0.35044312477111816, "step": 3294 }, { "epoch": 2.9684684684684686, "grad_norm": 13.741751480249235, "learning_rate": 3.5597518982999346e-09, "loss": 0.2986290752887726, "step": 3295 }, { "epoch": 2.9693693693693692, "grad_norm": 9.978224016439794, "learning_rate": 3.364756501977873e-09, "loss": 0.3490036427974701, "step": 3296 }, { "epoch": 2.9702702702702704, "grad_norm": 7.5368131890603465, "learning_rate": 3.1752515041483247e-09, "loss": 0.6433002948760986, "step": 3297 }, { "epoch": 2.971171171171171, "grad_norm": 10.181178482993774, "learning_rate": 2.99123711304361e-09, "loss": 0.23826146125793457, "step": 3298 }, { "epoch": 2.972072072072072, "grad_norm": 10.131560555517467, "learning_rate": 2.812713530861433e-09, "loss": 0.525177538394928, "step": 3299 }, { "epoch": 2.972972972972973, "grad_norm": 10.39489764496429, "learning_rate": 2.639680953767099e-09, "loss": 0.195095032453537, "step": 3300 }, { "epoch": 2.973873873873874, "grad_norm": 13.987087200682417, "learning_rate": 2.472139571892407e-09, "loss": 0.6874642968177795, "step": 3301 }, { "epoch": 2.974774774774775, "grad_norm": 8.661015237190858, "learning_rate": 2.3100895693350946e-09, "loss": 0.22578030824661255, "step": 3302 }, { "epoch": 2.9756756756756757, "grad_norm": 15.381673495975587, "learning_rate": 2.1535311241582813e-09, "loss": 0.743354856967926, "step": 3303 }, { "epoch": 2.9765765765765764, "grad_norm": 10.7092406635183, "learning_rate": 2.002464408392135e-09, "loss": 0.2619395852088928, "step": 3304 }, { "epoch": 2.9774774774774775, "grad_norm": 10.59307492735683, "learning_rate": 1.8568895880305415e-09, "loss": 0.5660840272903442, "step": 3305 }, { "epoch": 2.9783783783783786, "grad_norm": 10.067598366755512, "learning_rate": 1.7168068230349888e-09, "loss": 0.33253148198127747, "step": 3306 }, { "epoch": 2.9792792792792793, "grad_norm": 12.974608439563891, "learning_rate": 1.5822162673312381e-09, "loss": 0.2697274386882782, "step": 3307 }, { "epoch": 2.98018018018018, "grad_norm": 27.404269266331838, "learning_rate": 1.4531180688087676e-09, "loss": 0.7453837394714355, "step": 3308 }, { "epoch": 2.981081081081081, "grad_norm": 14.465097789448581, "learning_rate": 1.329512369324104e-09, "loss": 0.4750048816204071, "step": 3309 }, { "epoch": 2.981981981981982, "grad_norm": 12.462823206244318, "learning_rate": 1.2113993046969364e-09, "loss": 0.40287506580352783, "step": 3310 }, { "epoch": 2.982882882882883, "grad_norm": 9.858468113014286, "learning_rate": 1.098779004712891e-09, "loss": 0.7160459756851196, "step": 3311 }, { "epoch": 2.983783783783784, "grad_norm": 16.969565265560316, "learning_rate": 9.91651593120757e-10, "loss": 0.35625120997428894, "step": 3312 }, { "epoch": 2.9846846846846846, "grad_norm": 5.912720869543489, "learning_rate": 8.900171876341512e-10, "loss": 0.18205636739730835, "step": 3313 }, { "epoch": 2.9855855855855857, "grad_norm": 8.846669541838752, "learning_rate": 7.938758999315177e-10, "loss": 0.27084779739379883, "step": 3314 }, { "epoch": 2.9864864864864864, "grad_norm": 13.619433761053685, "learning_rate": 7.032278356544631e-10, "loss": 0.3235873579978943, "step": 3315 }, { "epoch": 2.9873873873873875, "grad_norm": 7.542452056253367, "learning_rate": 6.180730944083113e-10, "loss": 0.5045291781425476, "step": 3316 }, { "epoch": 2.988288288288288, "grad_norm": 11.396204561080522, "learning_rate": 5.384117697632141e-10, "loss": 0.838004469871521, "step": 3317 }, { "epoch": 2.9891891891891893, "grad_norm": 10.738421241795267, "learning_rate": 4.642439492519302e-10, "loss": 0.3510596752166748, "step": 3318 }, { "epoch": 2.99009009009009, "grad_norm": 10.018582234235625, "learning_rate": 3.9556971437260117e-10, "loss": 0.31141674518585205, "step": 3319 }, { "epoch": 2.990990990990991, "grad_norm": 12.65715526365031, "learning_rate": 3.323891405848656e-10, "loss": 0.54250568151474, "step": 3320 }, { "epoch": 2.9918918918918918, "grad_norm": 11.954924640049924, "learning_rate": 2.747022973131896e-10, "loss": 0.4889550805091858, "step": 3321 }, { "epoch": 2.992792792792793, "grad_norm": 9.086500695806155, "learning_rate": 2.2250924794520179e-10, "loss": 0.6249538660049438, "step": 3322 }, { "epoch": 2.9936936936936935, "grad_norm": 11.255665880191785, "learning_rate": 1.758100498311377e-10, "loss": 0.4605242908000946, "step": 3323 }, { "epoch": 2.9945945945945946, "grad_norm": 8.32903205574773, "learning_rate": 1.3460475428495046e-10, "loss": 0.2069612741470337, "step": 3324 }, { "epoch": 2.9954954954954953, "grad_norm": 9.889481233057783, "learning_rate": 9.889340658375545e-11, "loss": 0.729750394821167, "step": 3325 }, { "epoch": 2.9963963963963964, "grad_norm": 17.60998683169484, "learning_rate": 6.867604596838551e-11, "loss": 0.2950170636177063, "step": 3326 }, { "epoch": 2.9972972972972975, "grad_norm": 10.062760639643265, "learning_rate": 4.395270564172549e-11, "loss": 0.38005125522613525, "step": 3327 }, { "epoch": 2.998198198198198, "grad_norm": 10.526283818405414, "learning_rate": 2.4723412770377708e-11, "loss": 0.3282592296600342, "step": 3328 }, { "epoch": 2.999099099099099, "grad_norm": 10.437410474566098, "learning_rate": 1.0988188484661878e-11, "loss": 0.7834963202476501, "step": 3329 }, { "epoch": 3.0, "grad_norm": 8.285804422799801, "learning_rate": 2.7470478758395702e-12, "loss": 0.17721490561962128, "step": 3330 }, { "epoch": 3.0, "step": 3330, "total_flos": 8949330124800.0, "train_loss": 1.6578803809801559, "train_runtime": 3829.0681, "train_samples_per_second": 3.476, "train_steps_per_second": 0.87 } ], "logging_steps": 1, "max_steps": 3330, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8949330124800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }