Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 45fcd09a4a 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-0-9m-bs4
Source: Original Platform
2026-06-12 17:02:21 +08:00

10040 lines
258 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1428,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0021008403361344537,
"grad_norm": 46.99030458326992,
"learning_rate": 0.0,
"loss": 4.722589492797852,
"step": 1
},
{
"epoch": 0.004201680672268907,
"grad_norm": 36.88474864262765,
"learning_rate": 6.993006993006993e-08,
"loss": 3.9686050415039062,
"step": 2
},
{
"epoch": 0.0063025210084033615,
"grad_norm": 33.07457670969746,
"learning_rate": 1.3986013986013987e-07,
"loss": 4.086915969848633,
"step": 3
},
{
"epoch": 0.008403361344537815,
"grad_norm": 31.625791708920385,
"learning_rate": 2.097902097902098e-07,
"loss": 4.020754814147949,
"step": 4
},
{
"epoch": 0.01050420168067227,
"grad_norm": 43.0872077594366,
"learning_rate": 2.7972027972027973e-07,
"loss": 4.11073112487793,
"step": 5
},
{
"epoch": 0.012605042016806723,
"grad_norm": 41.05815920768766,
"learning_rate": 3.496503496503497e-07,
"loss": 4.068889141082764,
"step": 6
},
{
"epoch": 0.014705882352941176,
"grad_norm": 38.60187500165403,
"learning_rate": 4.195804195804196e-07,
"loss": 3.9590301513671875,
"step": 7
},
{
"epoch": 0.01680672268907563,
"grad_norm": 39.6727205000689,
"learning_rate": 4.895104895104896e-07,
"loss": 3.7929787635803223,
"step": 8
},
{
"epoch": 0.018907563025210083,
"grad_norm": 35.21152216885091,
"learning_rate": 5.594405594405595e-07,
"loss": 4.345971584320068,
"step": 9
},
{
"epoch": 0.02100840336134454,
"grad_norm": 32.94311862999745,
"learning_rate": 6.293706293706295e-07,
"loss": 4.204797744750977,
"step": 10
},
{
"epoch": 0.023109243697478993,
"grad_norm": 35.708967266069514,
"learning_rate": 6.993006993006994e-07,
"loss": 4.2924957275390625,
"step": 11
},
{
"epoch": 0.025210084033613446,
"grad_norm": 35.068164747297715,
"learning_rate": 7.692307692307694e-07,
"loss": 3.6519017219543457,
"step": 12
},
{
"epoch": 0.0273109243697479,
"grad_norm": 36.012069511225576,
"learning_rate": 8.391608391608393e-07,
"loss": 3.5902950763702393,
"step": 13
},
{
"epoch": 0.029411764705882353,
"grad_norm": 31.883494522724174,
"learning_rate": 9.090909090909091e-07,
"loss": 3.751192092895508,
"step": 14
},
{
"epoch": 0.031512605042016806,
"grad_norm": 28.67530148457089,
"learning_rate": 9.790209790209791e-07,
"loss": 4.330526351928711,
"step": 15
},
{
"epoch": 0.03361344537815126,
"grad_norm": 36.87930031460684,
"learning_rate": 1.0489510489510491e-06,
"loss": 3.7747349739074707,
"step": 16
},
{
"epoch": 0.03571428571428571,
"grad_norm": 34.94283148044396,
"learning_rate": 1.118881118881119e-06,
"loss": 3.6174468994140625,
"step": 17
},
{
"epoch": 0.037815126050420166,
"grad_norm": 26.489554272646977,
"learning_rate": 1.188811188811189e-06,
"loss": 3.4348971843719482,
"step": 18
},
{
"epoch": 0.03991596638655462,
"grad_norm": 25.603886661513403,
"learning_rate": 1.258741258741259e-06,
"loss": 3.4862470626831055,
"step": 19
},
{
"epoch": 0.04201680672268908,
"grad_norm": 22.404814809342252,
"learning_rate": 1.3286713286713287e-06,
"loss": 3.7471625804901123,
"step": 20
},
{
"epoch": 0.04411764705882353,
"grad_norm": 21.15277638751192,
"learning_rate": 1.3986013986013987e-06,
"loss": 3.6562182903289795,
"step": 21
},
{
"epoch": 0.046218487394957986,
"grad_norm": 20.270881263670095,
"learning_rate": 1.4685314685314685e-06,
"loss": 2.944753408432007,
"step": 22
},
{
"epoch": 0.04831932773109244,
"grad_norm": 22.812966816264836,
"learning_rate": 1.5384615384615387e-06,
"loss": 4.277539253234863,
"step": 23
},
{
"epoch": 0.05042016806722689,
"grad_norm": 21.380979723581284,
"learning_rate": 1.6083916083916085e-06,
"loss": 4.103379726409912,
"step": 24
},
{
"epoch": 0.052521008403361345,
"grad_norm": 13.548338819677783,
"learning_rate": 1.6783216783216785e-06,
"loss": 3.516192674636841,
"step": 25
},
{
"epoch": 0.0546218487394958,
"grad_norm": 16.574184790133323,
"learning_rate": 1.7482517482517483e-06,
"loss": 3.054426908493042,
"step": 26
},
{
"epoch": 0.05672268907563025,
"grad_norm": 14.817846975349166,
"learning_rate": 1.8181818181818183e-06,
"loss": 3.598344564437866,
"step": 27
},
{
"epoch": 0.058823529411764705,
"grad_norm": 13.230202987729585,
"learning_rate": 1.888111888111888e-06,
"loss": 2.1566905975341797,
"step": 28
},
{
"epoch": 0.06092436974789916,
"grad_norm": 19.612310968262104,
"learning_rate": 1.9580419580419583e-06,
"loss": 2.7493889331817627,
"step": 29
},
{
"epoch": 0.06302521008403361,
"grad_norm": 18.334666543367657,
"learning_rate": 2.027972027972028e-06,
"loss": 3.7484190464019775,
"step": 30
},
{
"epoch": 0.06512605042016807,
"grad_norm": 15.812972082251932,
"learning_rate": 2.0979020979020983e-06,
"loss": 3.4763312339782715,
"step": 31
},
{
"epoch": 0.06722689075630252,
"grad_norm": 18.340243585590446,
"learning_rate": 2.167832167832168e-06,
"loss": 4.1537184715271,
"step": 32
},
{
"epoch": 0.06932773109243698,
"grad_norm": 9.470566142580898,
"learning_rate": 2.237762237762238e-06,
"loss": 3.949978828430176,
"step": 33
},
{
"epoch": 0.07142857142857142,
"grad_norm": 15.047162012043515,
"learning_rate": 2.307692307692308e-06,
"loss": 3.095123291015625,
"step": 34
},
{
"epoch": 0.07352941176470588,
"grad_norm": 15.619490386855553,
"learning_rate": 2.377622377622378e-06,
"loss": 3.6232047080993652,
"step": 35
},
{
"epoch": 0.07563025210084033,
"grad_norm": 12.996399577415676,
"learning_rate": 2.4475524475524477e-06,
"loss": 3.801804304122925,
"step": 36
},
{
"epoch": 0.07773109243697479,
"grad_norm": 7.924270548246447,
"learning_rate": 2.517482517482518e-06,
"loss": 2.909287452697754,
"step": 37
},
{
"epoch": 0.07983193277310924,
"grad_norm": 10.838167134028488,
"learning_rate": 2.5874125874125877e-06,
"loss": 3.283078670501709,
"step": 38
},
{
"epoch": 0.0819327731092437,
"grad_norm": 12.236334826312302,
"learning_rate": 2.6573426573426574e-06,
"loss": 2.9224965572357178,
"step": 39
},
{
"epoch": 0.08403361344537816,
"grad_norm": 10.59808836361908,
"learning_rate": 2.7272727272727272e-06,
"loss": 3.591977119445801,
"step": 40
},
{
"epoch": 0.0861344537815126,
"grad_norm": 9.295669805450128,
"learning_rate": 2.7972027972027974e-06,
"loss": 3.0213565826416016,
"step": 41
},
{
"epoch": 0.08823529411764706,
"grad_norm": 12.563423620415891,
"learning_rate": 2.8671328671328672e-06,
"loss": 2.9183509349823,
"step": 42
},
{
"epoch": 0.09033613445378151,
"grad_norm": 13.858660538396043,
"learning_rate": 2.937062937062937e-06,
"loss": 3.4748919010162354,
"step": 43
},
{
"epoch": 0.09243697478991597,
"grad_norm": 11.043040598415395,
"learning_rate": 3.006993006993007e-06,
"loss": 3.53951096534729,
"step": 44
},
{
"epoch": 0.09453781512605042,
"grad_norm": 12.201335477546305,
"learning_rate": 3.0769230769230774e-06,
"loss": 3.2075607776641846,
"step": 45
},
{
"epoch": 0.09663865546218488,
"grad_norm": 20.281483402633803,
"learning_rate": 3.1468531468531472e-06,
"loss": 3.2893571853637695,
"step": 46
},
{
"epoch": 0.09873949579831932,
"grad_norm": 10.008468666819498,
"learning_rate": 3.216783216783217e-06,
"loss": 3.47295880317688,
"step": 47
},
{
"epoch": 0.10084033613445378,
"grad_norm": 11.407538553004894,
"learning_rate": 3.286713286713287e-06,
"loss": 3.5495269298553467,
"step": 48
},
{
"epoch": 0.10294117647058823,
"grad_norm": 16.252417297798132,
"learning_rate": 3.356643356643357e-06,
"loss": 3.218782901763916,
"step": 49
},
{
"epoch": 0.10504201680672269,
"grad_norm": 8.752146553121406,
"learning_rate": 3.426573426573427e-06,
"loss": 2.612854242324829,
"step": 50
},
{
"epoch": 0.10714285714285714,
"grad_norm": 14.782692853689836,
"learning_rate": 3.4965034965034966e-06,
"loss": 3.0805444717407227,
"step": 51
},
{
"epoch": 0.1092436974789916,
"grad_norm": 14.664178996815842,
"learning_rate": 3.566433566433567e-06,
"loss": 3.1539719104766846,
"step": 52
},
{
"epoch": 0.11134453781512606,
"grad_norm": 13.158498079025986,
"learning_rate": 3.6363636363636366e-06,
"loss": 3.5745811462402344,
"step": 53
},
{
"epoch": 0.1134453781512605,
"grad_norm": 9.661944205457672,
"learning_rate": 3.7062937062937064e-06,
"loss": 3.033264398574829,
"step": 54
},
{
"epoch": 0.11554621848739496,
"grad_norm": 8.534767379388418,
"learning_rate": 3.776223776223776e-06,
"loss": 2.5727319717407227,
"step": 55
},
{
"epoch": 0.11764705882352941,
"grad_norm": 10.446726865588245,
"learning_rate": 3.846153846153847e-06,
"loss": 3.4801394939422607,
"step": 56
},
{
"epoch": 0.11974789915966387,
"grad_norm": 11.510497882977212,
"learning_rate": 3.916083916083917e-06,
"loss": 3.253239631652832,
"step": 57
},
{
"epoch": 0.12184873949579832,
"grad_norm": 12.480969279334285,
"learning_rate": 3.986013986013986e-06,
"loss": 3.0049266815185547,
"step": 58
},
{
"epoch": 0.12394957983193278,
"grad_norm": 10.926998541566615,
"learning_rate": 4.055944055944056e-06,
"loss": 3.13586688041687,
"step": 59
},
{
"epoch": 0.12605042016806722,
"grad_norm": 15.080151132986066,
"learning_rate": 4.125874125874127e-06,
"loss": 3.5970468521118164,
"step": 60
},
{
"epoch": 0.12815126050420167,
"grad_norm": 11.40302094802426,
"learning_rate": 4.195804195804197e-06,
"loss": 3.0423130989074707,
"step": 61
},
{
"epoch": 0.13025210084033614,
"grad_norm": 27.877774734458356,
"learning_rate": 4.265734265734266e-06,
"loss": 3.270495891571045,
"step": 62
},
{
"epoch": 0.1323529411764706,
"grad_norm": 14.517043785366944,
"learning_rate": 4.335664335664336e-06,
"loss": 3.3109726905822754,
"step": 63
},
{
"epoch": 0.13445378151260504,
"grad_norm": 9.703645186786849,
"learning_rate": 4.405594405594406e-06,
"loss": 2.8192973136901855,
"step": 64
},
{
"epoch": 0.13655462184873948,
"grad_norm": 7.165620671720677,
"learning_rate": 4.475524475524476e-06,
"loss": 2.6368956565856934,
"step": 65
},
{
"epoch": 0.13865546218487396,
"grad_norm": 8.390508554521247,
"learning_rate": 4.5454545454545455e-06,
"loss": 3.2420871257781982,
"step": 66
},
{
"epoch": 0.1407563025210084,
"grad_norm": 13.495244668273582,
"learning_rate": 4.615384615384616e-06,
"loss": 3.4662106037139893,
"step": 67
},
{
"epoch": 0.14285714285714285,
"grad_norm": 8.727556576037161,
"learning_rate": 4.685314685314686e-06,
"loss": 2.52485728263855,
"step": 68
},
{
"epoch": 0.14495798319327732,
"grad_norm": 7.972842185352863,
"learning_rate": 4.755244755244756e-06,
"loss": 2.94364595413208,
"step": 69
},
{
"epoch": 0.14705882352941177,
"grad_norm": 9.543376366698592,
"learning_rate": 4.8251748251748255e-06,
"loss": 3.080875873565674,
"step": 70
},
{
"epoch": 0.14915966386554622,
"grad_norm": 9.776294776088129,
"learning_rate": 4.895104895104895e-06,
"loss": 2.779900550842285,
"step": 71
},
{
"epoch": 0.15126050420168066,
"grad_norm": 13.30903798143632,
"learning_rate": 4.965034965034965e-06,
"loss": 2.5541608333587646,
"step": 72
},
{
"epoch": 0.15336134453781514,
"grad_norm": 15.821626595005261,
"learning_rate": 5.034965034965036e-06,
"loss": 3.3032145500183105,
"step": 73
},
{
"epoch": 0.15546218487394958,
"grad_norm": 16.2233191932233,
"learning_rate": 5.1048951048951055e-06,
"loss": 3.302570104598999,
"step": 74
},
{
"epoch": 0.15756302521008403,
"grad_norm": 12.108052548372182,
"learning_rate": 5.174825174825175e-06,
"loss": 3.084743022918701,
"step": 75
},
{
"epoch": 0.15966386554621848,
"grad_norm": 12.123207907469205,
"learning_rate": 5.244755244755245e-06,
"loss": 2.839994430541992,
"step": 76
},
{
"epoch": 0.16176470588235295,
"grad_norm": 27.162201978657112,
"learning_rate": 5.314685314685315e-06,
"loss": 2.8261585235595703,
"step": 77
},
{
"epoch": 0.1638655462184874,
"grad_norm": 9.26220027446702,
"learning_rate": 5.384615384615385e-06,
"loss": 3.022369861602783,
"step": 78
},
{
"epoch": 0.16596638655462184,
"grad_norm": 34.01052504369158,
"learning_rate": 5.4545454545454545e-06,
"loss": 3.11270809173584,
"step": 79
},
{
"epoch": 0.16806722689075632,
"grad_norm": 10.318191696420305,
"learning_rate": 5.524475524475524e-06,
"loss": 2.8419973850250244,
"step": 80
},
{
"epoch": 0.17016806722689076,
"grad_norm": 8.227880656419073,
"learning_rate": 5.594405594405595e-06,
"loss": 3.14296555519104,
"step": 81
},
{
"epoch": 0.1722689075630252,
"grad_norm": 9.40271889928186,
"learning_rate": 5.664335664335665e-06,
"loss": 2.8033950328826904,
"step": 82
},
{
"epoch": 0.17436974789915966,
"grad_norm": 9.95443701525972,
"learning_rate": 5.7342657342657345e-06,
"loss": 3.087614059448242,
"step": 83
},
{
"epoch": 0.17647058823529413,
"grad_norm": 9.899264562788812,
"learning_rate": 5.804195804195804e-06,
"loss": 2.7504851818084717,
"step": 84
},
{
"epoch": 0.17857142857142858,
"grad_norm": 14.065492890913543,
"learning_rate": 5.874125874125874e-06,
"loss": 2.701443672180176,
"step": 85
},
{
"epoch": 0.18067226890756302,
"grad_norm": 12.602747808400954,
"learning_rate": 5.944055944055944e-06,
"loss": 2.8965351581573486,
"step": 86
},
{
"epoch": 0.18277310924369747,
"grad_norm": 10.478287423381614,
"learning_rate": 6.013986013986014e-06,
"loss": 2.9607667922973633,
"step": 87
},
{
"epoch": 0.18487394957983194,
"grad_norm": 25.168903954415445,
"learning_rate": 6.083916083916085e-06,
"loss": 3.2360849380493164,
"step": 88
},
{
"epoch": 0.1869747899159664,
"grad_norm": 9.706474901305377,
"learning_rate": 6.153846153846155e-06,
"loss": 3.146829605102539,
"step": 89
},
{
"epoch": 0.18907563025210083,
"grad_norm": 16.976357238619705,
"learning_rate": 6.223776223776225e-06,
"loss": 3.017669200897217,
"step": 90
},
{
"epoch": 0.19117647058823528,
"grad_norm": 22.76924701111939,
"learning_rate": 6.2937062937062944e-06,
"loss": 3.4739527702331543,
"step": 91
},
{
"epoch": 0.19327731092436976,
"grad_norm": 10.061400086768733,
"learning_rate": 6.363636363636364e-06,
"loss": 2.8482136726379395,
"step": 92
},
{
"epoch": 0.1953781512605042,
"grad_norm": 12.711145684012218,
"learning_rate": 6.433566433566434e-06,
"loss": 2.7700202465057373,
"step": 93
},
{
"epoch": 0.19747899159663865,
"grad_norm": 19.156479556141598,
"learning_rate": 6.503496503496504e-06,
"loss": 3.109806537628174,
"step": 94
},
{
"epoch": 0.19957983193277312,
"grad_norm": 7.981944912040402,
"learning_rate": 6.573426573426574e-06,
"loss": 2.9637131690979004,
"step": 95
},
{
"epoch": 0.20168067226890757,
"grad_norm": 12.84352715723152,
"learning_rate": 6.643356643356644e-06,
"loss": 2.846522808074951,
"step": 96
},
{
"epoch": 0.20378151260504201,
"grad_norm": 17.72741270084134,
"learning_rate": 6.713286713286714e-06,
"loss": 3.3485140800476074,
"step": 97
},
{
"epoch": 0.20588235294117646,
"grad_norm": 13.533003488049717,
"learning_rate": 6.783216783216784e-06,
"loss": 3.014303207397461,
"step": 98
},
{
"epoch": 0.20798319327731093,
"grad_norm": 8.522856642426069,
"learning_rate": 6.853146853146854e-06,
"loss": 2.6768596172332764,
"step": 99
},
{
"epoch": 0.21008403361344538,
"grad_norm": 28.089483697370316,
"learning_rate": 6.923076923076923e-06,
"loss": 2.9336276054382324,
"step": 100
},
{
"epoch": 0.21218487394957983,
"grad_norm": 10.595985912398088,
"learning_rate": 6.993006993006993e-06,
"loss": 3.103717803955078,
"step": 101
},
{
"epoch": 0.21428571428571427,
"grad_norm": 9.980132596619391,
"learning_rate": 7.062937062937063e-06,
"loss": 2.7759556770324707,
"step": 102
},
{
"epoch": 0.21638655462184875,
"grad_norm": 9.86026405652693,
"learning_rate": 7.132867132867134e-06,
"loss": 2.586292266845703,
"step": 103
},
{
"epoch": 0.2184873949579832,
"grad_norm": 15.473317115776915,
"learning_rate": 7.202797202797203e-06,
"loss": 3.109880208969116,
"step": 104
},
{
"epoch": 0.22058823529411764,
"grad_norm": 12.647734541011893,
"learning_rate": 7.272727272727273e-06,
"loss": 2.7075915336608887,
"step": 105
},
{
"epoch": 0.22268907563025211,
"grad_norm": 13.506863668083897,
"learning_rate": 7.342657342657343e-06,
"loss": 3.034566879272461,
"step": 106
},
{
"epoch": 0.22478991596638656,
"grad_norm": 10.413965863492892,
"learning_rate": 7.412587412587413e-06,
"loss": 2.3479254245758057,
"step": 107
},
{
"epoch": 0.226890756302521,
"grad_norm": 11.563038534176888,
"learning_rate": 7.4825174825174825e-06,
"loss": 2.9856462478637695,
"step": 108
},
{
"epoch": 0.22899159663865545,
"grad_norm": 12.710111708974129,
"learning_rate": 7.552447552447552e-06,
"loss": 3.462696075439453,
"step": 109
},
{
"epoch": 0.23109243697478993,
"grad_norm": 11.076816381042432,
"learning_rate": 7.622377622377622e-06,
"loss": 3.270888566970825,
"step": 110
},
{
"epoch": 0.23319327731092437,
"grad_norm": 11.212503376143607,
"learning_rate": 7.692307692307694e-06,
"loss": 3.048227310180664,
"step": 111
},
{
"epoch": 0.23529411764705882,
"grad_norm": 12.400573941878047,
"learning_rate": 7.762237762237763e-06,
"loss": 3.2194204330444336,
"step": 112
},
{
"epoch": 0.23739495798319327,
"grad_norm": 15.219804312233611,
"learning_rate": 7.832167832167833e-06,
"loss": 3.6809778213500977,
"step": 113
},
{
"epoch": 0.23949579831932774,
"grad_norm": 14.217254787332546,
"learning_rate": 7.902097902097902e-06,
"loss": 2.912044048309326,
"step": 114
},
{
"epoch": 0.2415966386554622,
"grad_norm": 16.623326169718574,
"learning_rate": 7.972027972027973e-06,
"loss": 3.1021275520324707,
"step": 115
},
{
"epoch": 0.24369747899159663,
"grad_norm": 9.18687643151976,
"learning_rate": 8.041958041958042e-06,
"loss": 3.089829683303833,
"step": 116
},
{
"epoch": 0.24579831932773108,
"grad_norm": 16.772079088582387,
"learning_rate": 8.111888111888112e-06,
"loss": 3.4016504287719727,
"step": 117
},
{
"epoch": 0.24789915966386555,
"grad_norm": 68.62004336442155,
"learning_rate": 8.181818181818183e-06,
"loss": 3.502598285675049,
"step": 118
},
{
"epoch": 0.25,
"grad_norm": 9.091493777708147,
"learning_rate": 8.251748251748254e-06,
"loss": 3.0750184059143066,
"step": 119
},
{
"epoch": 0.25210084033613445,
"grad_norm": 13.607326745790957,
"learning_rate": 8.321678321678323e-06,
"loss": 2.8168656826019287,
"step": 120
},
{
"epoch": 0.2542016806722689,
"grad_norm": 8.683002515816812,
"learning_rate": 8.391608391608393e-06,
"loss": 2.3565826416015625,
"step": 121
},
{
"epoch": 0.25630252100840334,
"grad_norm": 23.36617403509855,
"learning_rate": 8.461538461538462e-06,
"loss": 2.972810745239258,
"step": 122
},
{
"epoch": 0.25840336134453784,
"grad_norm": 8.174637732136828,
"learning_rate": 8.531468531468533e-06,
"loss": 2.9700140953063965,
"step": 123
},
{
"epoch": 0.2605042016806723,
"grad_norm": 10.47900789596826,
"learning_rate": 8.601398601398602e-06,
"loss": 3.0162484645843506,
"step": 124
},
{
"epoch": 0.26260504201680673,
"grad_norm": 16.076782876444586,
"learning_rate": 8.671328671328672e-06,
"loss": 3.109422445297241,
"step": 125
},
{
"epoch": 0.2647058823529412,
"grad_norm": 33.666522199585756,
"learning_rate": 8.741258741258743e-06,
"loss": 2.6802124977111816,
"step": 126
},
{
"epoch": 0.2668067226890756,
"grad_norm": 12.537622047835336,
"learning_rate": 8.811188811188812e-06,
"loss": 2.6609840393066406,
"step": 127
},
{
"epoch": 0.2689075630252101,
"grad_norm": 11.767487098574284,
"learning_rate": 8.881118881118883e-06,
"loss": 3.0896430015563965,
"step": 128
},
{
"epoch": 0.2710084033613445,
"grad_norm": 7.737680571917604,
"learning_rate": 8.951048951048951e-06,
"loss": 3.2370247840881348,
"step": 129
},
{
"epoch": 0.27310924369747897,
"grad_norm": 13.8395022595692,
"learning_rate": 9.020979020979022e-06,
"loss": 2.8461947441101074,
"step": 130
},
{
"epoch": 0.27521008403361347,
"grad_norm": 27.87627626250655,
"learning_rate": 9.090909090909091e-06,
"loss": 3.480252742767334,
"step": 131
},
{
"epoch": 0.2773109243697479,
"grad_norm": 8.067136701179228,
"learning_rate": 9.160839160839162e-06,
"loss": 2.8424923419952393,
"step": 132
},
{
"epoch": 0.27941176470588236,
"grad_norm": 12.474203656062087,
"learning_rate": 9.230769230769232e-06,
"loss": 3.4489340782165527,
"step": 133
},
{
"epoch": 0.2815126050420168,
"grad_norm": 9.56092760411321,
"learning_rate": 9.300699300699301e-06,
"loss": 2.48683500289917,
"step": 134
},
{
"epoch": 0.28361344537815125,
"grad_norm": 10.45857911102664,
"learning_rate": 9.370629370629372e-06,
"loss": 2.975668430328369,
"step": 135
},
{
"epoch": 0.2857142857142857,
"grad_norm": 9.79706691198192,
"learning_rate": 9.44055944055944e-06,
"loss": 3.163745403289795,
"step": 136
},
{
"epoch": 0.28781512605042014,
"grad_norm": 16.663615728677826,
"learning_rate": 9.510489510489511e-06,
"loss": 3.3047399520874023,
"step": 137
},
{
"epoch": 0.28991596638655465,
"grad_norm": 10.093105336690149,
"learning_rate": 9.58041958041958e-06,
"loss": 2.901014804840088,
"step": 138
},
{
"epoch": 0.2920168067226891,
"grad_norm": 10.712099293339499,
"learning_rate": 9.650349650349651e-06,
"loss": 2.4749934673309326,
"step": 139
},
{
"epoch": 0.29411764705882354,
"grad_norm": 12.306067699743261,
"learning_rate": 9.72027972027972e-06,
"loss": 2.735682964324951,
"step": 140
},
{
"epoch": 0.296218487394958,
"grad_norm": 8.899689488937057,
"learning_rate": 9.79020979020979e-06,
"loss": 1.6851799488067627,
"step": 141
},
{
"epoch": 0.29831932773109243,
"grad_norm": 15.412895468970188,
"learning_rate": 9.860139860139861e-06,
"loss": 2.4892358779907227,
"step": 142
},
{
"epoch": 0.3004201680672269,
"grad_norm": 13.718632928552148,
"learning_rate": 9.93006993006993e-06,
"loss": 3.152186870574951,
"step": 143
},
{
"epoch": 0.3025210084033613,
"grad_norm": 9.63303607414013,
"learning_rate": 1e-05,
"loss": 2.4623451232910156,
"step": 144
},
{
"epoch": 0.30462184873949577,
"grad_norm": 9.020782417307544,
"learning_rate": 9.999985057155316e-06,
"loss": 2.3573660850524902,
"step": 145
},
{
"epoch": 0.3067226890756303,
"grad_norm": 15.431206065267094,
"learning_rate": 9.999940228710581e-06,
"loss": 3.248166561126709,
"step": 146
},
{
"epoch": 0.3088235294117647,
"grad_norm": 9.624481227031932,
"learning_rate": 9.99986551493374e-06,
"loss": 3.073438882827759,
"step": 147
},
{
"epoch": 0.31092436974789917,
"grad_norm": 12.24535420873494,
"learning_rate": 9.999760916271368e-06,
"loss": 3.175532579421997,
"step": 148
},
{
"epoch": 0.3130252100840336,
"grad_norm": 8.43469444061833,
"learning_rate": 9.999626433348664e-06,
"loss": 2.2849655151367188,
"step": 149
},
{
"epoch": 0.31512605042016806,
"grad_norm": 13.307775899632185,
"learning_rate": 9.999462066969451e-06,
"loss": 2.7922751903533936,
"step": 150
},
{
"epoch": 0.3172268907563025,
"grad_norm": 11.454291564861384,
"learning_rate": 9.999267818116173e-06,
"loss": 3.03188419342041,
"step": 151
},
{
"epoch": 0.31932773109243695,
"grad_norm": 16.712527557096042,
"learning_rate": 9.999043687949878e-06,
"loss": 3.3826239109039307,
"step": 152
},
{
"epoch": 0.32142857142857145,
"grad_norm": 22.000641429064785,
"learning_rate": 9.998789677810226e-06,
"loss": 3.103822708129883,
"step": 153
},
{
"epoch": 0.3235294117647059,
"grad_norm": 11.206024089957094,
"learning_rate": 9.998505789215469e-06,
"loss": 2.633566379547119,
"step": 154
},
{
"epoch": 0.32563025210084034,
"grad_norm": 17.0566593574694,
"learning_rate": 9.998192023862448e-06,
"loss": 2.937821388244629,
"step": 155
},
{
"epoch": 0.3277310924369748,
"grad_norm": 10.638495096316019,
"learning_rate": 9.997848383626583e-06,
"loss": 3.0057592391967773,
"step": 156
},
{
"epoch": 0.32983193277310924,
"grad_norm": 13.891998906384215,
"learning_rate": 9.997474870561858e-06,
"loss": 3.4198083877563477,
"step": 157
},
{
"epoch": 0.3319327731092437,
"grad_norm": 7.77313705300237,
"learning_rate": 9.997071486900813e-06,
"loss": 2.748509407043457,
"step": 158
},
{
"epoch": 0.33403361344537813,
"grad_norm": 11.432910137348301,
"learning_rate": 9.996638235054527e-06,
"loss": 3.3422679901123047,
"step": 159
},
{
"epoch": 0.33613445378151263,
"grad_norm": 9.407520098068266,
"learning_rate": 9.996175117612608e-06,
"loss": 3.2214763164520264,
"step": 160
},
{
"epoch": 0.3382352941176471,
"grad_norm": 9.207535688673886,
"learning_rate": 9.99568213734317e-06,
"loss": 2.5538628101348877,
"step": 161
},
{
"epoch": 0.3403361344537815,
"grad_norm": 10.755277234400435,
"learning_rate": 9.995159297192824e-06,
"loss": 2.781787872314453,
"step": 162
},
{
"epoch": 0.34243697478991597,
"grad_norm": 13.097444619561006,
"learning_rate": 9.99460660028666e-06,
"loss": 3.3784282207489014,
"step": 163
},
{
"epoch": 0.3445378151260504,
"grad_norm": 9.022751200279867,
"learning_rate": 9.994024049928222e-06,
"loss": 3.2824249267578125,
"step": 164
},
{
"epoch": 0.34663865546218486,
"grad_norm": 7.521996605994801,
"learning_rate": 9.993411649599494e-06,
"loss": 2.589594841003418,
"step": 165
},
{
"epoch": 0.3487394957983193,
"grad_norm": 19.97411284039417,
"learning_rate": 9.992769402960878e-06,
"loss": 3.7193164825439453,
"step": 166
},
{
"epoch": 0.35084033613445376,
"grad_norm": 8.085906363590569,
"learning_rate": 9.99209731385117e-06,
"loss": 2.823063611984253,
"step": 167
},
{
"epoch": 0.35294117647058826,
"grad_norm": 11.903719879092119,
"learning_rate": 9.99139538628754e-06,
"loss": 3.1389951705932617,
"step": 168
},
{
"epoch": 0.3550420168067227,
"grad_norm": 21.923086009804106,
"learning_rate": 9.990663624465504e-06,
"loss": 2.9536495208740234,
"step": 169
},
{
"epoch": 0.35714285714285715,
"grad_norm": 13.50586631618126,
"learning_rate": 9.989902032758904e-06,
"loss": 2.6355466842651367,
"step": 170
},
{
"epoch": 0.3592436974789916,
"grad_norm": 5.578596081707914,
"learning_rate": 9.989110615719882e-06,
"loss": 1.1800763607025146,
"step": 171
},
{
"epoch": 0.36134453781512604,
"grad_norm": 16.042050675579503,
"learning_rate": 9.988289378078842e-06,
"loss": 2.679232358932495,
"step": 172
},
{
"epoch": 0.3634453781512605,
"grad_norm": 13.382798743317503,
"learning_rate": 9.987438324744437e-06,
"loss": 2.3583908081054688,
"step": 173
},
{
"epoch": 0.36554621848739494,
"grad_norm": 9.108315025108485,
"learning_rate": 9.986557460803527e-06,
"loss": 2.748077392578125,
"step": 174
},
{
"epoch": 0.36764705882352944,
"grad_norm": 9.67015040715346,
"learning_rate": 9.985646791521165e-06,
"loss": 3.2660067081451416,
"step": 175
},
{
"epoch": 0.3697478991596639,
"grad_norm": 11.678263700428246,
"learning_rate": 9.984706322340539e-06,
"loss": 2.9270148277282715,
"step": 176
},
{
"epoch": 0.37184873949579833,
"grad_norm": 9.845183071879623,
"learning_rate": 9.983736058882965e-06,
"loss": 2.455327033996582,
"step": 177
},
{
"epoch": 0.3739495798319328,
"grad_norm": 9.630887189931224,
"learning_rate": 9.982736006947842e-06,
"loss": 3.171403169631958,
"step": 178
},
{
"epoch": 0.3760504201680672,
"grad_norm": 7.217019473795253,
"learning_rate": 9.98170617251262e-06,
"loss": 2.6023473739624023,
"step": 179
},
{
"epoch": 0.37815126050420167,
"grad_norm": 7.438957257707156,
"learning_rate": 9.98064656173276e-06,
"loss": 2.7492432594299316,
"step": 180
},
{
"epoch": 0.3802521008403361,
"grad_norm": 12.314225953456766,
"learning_rate": 9.979557180941702e-06,
"loss": 3.520758628845215,
"step": 181
},
{
"epoch": 0.38235294117647056,
"grad_norm": 7.754983445761027,
"learning_rate": 9.978438036650822e-06,
"loss": 2.7245442867279053,
"step": 182
},
{
"epoch": 0.38445378151260506,
"grad_norm": 15.124443991385633,
"learning_rate": 9.977289135549404e-06,
"loss": 2.790768623352051,
"step": 183
},
{
"epoch": 0.3865546218487395,
"grad_norm": 15.169071975047261,
"learning_rate": 9.976110484504587e-06,
"loss": 2.5588126182556152,
"step": 184
},
{
"epoch": 0.38865546218487396,
"grad_norm": 18.50299410182784,
"learning_rate": 9.974902090561331e-06,
"loss": 3.0367865562438965,
"step": 185
},
{
"epoch": 0.3907563025210084,
"grad_norm": 12.853534690634186,
"learning_rate": 9.973663960942373e-06,
"loss": 3.1013669967651367,
"step": 186
},
{
"epoch": 0.39285714285714285,
"grad_norm": 11.962180171730763,
"learning_rate": 9.972396103048184e-06,
"loss": 2.678436279296875,
"step": 187
},
{
"epoch": 0.3949579831932773,
"grad_norm": 14.345031935763927,
"learning_rate": 9.971098524456925e-06,
"loss": 2.866910696029663,
"step": 188
},
{
"epoch": 0.39705882352941174,
"grad_norm": 24.927874908872194,
"learning_rate": 9.969771232924404e-06,
"loss": 2.6690807342529297,
"step": 189
},
{
"epoch": 0.39915966386554624,
"grad_norm": 13.232716463146705,
"learning_rate": 9.968414236384022e-06,
"loss": 2.615846633911133,
"step": 190
},
{
"epoch": 0.4012605042016807,
"grad_norm": 19.24597028348177,
"learning_rate": 9.967027542946739e-06,
"loss": 3.197604179382324,
"step": 191
},
{
"epoch": 0.40336134453781514,
"grad_norm": 19.57923793430777,
"learning_rate": 9.965611160901008e-06,
"loss": 1.584808349609375,
"step": 192
},
{
"epoch": 0.4054621848739496,
"grad_norm": 9.313854254132917,
"learning_rate": 9.964165098712745e-06,
"loss": 2.7913365364074707,
"step": 193
},
{
"epoch": 0.40756302521008403,
"grad_norm": 15.764914604292455,
"learning_rate": 9.962689365025259e-06,
"loss": 3.42575740814209,
"step": 194
},
{
"epoch": 0.4096638655462185,
"grad_norm": 9.662424511151881,
"learning_rate": 9.961183968659217e-06,
"loss": 2.6931188106536865,
"step": 195
},
{
"epoch": 0.4117647058823529,
"grad_norm": 13.117904635638109,
"learning_rate": 9.959648918612576e-06,
"loss": 2.4463605880737305,
"step": 196
},
{
"epoch": 0.41386554621848737,
"grad_norm": 8.434614198562612,
"learning_rate": 9.958084224060547e-06,
"loss": 2.647773265838623,
"step": 197
},
{
"epoch": 0.41596638655462187,
"grad_norm": 26.520590112059157,
"learning_rate": 9.956489894355521e-06,
"loss": 2.660770893096924,
"step": 198
},
{
"epoch": 0.4180672268907563,
"grad_norm": 28.510323184410662,
"learning_rate": 9.954865939027028e-06,
"loss": 3.627254009246826,
"step": 199
},
{
"epoch": 0.42016806722689076,
"grad_norm": 7.679364921262506,
"learning_rate": 9.953212367781675e-06,
"loss": 2.683685779571533,
"step": 200
},
{
"epoch": 0.4222689075630252,
"grad_norm": 13.123862369544378,
"learning_rate": 9.95152919050308e-06,
"loss": 2.7249388694763184,
"step": 201
},
{
"epoch": 0.42436974789915966,
"grad_norm": 6.985824973864478,
"learning_rate": 9.949816417251831e-06,
"loss": 2.933401107788086,
"step": 202
},
{
"epoch": 0.4264705882352941,
"grad_norm": 13.569070375050062,
"learning_rate": 9.948074058265409e-06,
"loss": 3.5457630157470703,
"step": 203
},
{
"epoch": 0.42857142857142855,
"grad_norm": 7.335673995298351,
"learning_rate": 9.94630212395813e-06,
"loss": 2.483736038208008,
"step": 204
},
{
"epoch": 0.43067226890756305,
"grad_norm": 73.9554577496319,
"learning_rate": 9.944500624921094e-06,
"loss": 2.470374584197998,
"step": 205
},
{
"epoch": 0.4327731092436975,
"grad_norm": 11.27254717083641,
"learning_rate": 9.942669571922108e-06,
"loss": 3.2255494594573975,
"step": 206
},
{
"epoch": 0.43487394957983194,
"grad_norm": 11.257221010364708,
"learning_rate": 9.940808975905627e-06,
"loss": 3.4820542335510254,
"step": 207
},
{
"epoch": 0.4369747899159664,
"grad_norm": 16.32933603207297,
"learning_rate": 9.93891884799269e-06,
"loss": 3.218539237976074,
"step": 208
},
{
"epoch": 0.43907563025210083,
"grad_norm": 27.30232213883322,
"learning_rate": 9.936999199480854e-06,
"loss": 2.8428990840911865,
"step": 209
},
{
"epoch": 0.4411764705882353,
"grad_norm": 8.340720464987514,
"learning_rate": 9.935050041844121e-06,
"loss": 3.661019802093506,
"step": 210
},
{
"epoch": 0.4432773109243697,
"grad_norm": 18.410105558121085,
"learning_rate": 9.933071386732874e-06,
"loss": 3.330902338027954,
"step": 211
},
{
"epoch": 0.44537815126050423,
"grad_norm": 10.649860943280096,
"learning_rate": 9.931063245973812e-06,
"loss": 2.7754883766174316,
"step": 212
},
{
"epoch": 0.4474789915966387,
"grad_norm": 13.898816541841864,
"learning_rate": 9.929025631569864e-06,
"loss": 2.3284661769866943,
"step": 213
},
{
"epoch": 0.4495798319327731,
"grad_norm": 11.170546252681195,
"learning_rate": 9.926958555700134e-06,
"loss": 2.599228858947754,
"step": 214
},
{
"epoch": 0.45168067226890757,
"grad_norm": 11.76779841923458,
"learning_rate": 9.924862030719821e-06,
"loss": 3.174004077911377,
"step": 215
},
{
"epoch": 0.453781512605042,
"grad_norm": 12.943887425672324,
"learning_rate": 9.922736069160141e-06,
"loss": 2.7390694618225098,
"step": 216
},
{
"epoch": 0.45588235294117646,
"grad_norm": 11.55413590289726,
"learning_rate": 9.920580683728263e-06,
"loss": 2.7388081550598145,
"step": 217
},
{
"epoch": 0.4579831932773109,
"grad_norm": 22.67934856569803,
"learning_rate": 9.918395887307219e-06,
"loss": 2.4359140396118164,
"step": 218
},
{
"epoch": 0.46008403361344535,
"grad_norm": 16.89880489289811,
"learning_rate": 9.916181692955841e-06,
"loss": 2.9688220024108887,
"step": 219
},
{
"epoch": 0.46218487394957986,
"grad_norm": 19.071787734842648,
"learning_rate": 9.913938113908675e-06,
"loss": 3.1534006595611572,
"step": 220
},
{
"epoch": 0.4642857142857143,
"grad_norm": 15.85242809267351,
"learning_rate": 9.9116651635759e-06,
"loss": 2.618938684463501,
"step": 221
},
{
"epoch": 0.46638655462184875,
"grad_norm": 16.618677645763935,
"learning_rate": 9.909362855543253e-06,
"loss": 2.844968318939209,
"step": 222
},
{
"epoch": 0.4684873949579832,
"grad_norm": 10.671971882677827,
"learning_rate": 9.907031203571948e-06,
"loss": 2.4792628288269043,
"step": 223
},
{
"epoch": 0.47058823529411764,
"grad_norm": 19.241816484552377,
"learning_rate": 9.90467022159859e-06,
"loss": 2.894502639770508,
"step": 224
},
{
"epoch": 0.4726890756302521,
"grad_norm": 23.569212607106817,
"learning_rate": 9.902279923735093e-06,
"loss": 2.792015552520752,
"step": 225
},
{
"epoch": 0.47478991596638653,
"grad_norm": 9.680153288005078,
"learning_rate": 9.899860324268599e-06,
"loss": 2.9171247482299805,
"step": 226
},
{
"epoch": 0.47689075630252103,
"grad_norm": 12.955899131578942,
"learning_rate": 9.897411437661386e-06,
"loss": 2.560214042663574,
"step": 227
},
{
"epoch": 0.4789915966386555,
"grad_norm": 8.404115741492017,
"learning_rate": 9.894933278550785e-06,
"loss": 3.2796883583068848,
"step": 228
},
{
"epoch": 0.4810924369747899,
"grad_norm": 8.318847487560761,
"learning_rate": 9.8924258617491e-06,
"loss": 3.0324971675872803,
"step": 229
},
{
"epoch": 0.4831932773109244,
"grad_norm": 18.51611171900766,
"learning_rate": 9.8898892022435e-06,
"loss": 3.3899683952331543,
"step": 230
},
{
"epoch": 0.4852941176470588,
"grad_norm": 11.1091069250737,
"learning_rate": 9.887323315195956e-06,
"loss": 2.742903709411621,
"step": 231
},
{
"epoch": 0.48739495798319327,
"grad_norm": 7.80795476246885,
"learning_rate": 9.884728215943122e-06,
"loss": 3.230966806411743,
"step": 232
},
{
"epoch": 0.4894957983193277,
"grad_norm": 13.293388527053166,
"learning_rate": 9.882103919996268e-06,
"loss": 2.8818302154541016,
"step": 233
},
{
"epoch": 0.49159663865546216,
"grad_norm": 6.043647907341577,
"learning_rate": 9.879450443041172e-06,
"loss": 2.358765125274658,
"step": 234
},
{
"epoch": 0.49369747899159666,
"grad_norm": 8.169920329828493,
"learning_rate": 9.876767800938032e-06,
"loss": 3.0420098304748535,
"step": 235
},
{
"epoch": 0.4957983193277311,
"grad_norm": 18.681067639331463,
"learning_rate": 9.874056009721367e-06,
"loss": 2.9595160484313965,
"step": 236
},
{
"epoch": 0.49789915966386555,
"grad_norm": 7.792831708992119,
"learning_rate": 9.87131508559993e-06,
"loss": 2.9571242332458496,
"step": 237
},
{
"epoch": 0.5,
"grad_norm": 13.533405695746444,
"learning_rate": 9.868545044956603e-06,
"loss": 2.798694610595703,
"step": 238
},
{
"epoch": 0.5021008403361344,
"grad_norm": 7.855798585235136,
"learning_rate": 9.865745904348296e-06,
"loss": 2.9430432319641113,
"step": 239
},
{
"epoch": 0.5042016806722689,
"grad_norm": 12.596029584158895,
"learning_rate": 9.862917680505863e-06,
"loss": 3.065462112426758,
"step": 240
},
{
"epoch": 0.5063025210084033,
"grad_norm": 11.793626015707394,
"learning_rate": 9.860060390333988e-06,
"loss": 3.8562116622924805,
"step": 241
},
{
"epoch": 0.5084033613445378,
"grad_norm": 8.660405200484282,
"learning_rate": 9.857174050911085e-06,
"loss": 2.645123243331909,
"step": 242
},
{
"epoch": 0.5105042016806722,
"grad_norm": 11.950071539791612,
"learning_rate": 9.854258679489203e-06,
"loss": 2.500267744064331,
"step": 243
},
{
"epoch": 0.5126050420168067,
"grad_norm": 14.029861713702717,
"learning_rate": 9.851314293493923e-06,
"loss": 2.553537368774414,
"step": 244
},
{
"epoch": 0.5147058823529411,
"grad_norm": 21.40352382596275,
"learning_rate": 9.848340910524243e-06,
"loss": 2.694528102874756,
"step": 245
},
{
"epoch": 0.5168067226890757,
"grad_norm": 11.756867034830558,
"learning_rate": 9.845338548352482e-06,
"loss": 3.2089271545410156,
"step": 246
},
{
"epoch": 0.5189075630252101,
"grad_norm": 13.513723971793041,
"learning_rate": 9.842307224924174e-06,
"loss": 2.443826198577881,
"step": 247
},
{
"epoch": 0.5210084033613446,
"grad_norm": 8.839705225157738,
"learning_rate": 9.839246958357957e-06,
"loss": 2.9329233169555664,
"step": 248
},
{
"epoch": 0.523109243697479,
"grad_norm": 14.107087922274081,
"learning_rate": 9.836157766945467e-06,
"loss": 2.5171399116516113,
"step": 249
},
{
"epoch": 0.5252100840336135,
"grad_norm": 8.285926532283062,
"learning_rate": 9.833039669151225e-06,
"loss": 3.0069408416748047,
"step": 250
},
{
"epoch": 0.5273109243697479,
"grad_norm": 9.58371718621674,
"learning_rate": 9.829892683612535e-06,
"loss": 2.5816359519958496,
"step": 251
},
{
"epoch": 0.5294117647058824,
"grad_norm": 20.00310864922347,
"learning_rate": 9.826716829139358e-06,
"loss": 2.3982670307159424,
"step": 252
},
{
"epoch": 0.5315126050420168,
"grad_norm": 10.648220658525108,
"learning_rate": 9.82351212471422e-06,
"loss": 2.975574016571045,
"step": 253
},
{
"epoch": 0.5336134453781513,
"grad_norm": 17.551242772865887,
"learning_rate": 9.820278589492076e-06,
"loss": 2.4827775955200195,
"step": 254
},
{
"epoch": 0.5357142857142857,
"grad_norm": 17.09184171751482,
"learning_rate": 9.817016242800215e-06,
"loss": 2.690033197402954,
"step": 255
},
{
"epoch": 0.5378151260504201,
"grad_norm": 14.722560106056354,
"learning_rate": 9.813725104138133e-06,
"loss": 3.346949338912964,
"step": 256
},
{
"epoch": 0.5399159663865546,
"grad_norm": 17.505076110573757,
"learning_rate": 9.810405193177418e-06,
"loss": 2.6791281700134277,
"step": 257
},
{
"epoch": 0.542016806722689,
"grad_norm": 17.808558357662132,
"learning_rate": 9.807056529761637e-06,
"loss": 2.853158950805664,
"step": 258
},
{
"epoch": 0.5441176470588235,
"grad_norm": 20.31515982195739,
"learning_rate": 9.80367913390621e-06,
"loss": 3.1636295318603516,
"step": 259
},
{
"epoch": 0.5462184873949579,
"grad_norm": 12.64467693447632,
"learning_rate": 9.800273025798302e-06,
"loss": 2.5055313110351562,
"step": 260
},
{
"epoch": 0.5483193277310925,
"grad_norm": 9.570949964132296,
"learning_rate": 9.796838225796688e-06,
"loss": 2.9986414909362793,
"step": 261
},
{
"epoch": 0.5504201680672269,
"grad_norm": 14.832124263006255,
"learning_rate": 9.793374754431642e-06,
"loss": 2.419975757598877,
"step": 262
},
{
"epoch": 0.5525210084033614,
"grad_norm": 22.395098332172758,
"learning_rate": 9.789882632404809e-06,
"loss": 3.0301923751831055,
"step": 263
},
{
"epoch": 0.5546218487394958,
"grad_norm": 8.680917615796206,
"learning_rate": 9.786361880589084e-06,
"loss": 2.846034526824951,
"step": 264
},
{
"epoch": 0.5567226890756303,
"grad_norm": 16.17159732015871,
"learning_rate": 9.782812520028487e-06,
"loss": 3.250943183898926,
"step": 265
},
{
"epoch": 0.5588235294117647,
"grad_norm": 12.800181347711561,
"learning_rate": 9.779234571938034e-06,
"loss": 2.5069515705108643,
"step": 266
},
{
"epoch": 0.5609243697478992,
"grad_norm": 20.58760178113823,
"learning_rate": 9.775628057703616e-06,
"loss": 2.2883377075195312,
"step": 267
},
{
"epoch": 0.5630252100840336,
"grad_norm": 19.3122933281468,
"learning_rate": 9.771992998881865e-06,
"loss": 1.8844149112701416,
"step": 268
},
{
"epoch": 0.5651260504201681,
"grad_norm": 11.766785955468544,
"learning_rate": 9.768329417200029e-06,
"loss": 2.608553409576416,
"step": 269
},
{
"epoch": 0.5672268907563025,
"grad_norm": 9.015634942296078,
"learning_rate": 9.76463733455584e-06,
"loss": 2.8849685192108154,
"step": 270
},
{
"epoch": 0.569327731092437,
"grad_norm": 10.71605416834433,
"learning_rate": 9.760916773017386e-06,
"loss": 2.83829402923584,
"step": 271
},
{
"epoch": 0.5714285714285714,
"grad_norm": 8.102503833940233,
"learning_rate": 9.757167754822974e-06,
"loss": 2.6053004264831543,
"step": 272
},
{
"epoch": 0.5735294117647058,
"grad_norm": 18.62135736056985,
"learning_rate": 9.753390302381006e-06,
"loss": 2.8338804244995117,
"step": 273
},
{
"epoch": 0.5756302521008403,
"grad_norm": 34.82348840659483,
"learning_rate": 9.749584438269833e-06,
"loss": 2.979978084564209,
"step": 274
},
{
"epoch": 0.5777310924369747,
"grad_norm": 6.726547081859168,
"learning_rate": 9.74575018523763e-06,
"loss": 1.8241777420043945,
"step": 275
},
{
"epoch": 0.5798319327731093,
"grad_norm": 12.206262847267514,
"learning_rate": 9.741887566202259e-06,
"loss": 3.2140274047851562,
"step": 276
},
{
"epoch": 0.5819327731092437,
"grad_norm": 43.432328207654045,
"learning_rate": 9.737996604251124e-06,
"loss": 3.074397325515747,
"step": 277
},
{
"epoch": 0.5840336134453782,
"grad_norm": 19.844157721727896,
"learning_rate": 9.73407732264104e-06,
"loss": 2.527010679244995,
"step": 278
},
{
"epoch": 0.5861344537815126,
"grad_norm": 10.112570131000647,
"learning_rate": 9.730129744798096e-06,
"loss": 2.6019768714904785,
"step": 279
},
{
"epoch": 0.5882352941176471,
"grad_norm": 216.6539557731807,
"learning_rate": 9.726153894317508e-06,
"loss": 2.848952293395996,
"step": 280
},
{
"epoch": 0.5903361344537815,
"grad_norm": 18.730728554973695,
"learning_rate": 9.722149794963483e-06,
"loss": 3.120556354522705,
"step": 281
},
{
"epoch": 0.592436974789916,
"grad_norm": 7.236837867364418,
"learning_rate": 9.718117470669072e-06,
"loss": 2.8926405906677246,
"step": 282
},
{
"epoch": 0.5945378151260504,
"grad_norm": 8.247663007399707,
"learning_rate": 9.714056945536039e-06,
"loss": 3.2854347229003906,
"step": 283
},
{
"epoch": 0.5966386554621849,
"grad_norm": 7.729125572796969,
"learning_rate": 9.709968243834698e-06,
"loss": 2.856870651245117,
"step": 284
},
{
"epoch": 0.5987394957983193,
"grad_norm": 20.951434970442865,
"learning_rate": 9.705851390003783e-06,
"loss": 3.3881802558898926,
"step": 285
},
{
"epoch": 0.6008403361344538,
"grad_norm": 8.671814837426174,
"learning_rate": 9.7017064086503e-06,
"loss": 2.6102542877197266,
"step": 286
},
{
"epoch": 0.6029411764705882,
"grad_norm": 8.644019718162792,
"learning_rate": 9.697533324549371e-06,
"loss": 2.7697243690490723,
"step": 287
},
{
"epoch": 0.6050420168067226,
"grad_norm": 12.279613000984195,
"learning_rate": 9.693332162644095e-06,
"loss": 2.568695545196533,
"step": 288
},
{
"epoch": 0.6071428571428571,
"grad_norm": 13.384358670021655,
"learning_rate": 9.689102948045398e-06,
"loss": 2.922543525695801,
"step": 289
},
{
"epoch": 0.6092436974789915,
"grad_norm": 15.250277694133263,
"learning_rate": 9.684845706031878e-06,
"loss": 3.1011314392089844,
"step": 290
},
{
"epoch": 0.6113445378151261,
"grad_norm": 9.840291260984259,
"learning_rate": 9.680560462049657e-06,
"loss": 2.627528429031372,
"step": 291
},
{
"epoch": 0.6134453781512605,
"grad_norm": 13.648735567431437,
"learning_rate": 9.676247241712228e-06,
"loss": 2.8417811393737793,
"step": 292
},
{
"epoch": 0.615546218487395,
"grad_norm": 8.931356705581003,
"learning_rate": 9.671906070800307e-06,
"loss": 2.3787314891815186,
"step": 293
},
{
"epoch": 0.6176470588235294,
"grad_norm": 7.6270227976464895,
"learning_rate": 9.667536975261667e-06,
"loss": 2.751317024230957,
"step": 294
},
{
"epoch": 0.6197478991596639,
"grad_norm": 7.016417787785432,
"learning_rate": 9.663139981210998e-06,
"loss": 2.6910929679870605,
"step": 295
},
{
"epoch": 0.6218487394957983,
"grad_norm": 11.206285204533946,
"learning_rate": 9.658715114929737e-06,
"loss": 2.801499366760254,
"step": 296
},
{
"epoch": 0.6239495798319328,
"grad_norm": 18.427453742915965,
"learning_rate": 9.654262402865922e-06,
"loss": 2.885946273803711,
"step": 297
},
{
"epoch": 0.6260504201680672,
"grad_norm": 13.114557836832477,
"learning_rate": 9.649781871634025e-06,
"loss": 3.1485133171081543,
"step": 298
},
{
"epoch": 0.6281512605042017,
"grad_norm": 8.349893932720915,
"learning_rate": 9.6452735480148e-06,
"loss": 3.174015998840332,
"step": 299
},
{
"epoch": 0.6302521008403361,
"grad_norm": 11.762326882141835,
"learning_rate": 9.64073745895512e-06,
"loss": 3.339445114135742,
"step": 300
},
{
"epoch": 0.6323529411764706,
"grad_norm": 7.555546243601591,
"learning_rate": 9.636173631567812e-06,
"loss": 2.9448843002319336,
"step": 301
},
{
"epoch": 0.634453781512605,
"grad_norm": 5.95180683932207,
"learning_rate": 9.631582093131501e-06,
"loss": 2.6363561153411865,
"step": 302
},
{
"epoch": 0.6365546218487395,
"grad_norm": 11.371944122058592,
"learning_rate": 9.62696287109045e-06,
"loss": 2.4621901512145996,
"step": 303
},
{
"epoch": 0.6386554621848739,
"grad_norm": 11.742524524874973,
"learning_rate": 9.622315993054384e-06,
"loss": 2.8623251914978027,
"step": 304
},
{
"epoch": 0.6407563025210085,
"grad_norm": 12.39315277601619,
"learning_rate": 9.61764148679833e-06,
"loss": 2.191575765609741,
"step": 305
},
{
"epoch": 0.6428571428571429,
"grad_norm": 6.720496031064891,
"learning_rate": 9.61293938026246e-06,
"loss": 2.018388271331787,
"step": 306
},
{
"epoch": 0.6449579831932774,
"grad_norm": 9.624398589362118,
"learning_rate": 9.608209701551913e-06,
"loss": 2.756854772567749,
"step": 307
},
{
"epoch": 0.6470588235294118,
"grad_norm": 21.622075822614562,
"learning_rate": 9.60345247893663e-06,
"loss": 2.6668529510498047,
"step": 308
},
{
"epoch": 0.6491596638655462,
"grad_norm": 14.926878160653533,
"learning_rate": 9.598667740851187e-06,
"loss": 2.6617343425750732,
"step": 309
},
{
"epoch": 0.6512605042016807,
"grad_norm": 9.499741494871419,
"learning_rate": 9.59385551589462e-06,
"loss": 3.1460976600646973,
"step": 310
},
{
"epoch": 0.6533613445378151,
"grad_norm": 17.910724072364676,
"learning_rate": 9.589015832830267e-06,
"loss": 2.7566354274749756,
"step": 311
},
{
"epoch": 0.6554621848739496,
"grad_norm": 15.059990921253526,
"learning_rate": 9.584148720585575e-06,
"loss": 3.3112881183624268,
"step": 312
},
{
"epoch": 0.657563025210084,
"grad_norm": 12.27519040871759,
"learning_rate": 9.57925420825195e-06,
"loss": 2.8563618659973145,
"step": 313
},
{
"epoch": 0.6596638655462185,
"grad_norm": 14.53433732237354,
"learning_rate": 9.574332325084564e-06,
"loss": 3.5544567108154297,
"step": 314
},
{
"epoch": 0.6617647058823529,
"grad_norm": 9.662661721128384,
"learning_rate": 9.569383100502193e-06,
"loss": 2.924015998840332,
"step": 315
},
{
"epoch": 0.6638655462184874,
"grad_norm": 11.360356839234715,
"learning_rate": 9.564406564087032e-06,
"loss": 2.7250008583068848,
"step": 316
},
{
"epoch": 0.6659663865546218,
"grad_norm": 6.265433825569306,
"learning_rate": 9.559402745584527e-06,
"loss": 2.9229238033294678,
"step": 317
},
{
"epoch": 0.6680672268907563,
"grad_norm": 10.23047238705242,
"learning_rate": 9.554371674903191e-06,
"loss": 3.4867515563964844,
"step": 318
},
{
"epoch": 0.6701680672268907,
"grad_norm": 13.446172115002414,
"learning_rate": 9.549313382114427e-06,
"loss": 2.4049417972564697,
"step": 319
},
{
"epoch": 0.6722689075630253,
"grad_norm": 14.135159230227343,
"learning_rate": 9.54422789745235e-06,
"loss": 3.1008338928222656,
"step": 320
},
{
"epoch": 0.6743697478991597,
"grad_norm": 9.140604791680513,
"learning_rate": 9.5391152513136e-06,
"loss": 2.6114342212677,
"step": 321
},
{
"epoch": 0.6764705882352942,
"grad_norm": 9.779919855511938,
"learning_rate": 9.533975474257171e-06,
"loss": 2.7165164947509766,
"step": 322
},
{
"epoch": 0.6785714285714286,
"grad_norm": 9.275622947642706,
"learning_rate": 9.528808597004216e-06,
"loss": 2.8122520446777344,
"step": 323
},
{
"epoch": 0.680672268907563,
"grad_norm": 14.779154717919877,
"learning_rate": 9.523614650437876e-06,
"loss": 2.862661838531494,
"step": 324
},
{
"epoch": 0.6827731092436975,
"grad_norm": 12.377273208842894,
"learning_rate": 9.518393665603084e-06,
"loss": 2.9812843799591064,
"step": 325
},
{
"epoch": 0.6848739495798319,
"grad_norm": 11.010658732376989,
"learning_rate": 9.513145673706383e-06,
"loss": 2.9455337524414062,
"step": 326
},
{
"epoch": 0.6869747899159664,
"grad_norm": 14.806340169845868,
"learning_rate": 9.507870706115749e-06,
"loss": 3.1577422618865967,
"step": 327
},
{
"epoch": 0.6890756302521008,
"grad_norm": 13.616368197529665,
"learning_rate": 9.50256879436039e-06,
"loss": 2.545835018157959,
"step": 328
},
{
"epoch": 0.6911764705882353,
"grad_norm": 8.987871015734585,
"learning_rate": 9.497239970130561e-06,
"loss": 2.559062957763672,
"step": 329
},
{
"epoch": 0.6932773109243697,
"grad_norm": 8.587992072590101,
"learning_rate": 9.491884265277383e-06,
"loss": 2.932499647140503,
"step": 330
},
{
"epoch": 0.6953781512605042,
"grad_norm": 7.463276523398998,
"learning_rate": 9.486501711812637e-06,
"loss": 2.967616558074951,
"step": 331
},
{
"epoch": 0.6974789915966386,
"grad_norm": 14.475511521289118,
"learning_rate": 9.481092341908591e-06,
"loss": 2.4604697227478027,
"step": 332
},
{
"epoch": 0.6995798319327731,
"grad_norm": 9.02600045542574,
"learning_rate": 9.475656187897794e-06,
"loss": 3.146969795227051,
"step": 333
},
{
"epoch": 0.7016806722689075,
"grad_norm": 7.639638057540197,
"learning_rate": 9.470193282272886e-06,
"loss": 3.337083339691162,
"step": 334
},
{
"epoch": 0.7037815126050421,
"grad_norm": 16.397307515268395,
"learning_rate": 9.464703657686412e-06,
"loss": 2.7829766273498535,
"step": 335
},
{
"epoch": 0.7058823529411765,
"grad_norm": 11.048022152868258,
"learning_rate": 9.45918734695061e-06,
"loss": 3.095449447631836,
"step": 336
},
{
"epoch": 0.707983193277311,
"grad_norm": 10.20138527201031,
"learning_rate": 9.453644383037232e-06,
"loss": 2.6790573596954346,
"step": 337
},
{
"epoch": 0.7100840336134454,
"grad_norm": 9.93895349514755,
"learning_rate": 9.448074799077337e-06,
"loss": 2.9844274520874023,
"step": 338
},
{
"epoch": 0.7121848739495799,
"grad_norm": 171.53953866617377,
"learning_rate": 9.442478628361098e-06,
"loss": 2.256910562515259,
"step": 339
},
{
"epoch": 0.7142857142857143,
"grad_norm": 12.351676724137773,
"learning_rate": 9.436855904337596e-06,
"loss": 2.9464545249938965,
"step": 340
},
{
"epoch": 0.7163865546218487,
"grad_norm": 19.404123629754835,
"learning_rate": 9.43120666061463e-06,
"loss": 2.23644757270813,
"step": 341
},
{
"epoch": 0.7184873949579832,
"grad_norm": 11.246236929808724,
"learning_rate": 9.425530930958507e-06,
"loss": 2.85072660446167,
"step": 342
},
{
"epoch": 0.7205882352941176,
"grad_norm": 17.642986778414265,
"learning_rate": 9.419828749293845e-06,
"loss": 3.09238862991333,
"step": 343
},
{
"epoch": 0.7226890756302521,
"grad_norm": 8.1418179714146,
"learning_rate": 9.414100149703373e-06,
"loss": 2.7548587322235107,
"step": 344
},
{
"epoch": 0.7247899159663865,
"grad_norm": 11.258932741699391,
"learning_rate": 9.40834516642772e-06,
"loss": 2.487452507019043,
"step": 345
},
{
"epoch": 0.726890756302521,
"grad_norm": 10.082639156310133,
"learning_rate": 9.402563833865213e-06,
"loss": 3.077296257019043,
"step": 346
},
{
"epoch": 0.7289915966386554,
"grad_norm": 11.506257708160296,
"learning_rate": 9.396756186571672e-06,
"loss": 2.6188814640045166,
"step": 347
},
{
"epoch": 0.7310924369747899,
"grad_norm": 11.743812268831451,
"learning_rate": 9.39092225926021e-06,
"loss": 3.150355815887451,
"step": 348
},
{
"epoch": 0.7331932773109243,
"grad_norm": 10.613109994526992,
"learning_rate": 9.385062086801013e-06,
"loss": 2.6666879653930664,
"step": 349
},
{
"epoch": 0.7352941176470589,
"grad_norm": 11.1137083326389,
"learning_rate": 9.379175704221139e-06,
"loss": 2.885680675506592,
"step": 350
},
{
"epoch": 0.7373949579831933,
"grad_norm": 10.845634322034954,
"learning_rate": 9.37326314670431e-06,
"loss": 2.948115110397339,
"step": 351
},
{
"epoch": 0.7394957983193278,
"grad_norm": 20.12834911912162,
"learning_rate": 9.367324449590694e-06,
"loss": 2.743468761444092,
"step": 352
},
{
"epoch": 0.7415966386554622,
"grad_norm": 10.20324180750042,
"learning_rate": 9.361359648376707e-06,
"loss": 3.0895063877105713,
"step": 353
},
{
"epoch": 0.7436974789915967,
"grad_norm": 16.159497011872574,
"learning_rate": 9.355368778714784e-06,
"loss": 2.808818817138672,
"step": 354
},
{
"epoch": 0.7457983193277311,
"grad_norm": 14.111122417184372,
"learning_rate": 9.349351876413181e-06,
"loss": 2.889227867126465,
"step": 355
},
{
"epoch": 0.7478991596638656,
"grad_norm": 10.685634708452614,
"learning_rate": 9.343308977435754e-06,
"loss": 3.021900177001953,
"step": 356
},
{
"epoch": 0.75,
"grad_norm": 29.116347517828004,
"learning_rate": 9.337240117901742e-06,
"loss": 2.4112629890441895,
"step": 357
},
{
"epoch": 0.7521008403361344,
"grad_norm": 8.824096565810732,
"learning_rate": 9.331145334085554e-06,
"loss": 2.898515224456787,
"step": 358
},
{
"epoch": 0.7542016806722689,
"grad_norm": 21.430509149211513,
"learning_rate": 9.325024662416553e-06,
"loss": 2.683413028717041,
"step": 359
},
{
"epoch": 0.7563025210084033,
"grad_norm": 10.321295794427858,
"learning_rate": 9.318878139478842e-06,
"loss": 2.890808582305908,
"step": 360
},
{
"epoch": 0.7584033613445378,
"grad_norm": 20.795050786572304,
"learning_rate": 9.312705802011029e-06,
"loss": 2.9919955730438232,
"step": 361
},
{
"epoch": 0.7605042016806722,
"grad_norm": 9.83932446467153,
"learning_rate": 9.306507686906033e-06,
"loss": 2.7725915908813477,
"step": 362
},
{
"epoch": 0.7626050420168067,
"grad_norm": 9.916160263978837,
"learning_rate": 9.300283831210838e-06,
"loss": 2.9397757053375244,
"step": 363
},
{
"epoch": 0.7647058823529411,
"grad_norm": 9.312450032530169,
"learning_rate": 9.294034272126286e-06,
"loss": 2.770698070526123,
"step": 364
},
{
"epoch": 0.7668067226890757,
"grad_norm": 13.885653883484695,
"learning_rate": 9.28775904700686e-06,
"loss": 2.5156445503234863,
"step": 365
},
{
"epoch": 0.7689075630252101,
"grad_norm": 13.084004538001976,
"learning_rate": 9.281458193360442e-06,
"loss": 2.597851276397705,
"step": 366
},
{
"epoch": 0.7710084033613446,
"grad_norm": 17.679727525867335,
"learning_rate": 9.2751317488481e-06,
"loss": 2.4659290313720703,
"step": 367
},
{
"epoch": 0.773109243697479,
"grad_norm": 9.199345804679885,
"learning_rate": 9.26877975128387e-06,
"loss": 3.0518131256103516,
"step": 368
},
{
"epoch": 0.7752100840336135,
"grad_norm": 15.824344742656248,
"learning_rate": 9.262402238634514e-06,
"loss": 2.0272233486175537,
"step": 369
},
{
"epoch": 0.7773109243697479,
"grad_norm": 9.874132429438818,
"learning_rate": 9.255999249019307e-06,
"loss": 2.282167911529541,
"step": 370
},
{
"epoch": 0.7794117647058824,
"grad_norm": 8.044430179764902,
"learning_rate": 9.2495708207098e-06,
"loss": 2.447831869125366,
"step": 371
},
{
"epoch": 0.7815126050420168,
"grad_norm": 15.289268393319317,
"learning_rate": 9.243116992129593e-06,
"loss": 2.5548458099365234,
"step": 372
},
{
"epoch": 0.7836134453781513,
"grad_norm": 18.576142639391133,
"learning_rate": 9.23663780185411e-06,
"loss": 2.2244365215301514,
"step": 373
},
{
"epoch": 0.7857142857142857,
"grad_norm": 8.55234069521718,
"learning_rate": 9.230133288610366e-06,
"loss": 3.044992208480835,
"step": 374
},
{
"epoch": 0.7878151260504201,
"grad_norm": 11.142079035862414,
"learning_rate": 9.223603491276733e-06,
"loss": 2.545569896697998,
"step": 375
},
{
"epoch": 0.7899159663865546,
"grad_norm": 14.123674718701432,
"learning_rate": 9.217048448882711e-06,
"loss": 3.337583541870117,
"step": 376
},
{
"epoch": 0.792016806722689,
"grad_norm": 16.146080651689587,
"learning_rate": 9.210468200608691e-06,
"loss": 3.1922380924224854,
"step": 377
},
{
"epoch": 0.7941176470588235,
"grad_norm": 12.28600079308305,
"learning_rate": 9.203862785785724e-06,
"loss": 2.5922632217407227,
"step": 378
},
{
"epoch": 0.7962184873949579,
"grad_norm": 18.04398024676097,
"learning_rate": 9.197232243895285e-06,
"loss": 2.876894474029541,
"step": 379
},
{
"epoch": 0.7983193277310925,
"grad_norm": 13.494043036714963,
"learning_rate": 9.190576614569035e-06,
"loss": 2.7677531242370605,
"step": 380
},
{
"epoch": 0.8004201680672269,
"grad_norm": 23.463052019031387,
"learning_rate": 9.183895937588594e-06,
"loss": 1.9870229959487915,
"step": 381
},
{
"epoch": 0.8025210084033614,
"grad_norm": 7.7476580634838665,
"learning_rate": 9.177190252885285e-06,
"loss": 2.784242868423462,
"step": 382
},
{
"epoch": 0.8046218487394958,
"grad_norm": 6.086395137680743,
"learning_rate": 9.17045960053991e-06,
"loss": 2.878697395324707,
"step": 383
},
{
"epoch": 0.8067226890756303,
"grad_norm": 16.59316957110638,
"learning_rate": 9.163704020782507e-06,
"loss": 2.7685139179229736,
"step": 384
},
{
"epoch": 0.8088235294117647,
"grad_norm": 15.470438153645851,
"learning_rate": 9.156923553992107e-06,
"loss": 2.8312299251556396,
"step": 385
},
{
"epoch": 0.8109243697478992,
"grad_norm": 8.00902098985157,
"learning_rate": 9.150118240696497e-06,
"loss": 1.7165706157684326,
"step": 386
},
{
"epoch": 0.8130252100840336,
"grad_norm": 14.0610194690077,
"learning_rate": 9.14328812157197e-06,
"loss": 3.451162815093994,
"step": 387
},
{
"epoch": 0.8151260504201681,
"grad_norm": 10.174053809556211,
"learning_rate": 9.136433237443093e-06,
"loss": 3.455259084701538,
"step": 388
},
{
"epoch": 0.8172268907563025,
"grad_norm": 14.076181600112081,
"learning_rate": 9.129553629282448e-06,
"loss": 3.3125205039978027,
"step": 389
},
{
"epoch": 0.819327731092437,
"grad_norm": 11.760967038966463,
"learning_rate": 9.122649338210407e-06,
"loss": 3.175715923309326,
"step": 390
},
{
"epoch": 0.8214285714285714,
"grad_norm": 12.215337173611072,
"learning_rate": 9.115720405494868e-06,
"loss": 3.426882743835449,
"step": 391
},
{
"epoch": 0.8235294117647058,
"grad_norm": 16.884819154921146,
"learning_rate": 9.108766872551016e-06,
"loss": 2.693225860595703,
"step": 392
},
{
"epoch": 0.8256302521008403,
"grad_norm": 11.991779005638564,
"learning_rate": 9.101788780941076e-06,
"loss": 2.8251726627349854,
"step": 393
},
{
"epoch": 0.8277310924369747,
"grad_norm": 8.901523397999386,
"learning_rate": 9.094786172374066e-06,
"loss": 2.845076560974121,
"step": 394
},
{
"epoch": 0.8298319327731093,
"grad_norm": 15.559813600064993,
"learning_rate": 9.087759088705541e-06,
"loss": 2.9212491512298584,
"step": 395
},
{
"epoch": 0.8319327731092437,
"grad_norm": 12.334218057409931,
"learning_rate": 9.08070757193735e-06,
"loss": 2.752890110015869,
"step": 396
},
{
"epoch": 0.8340336134453782,
"grad_norm": 20.040022595533,
"learning_rate": 9.07363166421738e-06,
"loss": 3.1292171478271484,
"step": 397
},
{
"epoch": 0.8361344537815126,
"grad_norm": 9.339997691276547,
"learning_rate": 9.066531407839307e-06,
"loss": 2.2926840782165527,
"step": 398
},
{
"epoch": 0.8382352941176471,
"grad_norm": 9.210411213235453,
"learning_rate": 9.059406845242343e-06,
"loss": 2.7644119262695312,
"step": 399
},
{
"epoch": 0.8403361344537815,
"grad_norm": 13.484928949211756,
"learning_rate": 9.05225801901098e-06,
"loss": 2.9096150398254395,
"step": 400
},
{
"epoch": 0.842436974789916,
"grad_norm": 21.901892899759964,
"learning_rate": 9.045084971874738e-06,
"loss": 4.536911964416504,
"step": 401
},
{
"epoch": 0.8445378151260504,
"grad_norm": 8.027798710835631,
"learning_rate": 9.03788774670791e-06,
"loss": 3.3775062561035156,
"step": 402
},
{
"epoch": 0.8466386554621849,
"grad_norm": 11.22841391004864,
"learning_rate": 9.030666386529303e-06,
"loss": 2.755703926086426,
"step": 403
},
{
"epoch": 0.8487394957983193,
"grad_norm": 9.698938581529527,
"learning_rate": 9.023420934501981e-06,
"loss": 2.812281608581543,
"step": 404
},
{
"epoch": 0.8508403361344538,
"grad_norm": 9.495702557416454,
"learning_rate": 9.01615143393301e-06,
"loss": 2.9015493392944336,
"step": 405
},
{
"epoch": 0.8529411764705882,
"grad_norm": 8.59480884978166,
"learning_rate": 9.008857928273199e-06,
"loss": 2.8743391036987305,
"step": 406
},
{
"epoch": 0.8550420168067226,
"grad_norm": 14.060855102265236,
"learning_rate": 9.001540461116835e-06,
"loss": 2.7400550842285156,
"step": 407
},
{
"epoch": 0.8571428571428571,
"grad_norm": 9.670354596798553,
"learning_rate": 8.994199076201428e-06,
"loss": 3.788983106613159,
"step": 408
},
{
"epoch": 0.8592436974789915,
"grad_norm": 10.094582977623446,
"learning_rate": 8.98683381740745e-06,
"loss": 2.426604747772217,
"step": 409
},
{
"epoch": 0.8613445378151261,
"grad_norm": 6.42119276092813,
"learning_rate": 8.979444728758067e-06,
"loss": 2.467769145965576,
"step": 410
},
{
"epoch": 0.8634453781512605,
"grad_norm": 101.25120998420752,
"learning_rate": 8.97203185441888e-06,
"loss": 2.878884792327881,
"step": 411
},
{
"epoch": 0.865546218487395,
"grad_norm": 10.063927366400284,
"learning_rate": 8.964595238697659e-06,
"loss": 3.323913812637329,
"step": 412
},
{
"epoch": 0.8676470588235294,
"grad_norm": 8.176196947638319,
"learning_rate": 8.957134926044088e-06,
"loss": 2.2674732208251953,
"step": 413
},
{
"epoch": 0.8697478991596639,
"grad_norm": 7.688045397272728,
"learning_rate": 8.949650961049479e-06,
"loss": 2.6359667778015137,
"step": 414
},
{
"epoch": 0.8718487394957983,
"grad_norm": 12.061723837223782,
"learning_rate": 8.942143388446522e-06,
"loss": 4.3965678215026855,
"step": 415
},
{
"epoch": 0.8739495798319328,
"grad_norm": 13.801014710596668,
"learning_rate": 8.934612253109017e-06,
"loss": 3.584599733352661,
"step": 416
},
{
"epoch": 0.8760504201680672,
"grad_norm": 11.465324791085347,
"learning_rate": 8.927057600051594e-06,
"loss": 2.9781904220581055,
"step": 417
},
{
"epoch": 0.8781512605042017,
"grad_norm": 32.19803137859573,
"learning_rate": 8.919479474429462e-06,
"loss": 3.3312220573425293,
"step": 418
},
{
"epoch": 0.8802521008403361,
"grad_norm": 23.418640662777587,
"learning_rate": 8.911877921538117e-06,
"loss": 3.8054161071777344,
"step": 419
},
{
"epoch": 0.8823529411764706,
"grad_norm": 7.760210305795623,
"learning_rate": 8.904252986813091e-06,
"loss": 2.8041489124298096,
"step": 420
},
{
"epoch": 0.884453781512605,
"grad_norm": 13.790720201964906,
"learning_rate": 8.896604715829671e-06,
"loss": 2.8391265869140625,
"step": 421
},
{
"epoch": 0.8865546218487395,
"grad_norm": 8.558877313925247,
"learning_rate": 8.888933154302626e-06,
"loss": 2.6835553646087646,
"step": 422
},
{
"epoch": 0.8886554621848739,
"grad_norm": 21.689551042379083,
"learning_rate": 8.881238348085936e-06,
"loss": 2.6738481521606445,
"step": 423
},
{
"epoch": 0.8907563025210085,
"grad_norm": 7.686758427886692,
"learning_rate": 8.87352034317252e-06,
"loss": 2.619101047515869,
"step": 424
},
{
"epoch": 0.8928571428571429,
"grad_norm": 20.40695143594997,
"learning_rate": 8.865779185693957e-06,
"loss": 3.3444905281066895,
"step": 425
},
{
"epoch": 0.8949579831932774,
"grad_norm": 27.431337065110313,
"learning_rate": 8.858014921920215e-06,
"loss": 2.1527421474456787,
"step": 426
},
{
"epoch": 0.8970588235294118,
"grad_norm": 20.606507987678672,
"learning_rate": 8.850227598259365e-06,
"loss": 2.6689836978912354,
"step": 427
},
{
"epoch": 0.8991596638655462,
"grad_norm": 8.968995022440353,
"learning_rate": 8.842417261257316e-06,
"loss": 3.0119547843933105,
"step": 428
},
{
"epoch": 0.9012605042016807,
"grad_norm": 28.528232969469133,
"learning_rate": 8.83458395759753e-06,
"loss": 2.482861042022705,
"step": 429
},
{
"epoch": 0.9033613445378151,
"grad_norm": 10.074031458183692,
"learning_rate": 8.826727734100742e-06,
"loss": 2.8982067108154297,
"step": 430
},
{
"epoch": 0.9054621848739496,
"grad_norm": 8.399253353390154,
"learning_rate": 8.818848637724681e-06,
"loss": 2.5004382133483887,
"step": 431
},
{
"epoch": 0.907563025210084,
"grad_norm": 8.747805949968082,
"learning_rate": 8.810946715563798e-06,
"loss": 2.612011194229126,
"step": 432
},
{
"epoch": 0.9096638655462185,
"grad_norm": 10.425702565789909,
"learning_rate": 8.803022014848966e-06,
"loss": 2.9700820446014404,
"step": 433
},
{
"epoch": 0.9117647058823529,
"grad_norm": 11.029401754074971,
"learning_rate": 8.795074582947214e-06,
"loss": 3.248368263244629,
"step": 434
},
{
"epoch": 0.9138655462184874,
"grad_norm": 9.336382488449228,
"learning_rate": 8.787104467361442e-06,
"loss": 2.993704319000244,
"step": 435
},
{
"epoch": 0.9159663865546218,
"grad_norm": 13.327453834983904,
"learning_rate": 8.779111715730127e-06,
"loss": 2.6930155754089355,
"step": 436
},
{
"epoch": 0.9180672268907563,
"grad_norm": 10.570908488031245,
"learning_rate": 8.771096375827047e-06,
"loss": 3.069434404373169,
"step": 437
},
{
"epoch": 0.9201680672268907,
"grad_norm": 19.381962817436207,
"learning_rate": 8.763058495560994e-06,
"loss": 3.1358611583709717,
"step": 438
},
{
"epoch": 0.9222689075630253,
"grad_norm": 18.418237048785702,
"learning_rate": 8.754998122975489e-06,
"loss": 3.2987184524536133,
"step": 439
},
{
"epoch": 0.9243697478991597,
"grad_norm": 8.737803987239646,
"learning_rate": 8.746915306248488e-06,
"loss": 2.9279255867004395,
"step": 440
},
{
"epoch": 0.9264705882352942,
"grad_norm": 13.117095498271222,
"learning_rate": 8.7388100936921e-06,
"loss": 2.795942783355713,
"step": 441
},
{
"epoch": 0.9285714285714286,
"grad_norm": 25.973728201733575,
"learning_rate": 8.730682533752301e-06,
"loss": 2.7590699195861816,
"step": 442
},
{
"epoch": 0.930672268907563,
"grad_norm": 9.543199289400748,
"learning_rate": 8.722532675008635e-06,
"loss": 2.6571459770202637,
"step": 443
},
{
"epoch": 0.9327731092436975,
"grad_norm": 10.69198569405724,
"learning_rate": 8.714360566173932e-06,
"loss": 2.7342920303344727,
"step": 444
},
{
"epoch": 0.9348739495798319,
"grad_norm": 13.298135717649288,
"learning_rate": 8.706166256094013e-06,
"loss": 2.9492366313934326,
"step": 445
},
{
"epoch": 0.9369747899159664,
"grad_norm": 18.5856782117513,
"learning_rate": 8.6979497937474e-06,
"loss": 2.937699317932129,
"step": 446
},
{
"epoch": 0.9390756302521008,
"grad_norm": 10.292297569389804,
"learning_rate": 8.689711228245021e-06,
"loss": 3.23824405670166,
"step": 447
},
{
"epoch": 0.9411764705882353,
"grad_norm": 19.90454431534383,
"learning_rate": 8.681450608829916e-06,
"loss": 2.542668581008911,
"step": 448
},
{
"epoch": 0.9432773109243697,
"grad_norm": 14.413143934794212,
"learning_rate": 8.67316798487695e-06,
"loss": 3.257632255554199,
"step": 449
},
{
"epoch": 0.9453781512605042,
"grad_norm": 10.80231465762936,
"learning_rate": 8.664863405892506e-06,
"loss": 2.7072958946228027,
"step": 450
},
{
"epoch": 0.9474789915966386,
"grad_norm": 18.020582485094227,
"learning_rate": 8.656536921514195e-06,
"loss": 2.532301664352417,
"step": 451
},
{
"epoch": 0.9495798319327731,
"grad_norm": 12.503896279810512,
"learning_rate": 8.648188581510567e-06,
"loss": 2.726604461669922,
"step": 452
},
{
"epoch": 0.9516806722689075,
"grad_norm": 18.785189447389097,
"learning_rate": 8.639818435780797e-06,
"loss": 2.516594886779785,
"step": 453
},
{
"epoch": 0.9537815126050421,
"grad_norm": 12.0120687102085,
"learning_rate": 8.631426534354404e-06,
"loss": 2.7706644535064697,
"step": 454
},
{
"epoch": 0.9558823529411765,
"grad_norm": 11.506720081337315,
"learning_rate": 8.623012927390936e-06,
"loss": 3.2427144050598145,
"step": 455
},
{
"epoch": 0.957983193277311,
"grad_norm": 10.11083550503784,
"learning_rate": 8.614577665179684e-06,
"loss": 3.1202523708343506,
"step": 456
},
{
"epoch": 0.9600840336134454,
"grad_norm": 15.945109216294865,
"learning_rate": 8.606120798139375e-06,
"loss": 2.6210598945617676,
"step": 457
},
{
"epoch": 0.9621848739495799,
"grad_norm": 9.09618149788864,
"learning_rate": 8.597642376817865e-06,
"loss": 2.669271469116211,
"step": 458
},
{
"epoch": 0.9642857142857143,
"grad_norm": 8.714640631605363,
"learning_rate": 8.589142451891849e-06,
"loss": 2.6489734649658203,
"step": 459
},
{
"epoch": 0.9663865546218487,
"grad_norm": 7.855597298788909,
"learning_rate": 8.580621074166553e-06,
"loss": 3.10178804397583,
"step": 460
},
{
"epoch": 0.9684873949579832,
"grad_norm": 10.502691052340555,
"learning_rate": 8.572078294575423e-06,
"loss": 2.589158296585083,
"step": 461
},
{
"epoch": 0.9705882352941176,
"grad_norm": 10.459968052493494,
"learning_rate": 8.56351416417983e-06,
"loss": 2.5543792247772217,
"step": 462
},
{
"epoch": 0.9726890756302521,
"grad_norm": 12.885512846289808,
"learning_rate": 8.554928734168767e-06,
"loss": 2.65985369682312,
"step": 463
},
{
"epoch": 0.9747899159663865,
"grad_norm": 9.639047199230617,
"learning_rate": 8.546322055858526e-06,
"loss": 3.0177440643310547,
"step": 464
},
{
"epoch": 0.976890756302521,
"grad_norm": 9.494268049756599,
"learning_rate": 8.537694180692416e-06,
"loss": 2.2767248153686523,
"step": 465
},
{
"epoch": 0.9789915966386554,
"grad_norm": 12.56887928459161,
"learning_rate": 8.529045160240433e-06,
"loss": 2.7835707664489746,
"step": 466
},
{
"epoch": 0.9810924369747899,
"grad_norm": 10.580355179128095,
"learning_rate": 8.520375046198965e-06,
"loss": 2.4373722076416016,
"step": 467
},
{
"epoch": 0.9831932773109243,
"grad_norm": 10.13582135951574,
"learning_rate": 8.51168389039048e-06,
"loss": 2.464303731918335,
"step": 468
},
{
"epoch": 0.9852941176470589,
"grad_norm": 12.209700818401375,
"learning_rate": 8.502971744763216e-06,
"loss": 2.2609100341796875,
"step": 469
},
{
"epoch": 0.9873949579831933,
"grad_norm": 21.359445929891656,
"learning_rate": 8.494238661390865e-06,
"loss": 3.0135858058929443,
"step": 470
},
{
"epoch": 0.9894957983193278,
"grad_norm": 15.087072293517004,
"learning_rate": 8.485484692472272e-06,
"loss": 2.770965099334717,
"step": 471
},
{
"epoch": 0.9915966386554622,
"grad_norm": 8.181199645745421,
"learning_rate": 8.476709890331116e-06,
"loss": 2.6243722438812256,
"step": 472
},
{
"epoch": 0.9936974789915967,
"grad_norm": 7.527423998031555,
"learning_rate": 8.467914307415601e-06,
"loss": 2.9319207668304443,
"step": 473
},
{
"epoch": 0.9957983193277311,
"grad_norm": 9.424234237676545,
"learning_rate": 8.459097996298137e-06,
"loss": 3.0626072883605957,
"step": 474
},
{
"epoch": 0.9978991596638656,
"grad_norm": 14.444274317338678,
"learning_rate": 8.45026100967503e-06,
"loss": 3.000889778137207,
"step": 475
},
{
"epoch": 1.0,
"grad_norm": 8.45019782867115,
"learning_rate": 8.441403400366169e-06,
"loss": 3.112825393676758,
"step": 476
},
{
"epoch": 1.0021008403361344,
"grad_norm": 19.596775314152666,
"learning_rate": 8.432525221314708e-06,
"loss": 1.4137624502182007,
"step": 477
},
{
"epoch": 1.004201680672269,
"grad_norm": 9.233023882113994,
"learning_rate": 8.423626525586744e-06,
"loss": 1.6808059215545654,
"step": 478
},
{
"epoch": 1.0063025210084033,
"grad_norm": 9.789186389046735,
"learning_rate": 8.414707366371006e-06,
"loss": 1.8797330856323242,
"step": 479
},
{
"epoch": 1.0084033613445378,
"grad_norm": 7.894274079237724,
"learning_rate": 8.405767796978546e-06,
"loss": 1.9548699855804443,
"step": 480
},
{
"epoch": 1.0105042016806722,
"grad_norm": 11.882995555931503,
"learning_rate": 8.396807870842396e-06,
"loss": 1.5713114738464355,
"step": 481
},
{
"epoch": 1.0126050420168067,
"grad_norm": 14.948396348319923,
"learning_rate": 8.387827641517274e-06,
"loss": 1.69504976272583,
"step": 482
},
{
"epoch": 1.0147058823529411,
"grad_norm": 6.935744624929541,
"learning_rate": 8.378827162679248e-06,
"loss": 1.3813257217407227,
"step": 483
},
{
"epoch": 1.0168067226890756,
"grad_norm": 9.50729885231966,
"learning_rate": 8.369806488125418e-06,
"loss": 2.4568567276000977,
"step": 484
},
{
"epoch": 1.01890756302521,
"grad_norm": 11.62592077082348,
"learning_rate": 8.360765671773603e-06,
"loss": 2.602184534072876,
"step": 485
},
{
"epoch": 1.0210084033613445,
"grad_norm": 15.469624436922395,
"learning_rate": 8.351704767662005e-06,
"loss": 1.8193070888519287,
"step": 486
},
{
"epoch": 1.023109243697479,
"grad_norm": 12.389371131721145,
"learning_rate": 8.3426238299489e-06,
"loss": 1.4549766778945923,
"step": 487
},
{
"epoch": 1.0252100840336134,
"grad_norm": 7.898711913261212,
"learning_rate": 8.333522912912308e-06,
"loss": 1.4681106805801392,
"step": 488
},
{
"epoch": 1.0273109243697478,
"grad_norm": 14.553557605821632,
"learning_rate": 8.324402070949658e-06,
"loss": 1.4224164485931396,
"step": 489
},
{
"epoch": 1.0294117647058822,
"grad_norm": 21.0322684953627,
"learning_rate": 8.315261358577485e-06,
"loss": 2.200676441192627,
"step": 490
},
{
"epoch": 1.0315126050420167,
"grad_norm": 14.230965851092702,
"learning_rate": 8.306100830431085e-06,
"loss": 1.867397665977478,
"step": 491
},
{
"epoch": 1.0336134453781514,
"grad_norm": 11.330315084805383,
"learning_rate": 8.296920541264197e-06,
"loss": 1.4270985126495361,
"step": 492
},
{
"epoch": 1.0357142857142858,
"grad_norm": 11.452248734086307,
"learning_rate": 8.287720545948676e-06,
"loss": 1.464069128036499,
"step": 493
},
{
"epoch": 1.0378151260504203,
"grad_norm": 18.476525141242952,
"learning_rate": 8.278500899474162e-06,
"loss": 1.192551612854004,
"step": 494
},
{
"epoch": 1.0399159663865547,
"grad_norm": 13.695173322132312,
"learning_rate": 8.269261656947755e-06,
"loss": 2.367762327194214,
"step": 495
},
{
"epoch": 1.0420168067226891,
"grad_norm": 12.101022572223535,
"learning_rate": 8.260002873593679e-06,
"loss": 1.6752372980117798,
"step": 496
},
{
"epoch": 1.0441176470588236,
"grad_norm": 14.763270168918805,
"learning_rate": 8.25072460475296e-06,
"loss": 1.409712314605713,
"step": 497
},
{
"epoch": 1.046218487394958,
"grad_norm": 12.622229054224464,
"learning_rate": 8.24142690588309e-06,
"loss": 1.6270588636398315,
"step": 498
},
{
"epoch": 1.0483193277310925,
"grad_norm": 7.889964988601032,
"learning_rate": 8.232109832557696e-06,
"loss": 1.4294947385787964,
"step": 499
},
{
"epoch": 1.050420168067227,
"grad_norm": 9.640341277497848,
"learning_rate": 8.222773440466213e-06,
"loss": 1.2340010404586792,
"step": 500
},
{
"epoch": 1.0525210084033614,
"grad_norm": 9.361065825268032,
"learning_rate": 8.213417785413538e-06,
"loss": 1.451041340827942,
"step": 501
},
{
"epoch": 1.0546218487394958,
"grad_norm": 10.851800895184763,
"learning_rate": 8.204042923319717e-06,
"loss": 0.8124719858169556,
"step": 502
},
{
"epoch": 1.0567226890756303,
"grad_norm": 13.939415896202156,
"learning_rate": 8.19464891021959e-06,
"loss": 1.5310864448547363,
"step": 503
},
{
"epoch": 1.0588235294117647,
"grad_norm": 12.545903899817956,
"learning_rate": 8.18523580226247e-06,
"loss": 1.2139228582382202,
"step": 504
},
{
"epoch": 1.0609243697478992,
"grad_norm": 7.8688457688530455,
"learning_rate": 8.1758036557118e-06,
"loss": 1.3573241233825684,
"step": 505
},
{
"epoch": 1.0630252100840336,
"grad_norm": 29.274148786110516,
"learning_rate": 8.166352526944821e-06,
"loss": 1.9899749755859375,
"step": 506
},
{
"epoch": 1.065126050420168,
"grad_norm": 12.789841758713314,
"learning_rate": 8.156882472452232e-06,
"loss": 1.4103593826293945,
"step": 507
},
{
"epoch": 1.0672268907563025,
"grad_norm": 11.46688535188232,
"learning_rate": 8.147393548837856e-06,
"loss": 1.227393627166748,
"step": 508
},
{
"epoch": 1.069327731092437,
"grad_norm": 11.67493017233716,
"learning_rate": 8.137885812818296e-06,
"loss": 1.7060927152633667,
"step": 509
},
{
"epoch": 1.0714285714285714,
"grad_norm": 13.183390423963338,
"learning_rate": 8.128359321222601e-06,
"loss": 1.890432357788086,
"step": 510
},
{
"epoch": 1.0735294117647058,
"grad_norm": 8.769195455641308,
"learning_rate": 8.118814130991925e-06,
"loss": 1.8258857727050781,
"step": 511
},
{
"epoch": 1.0756302521008403,
"grad_norm": 9.016866647141889,
"learning_rate": 8.109250299179188e-06,
"loss": 0.9584097862243652,
"step": 512
},
{
"epoch": 1.0777310924369747,
"grad_norm": 8.866656672277916,
"learning_rate": 8.09966788294873e-06,
"loss": 1.4017150402069092,
"step": 513
},
{
"epoch": 1.0798319327731092,
"grad_norm": 12.12920225890514,
"learning_rate": 8.090066939575972e-06,
"loss": 1.3034381866455078,
"step": 514
},
{
"epoch": 1.0819327731092436,
"grad_norm": 11.169332765461306,
"learning_rate": 8.080447526447079e-06,
"loss": 1.0734150409698486,
"step": 515
},
{
"epoch": 1.084033613445378,
"grad_norm": 15.988980575396647,
"learning_rate": 8.070809701058606e-06,
"loss": 0.8819087743759155,
"step": 516
},
{
"epoch": 1.0861344537815125,
"grad_norm": 10.445041930863859,
"learning_rate": 8.061153521017169e-06,
"loss": 1.3253920078277588,
"step": 517
},
{
"epoch": 1.088235294117647,
"grad_norm": 7.477532974278996,
"learning_rate": 8.051479044039086e-06,
"loss": 1.0912744998931885,
"step": 518
},
{
"epoch": 1.0903361344537814,
"grad_norm": 15.386742532344485,
"learning_rate": 8.041786327950037e-06,
"loss": 1.6941767930984497,
"step": 519
},
{
"epoch": 1.092436974789916,
"grad_norm": 13.631587045212196,
"learning_rate": 8.032075430684724e-06,
"loss": 1.058671236038208,
"step": 520
},
{
"epoch": 1.0945378151260505,
"grad_norm": 9.174394889796707,
"learning_rate": 8.02234641028652e-06,
"loss": 1.1603420972824097,
"step": 521
},
{
"epoch": 1.096638655462185,
"grad_norm": 18.009634618634845,
"learning_rate": 8.012599324907121e-06,
"loss": 1.4285218715667725,
"step": 522
},
{
"epoch": 1.0987394957983194,
"grad_norm": 18.317588738929096,
"learning_rate": 8.0028342328062e-06,
"loss": 1.3041057586669922,
"step": 523
},
{
"epoch": 1.1008403361344539,
"grad_norm": 17.245361771703262,
"learning_rate": 7.993051192351056e-06,
"loss": 2.329005718231201,
"step": 524
},
{
"epoch": 1.1029411764705883,
"grad_norm": 5.466501144551759,
"learning_rate": 7.983250262016276e-06,
"loss": 0.7331016063690186,
"step": 525
},
{
"epoch": 1.1050420168067228,
"grad_norm": 19.76792957260025,
"learning_rate": 7.973431500383366e-06,
"loss": 2.193528175354004,
"step": 526
},
{
"epoch": 1.1071428571428572,
"grad_norm": 11.04973790435175,
"learning_rate": 7.963594966140423e-06,
"loss": 1.3245251178741455,
"step": 527
},
{
"epoch": 1.1092436974789917,
"grad_norm": 14.50002827076454,
"learning_rate": 7.953740718081765e-06,
"loss": 1.1308670043945312,
"step": 528
},
{
"epoch": 1.111344537815126,
"grad_norm": 8.457254255014693,
"learning_rate": 7.943868815107594e-06,
"loss": 1.3318034410476685,
"step": 529
},
{
"epoch": 1.1134453781512605,
"grad_norm": 12.48006901565296,
"learning_rate": 7.933979316223632e-06,
"loss": 1.2564438581466675,
"step": 530
},
{
"epoch": 1.115546218487395,
"grad_norm": 13.952521489657013,
"learning_rate": 7.92407228054078e-06,
"loss": 1.2420412302017212,
"step": 531
},
{
"epoch": 1.1176470588235294,
"grad_norm": 11.927118732913993,
"learning_rate": 7.914147767274756e-06,
"loss": 1.9582582712173462,
"step": 532
},
{
"epoch": 1.1197478991596639,
"grad_norm": 29.1836862977554,
"learning_rate": 7.904205835745744e-06,
"loss": 1.7057411670684814,
"step": 533
},
{
"epoch": 1.1218487394957983,
"grad_norm": 8.77699695792644,
"learning_rate": 7.894246545378037e-06,
"loss": 1.810387134552002,
"step": 534
},
{
"epoch": 1.1239495798319328,
"grad_norm": 11.812154757139437,
"learning_rate": 7.884269955699689e-06,
"loss": 1.6038577556610107,
"step": 535
},
{
"epoch": 1.1260504201680672,
"grad_norm": 11.347334970124107,
"learning_rate": 7.874276126342151e-06,
"loss": 1.1410393714904785,
"step": 536
},
{
"epoch": 1.1281512605042017,
"grad_norm": 23.969457981422316,
"learning_rate": 7.86426511703992e-06,
"loss": 2.28239369392395,
"step": 537
},
{
"epoch": 1.1302521008403361,
"grad_norm": 11.3793937172999,
"learning_rate": 7.854236987630178e-06,
"loss": 2.1672444343566895,
"step": 538
},
{
"epoch": 1.1323529411764706,
"grad_norm": 8.571185039369908,
"learning_rate": 7.844191798052438e-06,
"loss": 1.7712535858154297,
"step": 539
},
{
"epoch": 1.134453781512605,
"grad_norm": 8.155743104110897,
"learning_rate": 7.834129608348183e-06,
"loss": 1.4109793901443481,
"step": 540
},
{
"epoch": 1.1365546218487395,
"grad_norm": 12.006945471100122,
"learning_rate": 7.824050478660506e-06,
"loss": 1.4405725002288818,
"step": 541
},
{
"epoch": 1.138655462184874,
"grad_norm": 16.24385934265993,
"learning_rate": 7.813954469233758e-06,
"loss": 2.2450976371765137,
"step": 542
},
{
"epoch": 1.1407563025210083,
"grad_norm": 63.00358955157523,
"learning_rate": 7.803841640413177e-06,
"loss": 2.16367244720459,
"step": 543
},
{
"epoch": 1.1428571428571428,
"grad_norm": 10.995277933527825,
"learning_rate": 7.793712052644535e-06,
"loss": 2.3919224739074707,
"step": 544
},
{
"epoch": 1.1449579831932772,
"grad_norm": 9.931645247221951,
"learning_rate": 7.783565766473777e-06,
"loss": 1.4211726188659668,
"step": 545
},
{
"epoch": 1.1470588235294117,
"grad_norm": 12.106564772704573,
"learning_rate": 7.773402842546654e-06,
"loss": 1.2502498626708984,
"step": 546
},
{
"epoch": 1.1491596638655461,
"grad_norm": 8.144149987908426,
"learning_rate": 7.76322334160836e-06,
"loss": 1.423762321472168,
"step": 547
},
{
"epoch": 1.1512605042016806,
"grad_norm": 10.152738619426868,
"learning_rate": 7.75302732450318e-06,
"loss": 1.1090279817581177,
"step": 548
},
{
"epoch": 1.153361344537815,
"grad_norm": 11.024880610484013,
"learning_rate": 7.742814852174112e-06,
"loss": 1.0321426391601562,
"step": 549
},
{
"epoch": 1.1554621848739495,
"grad_norm": 10.4112886492949,
"learning_rate": 7.73258598566251e-06,
"loss": 1.0928632020950317,
"step": 550
},
{
"epoch": 1.157563025210084,
"grad_norm": 17.17079853756711,
"learning_rate": 7.72234078610772e-06,
"loss": 1.2369472980499268,
"step": 551
},
{
"epoch": 1.1596638655462184,
"grad_norm": 12.662228894532866,
"learning_rate": 7.712079314746716e-06,
"loss": 1.2957392930984497,
"step": 552
},
{
"epoch": 1.161764705882353,
"grad_norm": 8.967923305212855,
"learning_rate": 7.701801632913722e-06,
"loss": 1.6709070205688477,
"step": 553
},
{
"epoch": 1.1638655462184875,
"grad_norm": 9.520057506790387,
"learning_rate": 7.691507802039861e-06,
"loss": 1.6091077327728271,
"step": 554
},
{
"epoch": 1.165966386554622,
"grad_norm": 12.924582534581134,
"learning_rate": 7.68119788365278e-06,
"loss": 1.8003133535385132,
"step": 555
},
{
"epoch": 1.1680672268907564,
"grad_norm": 8.027840739484652,
"learning_rate": 7.670871939376281e-06,
"loss": 1.0151593685150146,
"step": 556
},
{
"epoch": 1.1701680672268908,
"grad_norm": 10.792867985796137,
"learning_rate": 7.660530030929961e-06,
"loss": 1.3084783554077148,
"step": 557
},
{
"epoch": 1.1722689075630253,
"grad_norm": 10.336895443268714,
"learning_rate": 7.650172220128828e-06,
"loss": 1.3882572650909424,
"step": 558
},
{
"epoch": 1.1743697478991597,
"grad_norm": 11.46121788240209,
"learning_rate": 7.639798568882947e-06,
"loss": 1.3919298648834229,
"step": 559
},
{
"epoch": 1.1764705882352942,
"grad_norm": 11.442052901701038,
"learning_rate": 7.629409139197063e-06,
"loss": 1.3745830059051514,
"step": 560
},
{
"epoch": 1.1785714285714286,
"grad_norm": 22.18812336562329,
"learning_rate": 7.619003993170226e-06,
"loss": 1.2964568138122559,
"step": 561
},
{
"epoch": 1.180672268907563,
"grad_norm": 14.305068677598294,
"learning_rate": 7.608583192995433e-06,
"loss": 1.75518798828125,
"step": 562
},
{
"epoch": 1.1827731092436975,
"grad_norm": 38.68186634407232,
"learning_rate": 7.598146800959238e-06,
"loss": 2.156588554382324,
"step": 563
},
{
"epoch": 1.184873949579832,
"grad_norm": 15.824079652626462,
"learning_rate": 7.5876948794414015e-06,
"loss": 1.3602566719055176,
"step": 564
},
{
"epoch": 1.1869747899159664,
"grad_norm": 13.134376075413467,
"learning_rate": 7.577227490914495e-06,
"loss": 1.5620733499526978,
"step": 565
},
{
"epoch": 1.1890756302521008,
"grad_norm": 9.860309886809128,
"learning_rate": 7.5667446979435445e-06,
"loss": 0.971282422542572,
"step": 566
},
{
"epoch": 1.1911764705882353,
"grad_norm": 11.617960075857892,
"learning_rate": 7.556246563185648e-06,
"loss": 1.1717581748962402,
"step": 567
},
{
"epoch": 1.1932773109243697,
"grad_norm": 14.78629106010037,
"learning_rate": 7.545733149389605e-06,
"loss": 1.8813025951385498,
"step": 568
},
{
"epoch": 1.1953781512605042,
"grad_norm": 11.92994585452875,
"learning_rate": 7.535204519395538e-06,
"loss": 1.280207633972168,
"step": 569
},
{
"epoch": 1.1974789915966386,
"grad_norm": 14.537731397359755,
"learning_rate": 7.5246607361345215e-06,
"loss": 1.5685778856277466,
"step": 570
},
{
"epoch": 1.199579831932773,
"grad_norm": 9.978722079402786,
"learning_rate": 7.514101862628203e-06,
"loss": 2.2011172771453857,
"step": 571
},
{
"epoch": 1.2016806722689075,
"grad_norm": 14.328584272935853,
"learning_rate": 7.503527961988422e-06,
"loss": 2.0038180351257324,
"step": 572
},
{
"epoch": 1.203781512605042,
"grad_norm": 11.49676437218398,
"learning_rate": 7.492939097416842e-06,
"loss": 1.1275922060012817,
"step": 573
},
{
"epoch": 1.2058823529411764,
"grad_norm": 13.603928637496292,
"learning_rate": 7.482335332204568e-06,
"loss": 1.208678960800171,
"step": 574
},
{
"epoch": 1.2079831932773109,
"grad_norm": 10.710849924738463,
"learning_rate": 7.471716729731764e-06,
"loss": 1.7450125217437744,
"step": 575
},
{
"epoch": 1.2100840336134453,
"grad_norm": 10.8408813790809,
"learning_rate": 7.461083353467283e-06,
"loss": 1.5381510257720947,
"step": 576
},
{
"epoch": 1.2121848739495797,
"grad_norm": 10.502717838660322,
"learning_rate": 7.450435266968279e-06,
"loss": 1.6857651472091675,
"step": 577
},
{
"epoch": 1.2142857142857142,
"grad_norm": 10.194196645130454,
"learning_rate": 7.4397725338798365e-06,
"loss": 1.9049471616744995,
"step": 578
},
{
"epoch": 1.2163865546218489,
"grad_norm": 8.336901180250376,
"learning_rate": 7.429095217934578e-06,
"loss": 2.2398974895477295,
"step": 579
},
{
"epoch": 1.2184873949579833,
"grad_norm": 8.289301563947674,
"learning_rate": 7.4184033829522935e-06,
"loss": 1.8767409324645996,
"step": 580
},
{
"epoch": 1.2205882352941178,
"grad_norm": 7.83258681688038,
"learning_rate": 7.4076970928395565e-06,
"loss": 1.4787061214447021,
"step": 581
},
{
"epoch": 1.2226890756302522,
"grad_norm": 11.288493150816146,
"learning_rate": 7.396976411589338e-06,
"loss": 1.1055876016616821,
"step": 582
},
{
"epoch": 1.2247899159663866,
"grad_norm": 8.627197279612671,
"learning_rate": 7.386241403280629e-06,
"loss": 1.668757438659668,
"step": 583
},
{
"epoch": 1.226890756302521,
"grad_norm": 7.9829732080808276,
"learning_rate": 7.375492132078051e-06,
"loss": 1.2818783521652222,
"step": 584
},
{
"epoch": 1.2289915966386555,
"grad_norm": 9.132163063845432,
"learning_rate": 7.364728662231484e-06,
"loss": 1.578829050064087,
"step": 585
},
{
"epoch": 1.23109243697479,
"grad_norm": 9.541187433357738,
"learning_rate": 7.353951058075669e-06,
"loss": 1.572939157485962,
"step": 586
},
{
"epoch": 1.2331932773109244,
"grad_norm": 29.472008336805924,
"learning_rate": 7.343159384029833e-06,
"loss": 3.977992057800293,
"step": 587
},
{
"epoch": 1.2352941176470589,
"grad_norm": 12.577355177733914,
"learning_rate": 7.332353704597299e-06,
"loss": 1.955003023147583,
"step": 588
},
{
"epoch": 1.2373949579831933,
"grad_norm": 10.61755598072498,
"learning_rate": 7.321534084365101e-06,
"loss": 1.5401737689971924,
"step": 589
},
{
"epoch": 1.2394957983193278,
"grad_norm": 17.052134953118316,
"learning_rate": 7.310700588003605e-06,
"loss": 1.895308017730713,
"step": 590
},
{
"epoch": 1.2415966386554622,
"grad_norm": 13.914617942504853,
"learning_rate": 7.299853280266109e-06,
"loss": 1.6920474767684937,
"step": 591
},
{
"epoch": 1.2436974789915967,
"grad_norm": 18.300626922757814,
"learning_rate": 7.28899222598847e-06,
"loss": 1.9865736961364746,
"step": 592
},
{
"epoch": 1.245798319327731,
"grad_norm": 8.604822405832417,
"learning_rate": 7.278117490088703e-06,
"loss": 1.2350941896438599,
"step": 593
},
{
"epoch": 1.2478991596638656,
"grad_norm": 15.714181520858954,
"learning_rate": 7.267229137566607e-06,
"loss": 1.800095558166504,
"step": 594
},
{
"epoch": 1.25,
"grad_norm": 7.625924941471246,
"learning_rate": 7.256327233503365e-06,
"loss": 1.848137617111206,
"step": 595
},
{
"epoch": 1.2521008403361344,
"grad_norm": 8.480492494477819,
"learning_rate": 7.24541184306116e-06,
"loss": 1.7656617164611816,
"step": 596
},
{
"epoch": 1.254201680672269,
"grad_norm": 11.960261973795399,
"learning_rate": 7.234483031482787e-06,
"loss": 1.0096323490142822,
"step": 597
},
{
"epoch": 1.2563025210084033,
"grad_norm": 9.709001923888373,
"learning_rate": 7.223540864091259e-06,
"loss": 1.428197979927063,
"step": 598
},
{
"epoch": 1.2584033613445378,
"grad_norm": 11.628908186348927,
"learning_rate": 7.2125854062894184e-06,
"loss": 1.0703970193862915,
"step": 599
},
{
"epoch": 1.2605042016806722,
"grad_norm": 14.20204722362147,
"learning_rate": 7.201616723559548e-06,
"loss": 1.7873646020889282,
"step": 600
},
{
"epoch": 1.2626050420168067,
"grad_norm": 12.326258967391198,
"learning_rate": 7.190634881462976e-06,
"loss": 1.3262135982513428,
"step": 601
},
{
"epoch": 1.2647058823529411,
"grad_norm": 13.762619560991299,
"learning_rate": 7.179639945639688e-06,
"loss": 1.6294150352478027,
"step": 602
},
{
"epoch": 1.2668067226890756,
"grad_norm": 12.793929462404881,
"learning_rate": 7.168631981807931e-06,
"loss": 2.6409220695495605,
"step": 603
},
{
"epoch": 1.26890756302521,
"grad_norm": 12.75285051440542,
"learning_rate": 7.15761105576382e-06,
"loss": 1.3407433032989502,
"step": 604
},
{
"epoch": 1.2710084033613445,
"grad_norm": 11.811026706721915,
"learning_rate": 7.1465772333809524e-06,
"loss": 1.1475789546966553,
"step": 605
},
{
"epoch": 1.273109243697479,
"grad_norm": 16.182274466548407,
"learning_rate": 7.1355305806100036e-06,
"loss": 1.8270117044448853,
"step": 606
},
{
"epoch": 1.2752100840336134,
"grad_norm": 9.390889705782493,
"learning_rate": 7.124471163478344e-06,
"loss": 2.168900489807129,
"step": 607
},
{
"epoch": 1.2773109243697478,
"grad_norm": 14.960557905830523,
"learning_rate": 7.113399048089631e-06,
"loss": 2.0142345428466797,
"step": 608
},
{
"epoch": 1.2794117647058822,
"grad_norm": 14.63642311907181,
"learning_rate": 7.102314300623425e-06,
"loss": 2.015444755554199,
"step": 609
},
{
"epoch": 1.2815126050420167,
"grad_norm": 13.291155405094099,
"learning_rate": 7.091216987334792e-06,
"loss": 1.5882906913757324,
"step": 610
},
{
"epoch": 1.2836134453781511,
"grad_norm": 17.727064634923273,
"learning_rate": 7.080107174553903e-06,
"loss": 1.4543545246124268,
"step": 611
},
{
"epoch": 1.2857142857142856,
"grad_norm": 13.123573018342379,
"learning_rate": 7.068984928685638e-06,
"loss": 1.3196444511413574,
"step": 612
},
{
"epoch": 1.28781512605042,
"grad_norm": 11.204963124082711,
"learning_rate": 7.057850316209198e-06,
"loss": 0.8601089715957642,
"step": 613
},
{
"epoch": 1.2899159663865547,
"grad_norm": 11.507041064870066,
"learning_rate": 7.0467034036776945e-06,
"loss": 1.334380865097046,
"step": 614
},
{
"epoch": 1.2920168067226891,
"grad_norm": 9.153184893600336,
"learning_rate": 7.035544257717761e-06,
"loss": 1.4980111122131348,
"step": 615
},
{
"epoch": 1.2941176470588236,
"grad_norm": 11.208470095807519,
"learning_rate": 7.024372945029152e-06,
"loss": 1.9393174648284912,
"step": 616
},
{
"epoch": 1.296218487394958,
"grad_norm": 9.33539024674701,
"learning_rate": 7.013189532384343e-06,
"loss": 1.1070374250411987,
"step": 617
},
{
"epoch": 1.2983193277310925,
"grad_norm": 22.088040059228636,
"learning_rate": 7.001994086628133e-06,
"loss": 2.146557331085205,
"step": 618
},
{
"epoch": 1.300420168067227,
"grad_norm": 12.461539796415895,
"learning_rate": 6.990786674677246e-06,
"loss": 1.097703456878662,
"step": 619
},
{
"epoch": 1.3025210084033614,
"grad_norm": 10.337144677645794,
"learning_rate": 6.979567363519927e-06,
"loss": 1.9619685411453247,
"step": 620
},
{
"epoch": 1.3046218487394958,
"grad_norm": 8.583774398203186,
"learning_rate": 6.9683362202155465e-06,
"loss": 1.2424434423446655,
"step": 621
},
{
"epoch": 1.3067226890756303,
"grad_norm": 10.235846664061171,
"learning_rate": 6.957093311894199e-06,
"loss": 1.8912100791931152,
"step": 622
},
{
"epoch": 1.3088235294117647,
"grad_norm": 12.7496233438477,
"learning_rate": 6.945838705756293e-06,
"loss": 1.4234580993652344,
"step": 623
},
{
"epoch": 1.3109243697478992,
"grad_norm": 12.664108172155123,
"learning_rate": 6.934572469072163e-06,
"loss": 1.7631306648254395,
"step": 624
},
{
"epoch": 1.3130252100840336,
"grad_norm": 9.043940926283064,
"learning_rate": 6.923294669181659e-06,
"loss": 1.275686264038086,
"step": 625
},
{
"epoch": 1.315126050420168,
"grad_norm": 7.7562010562396155,
"learning_rate": 6.912005373493747e-06,
"loss": 1.8493428230285645,
"step": 626
},
{
"epoch": 1.3172268907563025,
"grad_norm": 10.778946101337466,
"learning_rate": 6.900704649486103e-06,
"loss": 1.0401699542999268,
"step": 627
},
{
"epoch": 1.319327731092437,
"grad_norm": 9.474741424665671,
"learning_rate": 6.889392564704712e-06,
"loss": 1.932092547416687,
"step": 628
},
{
"epoch": 1.3214285714285714,
"grad_norm": 18.187204049633937,
"learning_rate": 6.878069186763466e-06,
"loss": 2.0269484519958496,
"step": 629
},
{
"epoch": 1.3235294117647058,
"grad_norm": 12.371698082139902,
"learning_rate": 6.866734583343753e-06,
"loss": 1.6765419244766235,
"step": 630
},
{
"epoch": 1.3256302521008403,
"grad_norm": 20.859173211033255,
"learning_rate": 6.855388822194061e-06,
"loss": 1.7931967973709106,
"step": 631
},
{
"epoch": 1.3277310924369747,
"grad_norm": 10.82122547870125,
"learning_rate": 6.844031971129571e-06,
"loss": 0.9582860469818115,
"step": 632
},
{
"epoch": 1.3298319327731092,
"grad_norm": 13.255068667352083,
"learning_rate": 6.8326640980317475e-06,
"loss": 1.7692348957061768,
"step": 633
},
{
"epoch": 1.3319327731092436,
"grad_norm": 12.632729369596628,
"learning_rate": 6.821285270847934e-06,
"loss": 2.143463373184204,
"step": 634
},
{
"epoch": 1.334033613445378,
"grad_norm": 31.76404047719635,
"learning_rate": 6.80989555759095e-06,
"loss": 2.290733814239502,
"step": 635
},
{
"epoch": 1.3361344537815127,
"grad_norm": 18.9996916353526,
"learning_rate": 6.79849502633868e-06,
"loss": 1.4548063278198242,
"step": 636
},
{
"epoch": 1.3382352941176472,
"grad_norm": 10.423656653462372,
"learning_rate": 6.787083745233674e-06,
"loss": 1.6137502193450928,
"step": 637
},
{
"epoch": 1.3403361344537816,
"grad_norm": 9.260024233354208,
"learning_rate": 6.775661782482732e-06,
"loss": 1.277546763420105,
"step": 638
},
{
"epoch": 1.342436974789916,
"grad_norm": 17.22623817552147,
"learning_rate": 6.764229206356498e-06,
"loss": 1.4183297157287598,
"step": 639
},
{
"epoch": 1.3445378151260505,
"grad_norm": 13.655193467078059,
"learning_rate": 6.752786085189059e-06,
"loss": 2.352818012237549,
"step": 640
},
{
"epoch": 1.346638655462185,
"grad_norm": 10.832229231352626,
"learning_rate": 6.741332487377525e-06,
"loss": 1.1966056823730469,
"step": 641
},
{
"epoch": 1.3487394957983194,
"grad_norm": 16.450581846244585,
"learning_rate": 6.729868481381632e-06,
"loss": 2.1670610904693604,
"step": 642
},
{
"epoch": 1.3508403361344539,
"grad_norm": 14.092922253172704,
"learning_rate": 6.718394135723321e-06,
"loss": 1.4478580951690674,
"step": 643
},
{
"epoch": 1.3529411764705883,
"grad_norm": 9.381957347730207,
"learning_rate": 6.706909518986341e-06,
"loss": 1.1712067127227783,
"step": 644
},
{
"epoch": 1.3550420168067228,
"grad_norm": 10.457393453015948,
"learning_rate": 6.695414699815828e-06,
"loss": 1.241437315940857,
"step": 645
},
{
"epoch": 1.3571428571428572,
"grad_norm": 26.13803770478073,
"learning_rate": 6.6839097469179e-06,
"loss": 1.5295310020446777,
"step": 646
},
{
"epoch": 1.3592436974789917,
"grad_norm": 7.790998799935961,
"learning_rate": 6.6723947290592505e-06,
"loss": 1.3555617332458496,
"step": 647
},
{
"epoch": 1.361344537815126,
"grad_norm": 14.276342203489932,
"learning_rate": 6.660869715066725e-06,
"loss": 1.3158948421478271,
"step": 648
},
{
"epoch": 1.3634453781512605,
"grad_norm": 7.962812237225353,
"learning_rate": 6.649334773826924e-06,
"loss": 1.7540979385375977,
"step": 649
},
{
"epoch": 1.365546218487395,
"grad_norm": 12.139617079516373,
"learning_rate": 6.63778997428578e-06,
"loss": 1.7170000076293945,
"step": 650
},
{
"epoch": 1.3676470588235294,
"grad_norm": 9.424987040512477,
"learning_rate": 6.626235385448152e-06,
"loss": 1.2551283836364746,
"step": 651
},
{
"epoch": 1.3697478991596639,
"grad_norm": 11.731648378217931,
"learning_rate": 6.61467107637741e-06,
"loss": 1.468104362487793,
"step": 652
},
{
"epoch": 1.3718487394957983,
"grad_norm": 22.089135904446437,
"learning_rate": 6.603097116195026e-06,
"loss": 1.3832511901855469,
"step": 653
},
{
"epoch": 1.3739495798319328,
"grad_norm": 13.408397747285187,
"learning_rate": 6.591513574080152e-06,
"loss": 1.1895179748535156,
"step": 654
},
{
"epoch": 1.3760504201680672,
"grad_norm": 10.506415017764112,
"learning_rate": 6.579920519269218e-06,
"loss": 1.57008957862854,
"step": 655
},
{
"epoch": 1.3781512605042017,
"grad_norm": 7.3609644144158315,
"learning_rate": 6.568318021055512e-06,
"loss": 1.1686642169952393,
"step": 656
},
{
"epoch": 1.3802521008403361,
"grad_norm": 19.968281534972263,
"learning_rate": 6.556706148788765e-06,
"loss": 1.831925392150879,
"step": 657
},
{
"epoch": 1.3823529411764706,
"grad_norm": 14.858290457520233,
"learning_rate": 6.545084971874738e-06,
"loss": 1.1927814483642578,
"step": 658
},
{
"epoch": 1.384453781512605,
"grad_norm": 13.471589913344788,
"learning_rate": 6.5334545597748075e-06,
"loss": 1.225053310394287,
"step": 659
},
{
"epoch": 1.3865546218487395,
"grad_norm": 10.64391058893006,
"learning_rate": 6.521814982005552e-06,
"loss": 1.489911437034607,
"step": 660
},
{
"epoch": 1.388655462184874,
"grad_norm": 14.084203318094486,
"learning_rate": 6.510166308138328e-06,
"loss": 1.3653918504714966,
"step": 661
},
{
"epoch": 1.3907563025210083,
"grad_norm": 10.331380068295612,
"learning_rate": 6.498508607798872e-06,
"loss": 1.7082477807998657,
"step": 662
},
{
"epoch": 1.3928571428571428,
"grad_norm": 10.828390377137284,
"learning_rate": 6.48684195066686e-06,
"loss": 1.1122634410858154,
"step": 663
},
{
"epoch": 1.3949579831932772,
"grad_norm": 14.858926439296923,
"learning_rate": 6.475166406475515e-06,
"loss": 0.9572471380233765,
"step": 664
},
{
"epoch": 1.3970588235294117,
"grad_norm": 17.02414371173566,
"learning_rate": 6.4634820450111715e-06,
"loss": 1.8282674551010132,
"step": 665
},
{
"epoch": 1.3991596638655461,
"grad_norm": 12.803154660225488,
"learning_rate": 6.451788936112868e-06,
"loss": 1.2026221752166748,
"step": 666
},
{
"epoch": 1.4012605042016806,
"grad_norm": 10.424391789653072,
"learning_rate": 6.440087149671932e-06,
"loss": 1.3183879852294922,
"step": 667
},
{
"epoch": 1.403361344537815,
"grad_norm": 8.925077970843816,
"learning_rate": 6.428376755631553e-06,
"loss": 1.216771125793457,
"step": 668
},
{
"epoch": 1.4054621848739495,
"grad_norm": 12.5613140309092,
"learning_rate": 6.41665782398637e-06,
"loss": 1.6759852170944214,
"step": 669
},
{
"epoch": 1.407563025210084,
"grad_norm": 13.278399152081807,
"learning_rate": 6.404930424782052e-06,
"loss": 1.6593937873840332,
"step": 670
},
{
"epoch": 1.4096638655462184,
"grad_norm": 11.405836988138063,
"learning_rate": 6.393194628114885e-06,
"loss": 1.672929286956787,
"step": 671
},
{
"epoch": 1.4117647058823528,
"grad_norm": 13.1586681372233,
"learning_rate": 6.381450504131339e-06,
"loss": 1.2778139114379883,
"step": 672
},
{
"epoch": 1.4138655462184873,
"grad_norm": 9.462564203496632,
"learning_rate": 6.369698123027664e-06,
"loss": 1.6472318172454834,
"step": 673
},
{
"epoch": 1.415966386554622,
"grad_norm": 12.367075684146661,
"learning_rate": 6.357937555049465e-06,
"loss": 1.5301233530044556,
"step": 674
},
{
"epoch": 1.4180672268907564,
"grad_norm": 9.502388277835697,
"learning_rate": 6.3461688704912735e-06,
"loss": 1.5423755645751953,
"step": 675
},
{
"epoch": 1.4201680672268908,
"grad_norm": 13.340625777023925,
"learning_rate": 6.334392139696144e-06,
"loss": 0.8435590863227844,
"step": 676
},
{
"epoch": 1.4222689075630253,
"grad_norm": 9.498703079540906,
"learning_rate": 6.322607433055217e-06,
"loss": 0.9243001937866211,
"step": 677
},
{
"epoch": 1.4243697478991597,
"grad_norm": 16.692002135074148,
"learning_rate": 6.310814821007312e-06,
"loss": 1.1370623111724854,
"step": 678
},
{
"epoch": 1.4264705882352942,
"grad_norm": 14.252581322539957,
"learning_rate": 6.299014374038493e-06,
"loss": 1.8121721744537354,
"step": 679
},
{
"epoch": 1.4285714285714286,
"grad_norm": 12.146719665307664,
"learning_rate": 6.287206162681663e-06,
"loss": 1.5701857805252075,
"step": 680
},
{
"epoch": 1.430672268907563,
"grad_norm": 11.383072024184132,
"learning_rate": 6.275390257516125e-06,
"loss": 1.7376922369003296,
"step": 681
},
{
"epoch": 1.4327731092436975,
"grad_norm": 7.389859803918485,
"learning_rate": 6.263566729167177e-06,
"loss": 1.722080111503601,
"step": 682
},
{
"epoch": 1.434873949579832,
"grad_norm": 11.051749495669629,
"learning_rate": 6.251735648305676e-06,
"loss": 1.8646998405456543,
"step": 683
},
{
"epoch": 1.4369747899159664,
"grad_norm": 8.056416794494698,
"learning_rate": 6.239897085647624e-06,
"loss": 1.6373791694641113,
"step": 684
},
{
"epoch": 1.4390756302521008,
"grad_norm": 14.269272004271027,
"learning_rate": 6.228051111953742e-06,
"loss": 1.5332825183868408,
"step": 685
},
{
"epoch": 1.4411764705882353,
"grad_norm": 11.569261548147155,
"learning_rate": 6.216197798029049e-06,
"loss": 1.7713117599487305,
"step": 686
},
{
"epoch": 1.4432773109243697,
"grad_norm": 8.013171505509781,
"learning_rate": 6.204337214722435e-06,
"loss": 1.3197343349456787,
"step": 687
},
{
"epoch": 1.4453781512605042,
"grad_norm": 7.988425778687254,
"learning_rate": 6.192469432926241e-06,
"loss": 1.3940856456756592,
"step": 688
},
{
"epoch": 1.4474789915966386,
"grad_norm": 14.591944041736712,
"learning_rate": 6.180594523575838e-06,
"loss": 2.0876762866973877,
"step": 689
},
{
"epoch": 1.449579831932773,
"grad_norm": 13.421486753054541,
"learning_rate": 6.1687125576491945e-06,
"loss": 2.5141618251800537,
"step": 690
},
{
"epoch": 1.4516806722689075,
"grad_norm": 11.284766831118931,
"learning_rate": 6.156823606166461e-06,
"loss": 0.9575009942054749,
"step": 691
},
{
"epoch": 1.453781512605042,
"grad_norm": 10.180747973970707,
"learning_rate": 6.144927740189537e-06,
"loss": 1.2732771635055542,
"step": 692
},
{
"epoch": 1.4558823529411764,
"grad_norm": 18.346962905469923,
"learning_rate": 6.133025030821656e-06,
"loss": 1.0447793006896973,
"step": 693
},
{
"epoch": 1.4579831932773109,
"grad_norm": 9.72623535803224,
"learning_rate": 6.12111554920695e-06,
"loss": 2.069892406463623,
"step": 694
},
{
"epoch": 1.4600840336134453,
"grad_norm": 12.298140767363686,
"learning_rate": 6.1091993665300354e-06,
"loss": 1.4193060398101807,
"step": 695
},
{
"epoch": 1.46218487394958,
"grad_norm": 13.962463696814781,
"learning_rate": 6.0972765540155764e-06,
"loss": 1.8489269018173218,
"step": 696
},
{
"epoch": 1.4642857142857144,
"grad_norm": 28.27872673921732,
"learning_rate": 6.08534718292787e-06,
"loss": 1.8245782852172852,
"step": 697
},
{
"epoch": 1.4663865546218489,
"grad_norm": 11.704836274205533,
"learning_rate": 6.07341132457041e-06,
"loss": 1.8135966062545776,
"step": 698
},
{
"epoch": 1.4684873949579833,
"grad_norm": 10.329932140617693,
"learning_rate": 6.061469050285469e-06,
"loss": 1.2886388301849365,
"step": 699
},
{
"epoch": 1.4705882352941178,
"grad_norm": 7.515435335019253,
"learning_rate": 6.049520431453666e-06,
"loss": 1.8994669914245605,
"step": 700
},
{
"epoch": 1.4726890756302522,
"grad_norm": 8.559344765158919,
"learning_rate": 6.037565539493542e-06,
"loss": 1.830640196800232,
"step": 701
},
{
"epoch": 1.4747899159663866,
"grad_norm": 11.041841545301232,
"learning_rate": 6.025604445861137e-06,
"loss": 1.253919005393982,
"step": 702
},
{
"epoch": 1.476890756302521,
"grad_norm": 8.205733723815058,
"learning_rate": 6.013637222049554e-06,
"loss": 1.4687739610671997,
"step": 703
},
{
"epoch": 1.4789915966386555,
"grad_norm": 11.178455036225843,
"learning_rate": 6.0016639395885424e-06,
"loss": 0.8241528272628784,
"step": 704
},
{
"epoch": 1.48109243697479,
"grad_norm": 9.38917529173767,
"learning_rate": 5.98968467004406e-06,
"loss": 0.9833969473838806,
"step": 705
},
{
"epoch": 1.4831932773109244,
"grad_norm": 12.194704863353925,
"learning_rate": 5.977699485017855e-06,
"loss": 1.4603691101074219,
"step": 706
},
{
"epoch": 1.4852941176470589,
"grad_norm": 15.962403885996371,
"learning_rate": 5.965708456147028e-06,
"loss": 1.2566254138946533,
"step": 707
},
{
"epoch": 1.4873949579831933,
"grad_norm": 12.538339280369007,
"learning_rate": 5.953711655103615e-06,
"loss": 1.1779121160507202,
"step": 708
},
{
"epoch": 1.4894957983193278,
"grad_norm": 13.11480387819097,
"learning_rate": 5.941709153594146e-06,
"loss": 2.1752524375915527,
"step": 709
},
{
"epoch": 1.4915966386554622,
"grad_norm": 13.674164350381794,
"learning_rate": 5.92970102335923e-06,
"loss": 1.368391752243042,
"step": 710
},
{
"epoch": 1.4936974789915967,
"grad_norm": 14.25334329729132,
"learning_rate": 5.917687336173116e-06,
"loss": 1.4870836734771729,
"step": 711
},
{
"epoch": 1.495798319327731,
"grad_norm": 6.953756120767862,
"learning_rate": 5.905668163843269e-06,
"loss": 1.5822714567184448,
"step": 712
},
{
"epoch": 1.4978991596638656,
"grad_norm": 11.644576591550592,
"learning_rate": 5.893643578209939e-06,
"loss": 1.5158865451812744,
"step": 713
},
{
"epoch": 1.5,
"grad_norm": 7.891690515863711,
"learning_rate": 5.881613651145732e-06,
"loss": 1.0833930969238281,
"step": 714
},
{
"epoch": 1.5021008403361344,
"grad_norm": 10.374951659973064,
"learning_rate": 5.8695784545551815e-06,
"loss": 1.2957074642181396,
"step": 715
},
{
"epoch": 1.504201680672269,
"grad_norm": 11.404797448034625,
"learning_rate": 5.8575380603743155e-06,
"loss": 1.541155457496643,
"step": 716
},
{
"epoch": 1.5063025210084033,
"grad_norm": 9.289021596431404,
"learning_rate": 5.8454925405702326e-06,
"loss": 1.7509238719940186,
"step": 717
},
{
"epoch": 1.5084033613445378,
"grad_norm": 15.014764167830913,
"learning_rate": 5.833441967140662e-06,
"loss": 1.8062071800231934,
"step": 718
},
{
"epoch": 1.5105042016806722,
"grad_norm": 11.714801378063543,
"learning_rate": 5.821386412113546e-06,
"loss": 1.4850780963897705,
"step": 719
},
{
"epoch": 1.5126050420168067,
"grad_norm": 12.362413690497235,
"learning_rate": 5.809325947546596e-06,
"loss": 1.1842257976531982,
"step": 720
},
{
"epoch": 1.5147058823529411,
"grad_norm": 18.21069319599996,
"learning_rate": 5.797260645526873e-06,
"loss": 1.396120548248291,
"step": 721
},
{
"epoch": 1.5168067226890756,
"grad_norm": 9.57576869626496,
"learning_rate": 5.785190578170351e-06,
"loss": 1.2990989685058594,
"step": 722
},
{
"epoch": 1.51890756302521,
"grad_norm": 9.618803676236782,
"learning_rate": 5.773115817621487e-06,
"loss": 1.8467388153076172,
"step": 723
},
{
"epoch": 1.5210084033613445,
"grad_norm": 11.794758813101549,
"learning_rate": 5.761036436052788e-06,
"loss": 1.6907732486724854,
"step": 724
},
{
"epoch": 1.523109243697479,
"grad_norm": 10.45395101852907,
"learning_rate": 5.748952505664385e-06,
"loss": 1.265946388244629,
"step": 725
},
{
"epoch": 1.5252100840336134,
"grad_norm": 39.03800117968252,
"learning_rate": 5.736864098683595e-06,
"loss": 1.2473053932189941,
"step": 726
},
{
"epoch": 1.5273109243697478,
"grad_norm": 12.736788173749753,
"learning_rate": 5.724771287364492e-06,
"loss": 0.8382349014282227,
"step": 727
},
{
"epoch": 1.5294117647058822,
"grad_norm": 9.695123568985625,
"learning_rate": 5.712674143987478e-06,
"loss": 0.6312862038612366,
"step": 728
},
{
"epoch": 1.5315126050420167,
"grad_norm": 19.807295601128907,
"learning_rate": 5.700572740858847e-06,
"loss": 2.154848575592041,
"step": 729
},
{
"epoch": 1.5336134453781511,
"grad_norm": 8.348583198098744,
"learning_rate": 5.688467150310353e-06,
"loss": 2.034533739089966,
"step": 730
},
{
"epoch": 1.5357142857142856,
"grad_norm": 17.36525324904992,
"learning_rate": 5.67635744469878e-06,
"loss": 1.1331748962402344,
"step": 731
},
{
"epoch": 1.53781512605042,
"grad_norm": 16.447167153207392,
"learning_rate": 5.664243696405509e-06,
"loss": 2.139069080352783,
"step": 732
},
{
"epoch": 1.5399159663865545,
"grad_norm": 8.422362967066016,
"learning_rate": 5.652125977836083e-06,
"loss": 1.5174198150634766,
"step": 733
},
{
"epoch": 1.542016806722689,
"grad_norm": 11.885541322376927,
"learning_rate": 5.640004361419776e-06,
"loss": 1.4445990324020386,
"step": 734
},
{
"epoch": 1.5441176470588234,
"grad_norm": 13.004468722411309,
"learning_rate": 5.627878919609162e-06,
"loss": 1.3474795818328857,
"step": 735
},
{
"epoch": 1.5462184873949578,
"grad_norm": 8.116038341885554,
"learning_rate": 5.615749724879677e-06,
"loss": 1.4871881008148193,
"step": 736
},
{
"epoch": 1.5483193277310925,
"grad_norm": 9.971232266174457,
"learning_rate": 5.603616849729191e-06,
"loss": 1.308741569519043,
"step": 737
},
{
"epoch": 1.550420168067227,
"grad_norm": 9.853281920667216,
"learning_rate": 5.591480366677571e-06,
"loss": 1.712050199508667,
"step": 738
},
{
"epoch": 1.5525210084033614,
"grad_norm": 10.993711611399497,
"learning_rate": 5.579340348266251e-06,
"loss": 1.5636662244796753,
"step": 739
},
{
"epoch": 1.5546218487394958,
"grad_norm": 9.673522828347147,
"learning_rate": 5.5671968670577935e-06,
"loss": 2.132948160171509,
"step": 740
},
{
"epoch": 1.5567226890756303,
"grad_norm": 8.375544989800046,
"learning_rate": 5.55504999563546e-06,
"loss": 1.3193635940551758,
"step": 741
},
{
"epoch": 1.5588235294117647,
"grad_norm": 17.452697703036844,
"learning_rate": 5.542899806602776e-06,
"loss": 2.288175582885742,
"step": 742
},
{
"epoch": 1.5609243697478992,
"grad_norm": 9.46737262414516,
"learning_rate": 5.530746372583097e-06,
"loss": 0.9925522804260254,
"step": 743
},
{
"epoch": 1.5630252100840336,
"grad_norm": 11.56785515084437,
"learning_rate": 5.518589766219173e-06,
"loss": 1.0975109338760376,
"step": 744
},
{
"epoch": 1.565126050420168,
"grad_norm": 7.691252772883318,
"learning_rate": 5.506430060172714e-06,
"loss": 1.5021933317184448,
"step": 745
},
{
"epoch": 1.5672268907563025,
"grad_norm": 18.209072269714934,
"learning_rate": 5.494267327123965e-06,
"loss": 1.3946982622146606,
"step": 746
},
{
"epoch": 1.569327731092437,
"grad_norm": 8.665816006385743,
"learning_rate": 5.482101639771255e-06,
"loss": 1.1381559371948242,
"step": 747
},
{
"epoch": 1.5714285714285714,
"grad_norm": 8.44106615949262,
"learning_rate": 5.469933070830574e-06,
"loss": 1.6340922117233276,
"step": 748
},
{
"epoch": 1.5735294117647058,
"grad_norm": 15.525808686682517,
"learning_rate": 5.457761693035139e-06,
"loss": 1.3076329231262207,
"step": 749
},
{
"epoch": 1.5756302521008403,
"grad_norm": 17.23937998928761,
"learning_rate": 5.44558757913495e-06,
"loss": 1.4544854164123535,
"step": 750
},
{
"epoch": 1.5777310924369747,
"grad_norm": 12.32276892072033,
"learning_rate": 5.433410801896366e-06,
"loss": 1.254534363746643,
"step": 751
},
{
"epoch": 1.5798319327731094,
"grad_norm": 15.375685102500293,
"learning_rate": 5.4212314341016645e-06,
"loss": 1.6915278434753418,
"step": 752
},
{
"epoch": 1.5819327731092439,
"grad_norm": 11.585921032972996,
"learning_rate": 5.409049548548604e-06,
"loss": 1.9941121339797974,
"step": 753
},
{
"epoch": 1.5840336134453783,
"grad_norm": 20.541716513837855,
"learning_rate": 5.396865218049995e-06,
"loss": 1.8611130714416504,
"step": 754
},
{
"epoch": 1.5861344537815127,
"grad_norm": 8.615776619323707,
"learning_rate": 5.38467851543326e-06,
"loss": 1.5740795135498047,
"step": 755
},
{
"epoch": 1.5882352941176472,
"grad_norm": 11.309502746225869,
"learning_rate": 5.3724895135400015e-06,
"loss": 2.713351249694824,
"step": 756
},
{
"epoch": 1.5903361344537816,
"grad_norm": 10.253542895381939,
"learning_rate": 5.360298285225564e-06,
"loss": 1.3163414001464844,
"step": 757
},
{
"epoch": 1.592436974789916,
"grad_norm": 9.938347580214066,
"learning_rate": 5.3481049033586e-06,
"loss": 1.2127149105072021,
"step": 758
},
{
"epoch": 1.5945378151260505,
"grad_norm": 8.342953472089572,
"learning_rate": 5.335909440820635e-06,
"loss": 1.7897974252700806,
"step": 759
},
{
"epoch": 1.596638655462185,
"grad_norm": 15.069858504573101,
"learning_rate": 5.323711970505627e-06,
"loss": 1.1387288570404053,
"step": 760
},
{
"epoch": 1.5987394957983194,
"grad_norm": 11.085133444710992,
"learning_rate": 5.311512565319542e-06,
"loss": 1.3364837169647217,
"step": 761
},
{
"epoch": 1.6008403361344539,
"grad_norm": 32.930598343392575,
"learning_rate": 5.299311298179904e-06,
"loss": 1.5517654418945312,
"step": 762
},
{
"epoch": 1.6029411764705883,
"grad_norm": 8.444149682468657,
"learning_rate": 5.287108242015371e-06,
"loss": 1.3167724609375,
"step": 763
},
{
"epoch": 1.6050420168067228,
"grad_norm": 11.755149351980176,
"learning_rate": 5.27490346976529e-06,
"loss": 0.9891781210899353,
"step": 764
},
{
"epoch": 1.6071428571428572,
"grad_norm": 7.559294905407309,
"learning_rate": 5.2626970543792685e-06,
"loss": 1.4272327423095703,
"step": 765
},
{
"epoch": 1.6092436974789917,
"grad_norm": 15.95519407006552,
"learning_rate": 5.250489068816734e-06,
"loss": 1.5374692678451538,
"step": 766
},
{
"epoch": 1.611344537815126,
"grad_norm": 11.13469591726469,
"learning_rate": 5.238279586046499e-06,
"loss": 2.130378484725952,
"step": 767
},
{
"epoch": 1.6134453781512605,
"grad_norm": 7.655252597687492,
"learning_rate": 5.226068679046327e-06,
"loss": 1.0989816188812256,
"step": 768
},
{
"epoch": 1.615546218487395,
"grad_norm": 13.788475789937872,
"learning_rate": 5.21385642080249e-06,
"loss": 1.4945666790008545,
"step": 769
},
{
"epoch": 1.6176470588235294,
"grad_norm": 10.474320077857882,
"learning_rate": 5.201642884309341e-06,
"loss": 2.0057296752929688,
"step": 770
},
{
"epoch": 1.6197478991596639,
"grad_norm": 15.324254986466453,
"learning_rate": 5.189428142568872e-06,
"loss": 1.1791839599609375,
"step": 771
},
{
"epoch": 1.6218487394957983,
"grad_norm": 9.947837053439045,
"learning_rate": 5.177212268590277e-06,
"loss": 1.836449384689331,
"step": 772
},
{
"epoch": 1.6239495798319328,
"grad_norm": 13.279110268267903,
"learning_rate": 5.16499533538952e-06,
"loss": 1.711057424545288,
"step": 773
},
{
"epoch": 1.6260504201680672,
"grad_norm": 8.331904700424623,
"learning_rate": 5.152777415988894e-06,
"loss": 1.5274529457092285,
"step": 774
},
{
"epoch": 1.6281512605042017,
"grad_norm": 8.55624828912363,
"learning_rate": 5.140558583416591e-06,
"loss": 1.687756061553955,
"step": 775
},
{
"epoch": 1.6302521008403361,
"grad_norm": 13.622302699665928,
"learning_rate": 5.128338910706254e-06,
"loss": 1.857285976409912,
"step": 776
},
{
"epoch": 1.6323529411764706,
"grad_norm": 10.381639920961357,
"learning_rate": 5.1161184708965525e-06,
"loss": 2.2893779277801514,
"step": 777
},
{
"epoch": 1.634453781512605,
"grad_norm": 13.136015822018155,
"learning_rate": 5.103897337030742e-06,
"loss": 1.299177646636963,
"step": 778
},
{
"epoch": 1.6365546218487395,
"grad_norm": 12.717300364122215,
"learning_rate": 5.091675582156224e-06,
"loss": 1.156067132949829,
"step": 779
},
{
"epoch": 1.638655462184874,
"grad_norm": 12.064277910399447,
"learning_rate": 5.07945327932411e-06,
"loss": 1.6888867616653442,
"step": 780
},
{
"epoch": 1.6407563025210083,
"grad_norm": 18.58505757321674,
"learning_rate": 5.067230501588792e-06,
"loss": 2.480485677719116,
"step": 781
},
{
"epoch": 1.6428571428571428,
"grad_norm": 10.797297449293279,
"learning_rate": 5.055007322007497e-06,
"loss": 1.1827846765518188,
"step": 782
},
{
"epoch": 1.6449579831932772,
"grad_norm": 8.451797960661173,
"learning_rate": 5.0427838136398545e-06,
"loss": 0.9974920749664307,
"step": 783
},
{
"epoch": 1.6470588235294117,
"grad_norm": 9.35430238275204,
"learning_rate": 5.0305600495474586e-06,
"loss": 1.3341560363769531,
"step": 784
},
{
"epoch": 1.6491596638655461,
"grad_norm": 12.853381185509344,
"learning_rate": 5.018336102793433e-06,
"loss": 1.7801398038864136,
"step": 785
},
{
"epoch": 1.6512605042016806,
"grad_norm": 9.421435792652487,
"learning_rate": 5.006112046441993e-06,
"loss": 1.5409959554672241,
"step": 786
},
{
"epoch": 1.653361344537815,
"grad_norm": 17.569411033601973,
"learning_rate": 4.993887953558008e-06,
"loss": 1.7089118957519531,
"step": 787
},
{
"epoch": 1.6554621848739495,
"grad_norm": 7.577368880962854,
"learning_rate": 4.981663897206568e-06,
"loss": 1.5989807844161987,
"step": 788
},
{
"epoch": 1.657563025210084,
"grad_norm": 8.077886545894733,
"learning_rate": 4.969439950452543e-06,
"loss": 1.4693567752838135,
"step": 789
},
{
"epoch": 1.6596638655462184,
"grad_norm": 9.785283960809164,
"learning_rate": 4.957216186360147e-06,
"loss": 1.7319889068603516,
"step": 790
},
{
"epoch": 1.6617647058823528,
"grad_norm": 9.333867287657988,
"learning_rate": 4.944992677992505e-06,
"loss": 1.38368558883667,
"step": 791
},
{
"epoch": 1.6638655462184873,
"grad_norm": 12.124257442308217,
"learning_rate": 4.932769498411209e-06,
"loss": 1.9276368618011475,
"step": 792
},
{
"epoch": 1.6659663865546217,
"grad_norm": 11.917503250417354,
"learning_rate": 4.9205467206758914e-06,
"loss": 1.5189965963363647,
"step": 793
},
{
"epoch": 1.6680672268907561,
"grad_norm": 10.697092148525952,
"learning_rate": 4.908324417843779e-06,
"loss": 1.8080897331237793,
"step": 794
},
{
"epoch": 1.6701680672268906,
"grad_norm": 13.667659355410398,
"learning_rate": 4.896102662969259e-06,
"loss": 1.3283686637878418,
"step": 795
},
{
"epoch": 1.6722689075630253,
"grad_norm": 10.250252779637346,
"learning_rate": 4.883881529103448e-06,
"loss": 1.7254778146743774,
"step": 796
},
{
"epoch": 1.6743697478991597,
"grad_norm": 23.09605024251299,
"learning_rate": 4.8716610892937486e-06,
"loss": 1.882294774055481,
"step": 797
},
{
"epoch": 1.6764705882352942,
"grad_norm": 9.75260822435192,
"learning_rate": 4.859441416583412e-06,
"loss": 1.955444574356079,
"step": 798
},
{
"epoch": 1.6785714285714286,
"grad_norm": 9.214097989478232,
"learning_rate": 4.847222584011107e-06,
"loss": 1.3442355394363403,
"step": 799
},
{
"epoch": 1.680672268907563,
"grad_norm": 7.0621430973608295,
"learning_rate": 4.8350046646104815e-06,
"loss": 1.3532618284225464,
"step": 800
},
{
"epoch": 1.6827731092436975,
"grad_norm": 9.43032823639271,
"learning_rate": 4.8227877314097245e-06,
"loss": 1.8527226448059082,
"step": 801
},
{
"epoch": 1.684873949579832,
"grad_norm": 42.238546118436666,
"learning_rate": 4.81057185743113e-06,
"loss": 2.1312098503112793,
"step": 802
},
{
"epoch": 1.6869747899159664,
"grad_norm": 19.238236915867475,
"learning_rate": 4.798357115690661e-06,
"loss": 1.3131635189056396,
"step": 803
},
{
"epoch": 1.6890756302521008,
"grad_norm": 11.839412971623531,
"learning_rate": 4.7861435791975124e-06,
"loss": 1.057523488998413,
"step": 804
},
{
"epoch": 1.6911764705882353,
"grad_norm": 16.330203992434377,
"learning_rate": 4.7739313209536755e-06,
"loss": 1.510682225227356,
"step": 805
},
{
"epoch": 1.6932773109243697,
"grad_norm": 9.476255130895225,
"learning_rate": 4.761720413953503e-06,
"loss": 2.0877933502197266,
"step": 806
},
{
"epoch": 1.6953781512605042,
"grad_norm": 18.00296013944893,
"learning_rate": 4.7495109311832665e-06,
"loss": 0.9936963319778442,
"step": 807
},
{
"epoch": 1.6974789915966386,
"grad_norm": 12.213588808955969,
"learning_rate": 4.737302945620732e-06,
"loss": 1.4148988723754883,
"step": 808
},
{
"epoch": 1.699579831932773,
"grad_norm": 16.652878791868638,
"learning_rate": 4.72509653023471e-06,
"loss": 1.5457355976104736,
"step": 809
},
{
"epoch": 1.7016806722689075,
"grad_norm": 12.137844142300786,
"learning_rate": 4.712891757984629e-06,
"loss": 1.1069682836532593,
"step": 810
},
{
"epoch": 1.7037815126050422,
"grad_norm": 10.383790506526612,
"learning_rate": 4.700688701820096e-06,
"loss": 2.270923376083374,
"step": 811
},
{
"epoch": 1.7058823529411766,
"grad_norm": 9.870143573450948,
"learning_rate": 4.688487434680459e-06,
"loss": 2.1212430000305176,
"step": 812
},
{
"epoch": 1.707983193277311,
"grad_norm": 7.378583815892385,
"learning_rate": 4.6762880294943734e-06,
"loss": 1.340724229812622,
"step": 813
},
{
"epoch": 1.7100840336134455,
"grad_norm": 10.60633944294548,
"learning_rate": 4.664090559179367e-06,
"loss": 1.1250860691070557,
"step": 814
},
{
"epoch": 1.71218487394958,
"grad_norm": 10.395256226604776,
"learning_rate": 4.651895096641402e-06,
"loss": 1.3906278610229492,
"step": 815
},
{
"epoch": 1.7142857142857144,
"grad_norm": 14.563103162972197,
"learning_rate": 4.639701714774439e-06,
"loss": 1.5373984575271606,
"step": 816
},
{
"epoch": 1.7163865546218489,
"grad_norm": 11.845585553697056,
"learning_rate": 4.627510486459999e-06,
"loss": 1.1511554718017578,
"step": 817
},
{
"epoch": 1.7184873949579833,
"grad_norm": 12.016172591706953,
"learning_rate": 4.615321484566741e-06,
"loss": 1.0511482954025269,
"step": 818
},
{
"epoch": 1.7205882352941178,
"grad_norm": 24.328922920047308,
"learning_rate": 4.603134781950007e-06,
"loss": 1.6539651155471802,
"step": 819
},
{
"epoch": 1.7226890756302522,
"grad_norm": 10.354684370723726,
"learning_rate": 4.590950451451397e-06,
"loss": 1.7340842485427856,
"step": 820
},
{
"epoch": 1.7247899159663866,
"grad_norm": 9.128876197606015,
"learning_rate": 4.578768565898337e-06,
"loss": 1.9771497249603271,
"step": 821
},
{
"epoch": 1.726890756302521,
"grad_norm": 13.12308018791223,
"learning_rate": 4.566589198103635e-06,
"loss": 1.6702903509140015,
"step": 822
},
{
"epoch": 1.7289915966386555,
"grad_norm": 14.032448855066312,
"learning_rate": 4.554412420865052e-06,
"loss": 1.2594914436340332,
"step": 823
},
{
"epoch": 1.73109243697479,
"grad_norm": 11.631886990165667,
"learning_rate": 4.542238306964863e-06,
"loss": 1.2319787740707397,
"step": 824
},
{
"epoch": 1.7331932773109244,
"grad_norm": 12.012430999144566,
"learning_rate": 4.530066929169427e-06,
"loss": 1.631975769996643,
"step": 825
},
{
"epoch": 1.7352941176470589,
"grad_norm": 14.157695450219515,
"learning_rate": 4.5178983602287476e-06,
"loss": 1.4831879138946533,
"step": 826
},
{
"epoch": 1.7373949579831933,
"grad_norm": 12.493623072525319,
"learning_rate": 4.505732672876037e-06,
"loss": 1.225109338760376,
"step": 827
},
{
"epoch": 1.7394957983193278,
"grad_norm": 30.459517703838213,
"learning_rate": 4.493569939827288e-06,
"loss": 1.6191500425338745,
"step": 828
},
{
"epoch": 1.7415966386554622,
"grad_norm": 10.531191971687601,
"learning_rate": 4.48141023378083e-06,
"loss": 1.562519907951355,
"step": 829
},
{
"epoch": 1.7436974789915967,
"grad_norm": 20.955204277046732,
"learning_rate": 4.4692536274169055e-06,
"loss": 1.5889461040496826,
"step": 830
},
{
"epoch": 1.745798319327731,
"grad_norm": 16.275459231962,
"learning_rate": 4.457100193397226e-06,
"loss": 0.8582566976547241,
"step": 831
},
{
"epoch": 1.7478991596638656,
"grad_norm": 8.59181477774897,
"learning_rate": 4.444950004364542e-06,
"loss": 1.2409437894821167,
"step": 832
},
{
"epoch": 1.75,
"grad_norm": 11.280859647806443,
"learning_rate": 4.432803132942208e-06,
"loss": 1.2449380159378052,
"step": 833
},
{
"epoch": 1.7521008403361344,
"grad_norm": 13.088404793322951,
"learning_rate": 4.420659651733751e-06,
"loss": 1.2676522731781006,
"step": 834
},
{
"epoch": 1.754201680672269,
"grad_norm": 10.60776961423833,
"learning_rate": 4.40851963332243e-06,
"loss": 1.5941420793533325,
"step": 835
},
{
"epoch": 1.7563025210084033,
"grad_norm": 17.131655688023486,
"learning_rate": 4.396383150270811e-06,
"loss": 1.1451562643051147,
"step": 836
},
{
"epoch": 1.7584033613445378,
"grad_norm": 16.683478300158875,
"learning_rate": 4.384250275120325e-06,
"loss": 1.4305951595306396,
"step": 837
},
{
"epoch": 1.7605042016806722,
"grad_norm": 23.521629142849456,
"learning_rate": 4.372121080390841e-06,
"loss": 1.9824583530426025,
"step": 838
},
{
"epoch": 1.7626050420168067,
"grad_norm": 12.354999677804352,
"learning_rate": 4.359995638580226e-06,
"loss": 1.2548645734786987,
"step": 839
},
{
"epoch": 1.7647058823529411,
"grad_norm": 11.031648245105625,
"learning_rate": 4.34787402216392e-06,
"loss": 1.8208611011505127,
"step": 840
},
{
"epoch": 1.7668067226890756,
"grad_norm": 12.87350659067892,
"learning_rate": 4.335756303594493e-06,
"loss": 1.3555166721343994,
"step": 841
},
{
"epoch": 1.76890756302521,
"grad_norm": 11.399790863860508,
"learning_rate": 4.323642555301222e-06,
"loss": 0.9843342900276184,
"step": 842
},
{
"epoch": 1.7710084033613445,
"grad_norm": 9.734229808167726,
"learning_rate": 4.311532849689649e-06,
"loss": 1.5203514099121094,
"step": 843
},
{
"epoch": 1.773109243697479,
"grad_norm": 12.62619531995832,
"learning_rate": 4.299427259141155e-06,
"loss": 2.226682662963867,
"step": 844
},
{
"epoch": 1.7752100840336134,
"grad_norm": 9.352423304681453,
"learning_rate": 4.2873258560125244e-06,
"loss": 1.4532074928283691,
"step": 845
},
{
"epoch": 1.7773109243697478,
"grad_norm": 13.614475982132188,
"learning_rate": 4.275228712635511e-06,
"loss": 0.939800500869751,
"step": 846
},
{
"epoch": 1.7794117647058822,
"grad_norm": 7.898241318034454,
"learning_rate": 4.263135901316406e-06,
"loss": 0.9362924098968506,
"step": 847
},
{
"epoch": 1.7815126050420167,
"grad_norm": 12.878302238594825,
"learning_rate": 4.251047494335616e-06,
"loss": 1.4257563352584839,
"step": 848
},
{
"epoch": 1.7836134453781511,
"grad_norm": 11.607398907584903,
"learning_rate": 4.238963563947212e-06,
"loss": 1.393942952156067,
"step": 849
},
{
"epoch": 1.7857142857142856,
"grad_norm": 9.414377919796664,
"learning_rate": 4.226884182378513e-06,
"loss": 1.588603138923645,
"step": 850
},
{
"epoch": 1.78781512605042,
"grad_norm": 9.24807113557597,
"learning_rate": 4.2148094218296485e-06,
"loss": 1.198427677154541,
"step": 851
},
{
"epoch": 1.7899159663865545,
"grad_norm": 9.93269092312877,
"learning_rate": 4.202739354473127e-06,
"loss": 1.0912418365478516,
"step": 852
},
{
"epoch": 1.792016806722689,
"grad_norm": 9.934910320433355,
"learning_rate": 4.190674052453405e-06,
"loss": 1.8104877471923828,
"step": 853
},
{
"epoch": 1.7941176470588234,
"grad_norm": 9.124793784485341,
"learning_rate": 4.178613587886455e-06,
"loss": 1.337807297706604,
"step": 854
},
{
"epoch": 1.7962184873949578,
"grad_norm": 12.887138564407095,
"learning_rate": 4.166558032859339e-06,
"loss": 1.0441133975982666,
"step": 855
},
{
"epoch": 1.7983193277310925,
"grad_norm": 13.183784075535518,
"learning_rate": 4.154507459429769e-06,
"loss": 1.8002381324768066,
"step": 856
},
{
"epoch": 1.800420168067227,
"grad_norm": 17.636030537350624,
"learning_rate": 4.142461939625685e-06,
"loss": 1.7534747123718262,
"step": 857
},
{
"epoch": 1.8025210084033614,
"grad_norm": 13.439089280649503,
"learning_rate": 4.13042154544482e-06,
"loss": 2.5967888832092285,
"step": 858
},
{
"epoch": 1.8046218487394958,
"grad_norm": 11.531721271683752,
"learning_rate": 4.1183863488542686e-06,
"loss": 0.9714012145996094,
"step": 859
},
{
"epoch": 1.8067226890756303,
"grad_norm": 39.93239963926744,
"learning_rate": 4.106356421790062e-06,
"loss": 2.0358502864837646,
"step": 860
},
{
"epoch": 1.8088235294117647,
"grad_norm": 13.143824988546921,
"learning_rate": 4.094331836156732e-06,
"loss": 1.2078362703323364,
"step": 861
},
{
"epoch": 1.8109243697478992,
"grad_norm": 17.216726387192157,
"learning_rate": 4.082312663826886e-06,
"loss": 1.3551952838897705,
"step": 862
},
{
"epoch": 1.8130252100840336,
"grad_norm": 9.960021372460178,
"learning_rate": 4.070298976640772e-06,
"loss": 1.7473708391189575,
"step": 863
},
{
"epoch": 1.815126050420168,
"grad_norm": 8.856051876807816,
"learning_rate": 4.058290846405856e-06,
"loss": 1.1888244152069092,
"step": 864
},
{
"epoch": 1.8172268907563025,
"grad_norm": 16.353016649836196,
"learning_rate": 4.046288344896388e-06,
"loss": 1.7867594957351685,
"step": 865
},
{
"epoch": 1.819327731092437,
"grad_norm": 12.062167246659023,
"learning_rate": 4.034291543852973e-06,
"loss": 1.2903845310211182,
"step": 866
},
{
"epoch": 1.8214285714285714,
"grad_norm": 12.560697459985716,
"learning_rate": 4.022300514982146e-06,
"loss": 1.4051203727722168,
"step": 867
},
{
"epoch": 1.8235294117647058,
"grad_norm": 7.497031379547495,
"learning_rate": 4.010315329955941e-06,
"loss": 1.7378381490707397,
"step": 868
},
{
"epoch": 1.8256302521008403,
"grad_norm": 13.858534174862163,
"learning_rate": 3.998336060411459e-06,
"loss": 1.1623207330703735,
"step": 869
},
{
"epoch": 1.8277310924369747,
"grad_norm": 16.260048868681064,
"learning_rate": 3.986362777950448e-06,
"loss": 1.9922326803207397,
"step": 870
},
{
"epoch": 1.8298319327731094,
"grad_norm": 10.526911105706054,
"learning_rate": 3.9743955541388645e-06,
"loss": 1.8183355331420898,
"step": 871
},
{
"epoch": 1.8319327731092439,
"grad_norm": 30.127353686918507,
"learning_rate": 3.962434460506459e-06,
"loss": 1.20865797996521,
"step": 872
},
{
"epoch": 1.8340336134453783,
"grad_norm": 9.693811930511602,
"learning_rate": 3.950479568546336e-06,
"loss": 1.2787063121795654,
"step": 873
},
{
"epoch": 1.8361344537815127,
"grad_norm": 18.05960154864148,
"learning_rate": 3.938530949714533e-06,
"loss": 2.1469886302948,
"step": 874
},
{
"epoch": 1.8382352941176472,
"grad_norm": 18.30460348258425,
"learning_rate": 3.926588675429591e-06,
"loss": 2.5014071464538574,
"step": 875
},
{
"epoch": 1.8403361344537816,
"grad_norm": 9.407437407393076,
"learning_rate": 3.914652817072132e-06,
"loss": 1.2857444286346436,
"step": 876
},
{
"epoch": 1.842436974789916,
"grad_norm": 16.37890925961833,
"learning_rate": 3.902723445984425e-06,
"loss": 0.8846265077590942,
"step": 877
},
{
"epoch": 1.8445378151260505,
"grad_norm": 23.642202819656244,
"learning_rate": 3.890800633469968e-06,
"loss": 3.6164169311523438,
"step": 878
},
{
"epoch": 1.846638655462185,
"grad_norm": 13.713411145435602,
"learning_rate": 3.878884450793053e-06,
"loss": 1.778512954711914,
"step": 879
},
{
"epoch": 1.8487394957983194,
"grad_norm": 14.191930149580415,
"learning_rate": 3.866974969178348e-06,
"loss": 1.2984943389892578,
"step": 880
},
{
"epoch": 1.8508403361344539,
"grad_norm": 11.358190736464952,
"learning_rate": 3.855072259810465e-06,
"loss": 1.233088493347168,
"step": 881
},
{
"epoch": 1.8529411764705883,
"grad_norm": 12.19590848916144,
"learning_rate": 3.8431763938335415e-06,
"loss": 1.0973716974258423,
"step": 882
},
{
"epoch": 1.8550420168067228,
"grad_norm": 8.299847504153597,
"learning_rate": 3.831287442350806e-06,
"loss": 1.9479036331176758,
"step": 883
},
{
"epoch": 1.8571428571428572,
"grad_norm": 25.32246065502413,
"learning_rate": 3.819405476424164e-06,
"loss": 2.3243212699890137,
"step": 884
},
{
"epoch": 1.8592436974789917,
"grad_norm": 16.82297657925073,
"learning_rate": 3.8075305670737605e-06,
"loss": 2.167454242706299,
"step": 885
},
{
"epoch": 1.861344537815126,
"grad_norm": 21.753642919267325,
"learning_rate": 3.795662785277568e-06,
"loss": 1.3896931409835815,
"step": 886
},
{
"epoch": 1.8634453781512605,
"grad_norm": 11.74483761622084,
"learning_rate": 3.783802201970953e-06,
"loss": 1.7062684297561646,
"step": 887
},
{
"epoch": 1.865546218487395,
"grad_norm": 10.437798829559927,
"learning_rate": 3.7719488880462596e-06,
"loss": 1.996096134185791,
"step": 888
},
{
"epoch": 1.8676470588235294,
"grad_norm": 12.757708819402238,
"learning_rate": 3.7601029143523767e-06,
"loss": 0.9396399259567261,
"step": 889
},
{
"epoch": 1.8697478991596639,
"grad_norm": 9.851656503119592,
"learning_rate": 3.748264351694324e-06,
"loss": 1.384545922279358,
"step": 890
},
{
"epoch": 1.8718487394957983,
"grad_norm": 9.806551408884758,
"learning_rate": 3.7364332708328232e-06,
"loss": 1.298504114151001,
"step": 891
},
{
"epoch": 1.8739495798319328,
"grad_norm": 10.623737292924032,
"learning_rate": 3.7246097424838746e-06,
"loss": 1.395151138305664,
"step": 892
},
{
"epoch": 1.8760504201680672,
"grad_norm": 10.061693679195699,
"learning_rate": 3.712793837318338e-06,
"loss": 0.9280238747596741,
"step": 893
},
{
"epoch": 1.8781512605042017,
"grad_norm": 15.132177107654861,
"learning_rate": 3.7009856259615074e-06,
"loss": 1.3795464038848877,
"step": 894
},
{
"epoch": 1.8802521008403361,
"grad_norm": 6.163863926370169,
"learning_rate": 3.689185178992689e-06,
"loss": 1.3113572597503662,
"step": 895
},
{
"epoch": 1.8823529411764706,
"grad_norm": 9.130793358710777,
"learning_rate": 3.677392566944783e-06,
"loss": 1.580859661102295,
"step": 896
},
{
"epoch": 1.884453781512605,
"grad_norm": 9.259911678328434,
"learning_rate": 3.665607860303857e-06,
"loss": 1.5074641704559326,
"step": 897
},
{
"epoch": 1.8865546218487395,
"grad_norm": 11.62240320198324,
"learning_rate": 3.653831129508727e-06,
"loss": 1.438436508178711,
"step": 898
},
{
"epoch": 1.888655462184874,
"grad_norm": 21.40632893627489,
"learning_rate": 3.642062444950537e-06,
"loss": 2.4116339683532715,
"step": 899
},
{
"epoch": 1.8907563025210083,
"grad_norm": 11.75786970161833,
"learning_rate": 3.630301876972337e-06,
"loss": 1.5296099185943604,
"step": 900
},
{
"epoch": 1.8928571428571428,
"grad_norm": 13.176259265738059,
"learning_rate": 3.618549495868662e-06,
"loss": 1.7645788192749023,
"step": 901
},
{
"epoch": 1.8949579831932772,
"grad_norm": 8.946035833702966,
"learning_rate": 3.606805371885117e-06,
"loss": 1.528565526008606,
"step": 902
},
{
"epoch": 1.8970588235294117,
"grad_norm": 9.910748332846175,
"learning_rate": 3.5950695752179487e-06,
"loss": 1.5352060794830322,
"step": 903
},
{
"epoch": 1.8991596638655461,
"grad_norm": 8.872120526454859,
"learning_rate": 3.5833421760136323e-06,
"loss": 1.2181806564331055,
"step": 904
},
{
"epoch": 1.9012605042016806,
"grad_norm": 8.52566041071061,
"learning_rate": 3.5716232443684486e-06,
"loss": 1.2715753316879272,
"step": 905
},
{
"epoch": 1.903361344537815,
"grad_norm": 21.774545687178733,
"learning_rate": 3.559912850328069e-06,
"loss": 1.4792617559432983,
"step": 906
},
{
"epoch": 1.9054621848739495,
"grad_norm": 19.592792336690916,
"learning_rate": 3.5482110638871325e-06,
"loss": 1.072256326675415,
"step": 907
},
{
"epoch": 1.907563025210084,
"grad_norm": 13.065810555435853,
"learning_rate": 3.5365179549888306e-06,
"loss": 1.5988600254058838,
"step": 908
},
{
"epoch": 1.9096638655462184,
"grad_norm": 21.891373834171244,
"learning_rate": 3.524833593524487e-06,
"loss": 2.473078489303589,
"step": 909
},
{
"epoch": 1.9117647058823528,
"grad_norm": 10.756654205561983,
"learning_rate": 3.513158049333141e-06,
"loss": 1.8987966775894165,
"step": 910
},
{
"epoch": 1.9138655462184873,
"grad_norm": 11.56568429501611,
"learning_rate": 3.50149139220113e-06,
"loss": 1.6164718866348267,
"step": 911
},
{
"epoch": 1.9159663865546217,
"grad_norm": 11.650144499924947,
"learning_rate": 3.4898336918616726e-06,
"loss": 1.3376764059066772,
"step": 912
},
{
"epoch": 1.9180672268907561,
"grad_norm": 10.167435576576402,
"learning_rate": 3.47818501799445e-06,
"loss": 1.7546143531799316,
"step": 913
},
{
"epoch": 1.9201680672268906,
"grad_norm": 16.044159587879186,
"learning_rate": 3.4665454402251937e-06,
"loss": 1.4916424751281738,
"step": 914
},
{
"epoch": 1.9222689075630253,
"grad_norm": 9.485222637010883,
"learning_rate": 3.4549150281252635e-06,
"loss": 1.6852712631225586,
"step": 915
},
{
"epoch": 1.9243697478991597,
"grad_norm": 17.238783509738973,
"learning_rate": 3.443293851211237e-06,
"loss": 1.7150108814239502,
"step": 916
},
{
"epoch": 1.9264705882352942,
"grad_norm": 13.404050851317407,
"learning_rate": 3.4316819789444893e-06,
"loss": 1.7211201190948486,
"step": 917
},
{
"epoch": 1.9285714285714286,
"grad_norm": 12.556393984422412,
"learning_rate": 3.4200794807307834e-06,
"loss": 0.7268713712692261,
"step": 918
},
{
"epoch": 1.930672268907563,
"grad_norm": 18.049874392140797,
"learning_rate": 3.40848642591985e-06,
"loss": 1.4612197875976562,
"step": 919
},
{
"epoch": 1.9327731092436975,
"grad_norm": 15.899805779516837,
"learning_rate": 3.3969028838049765e-06,
"loss": 2.3325533866882324,
"step": 920
},
{
"epoch": 1.934873949579832,
"grad_norm": 12.244325200092801,
"learning_rate": 3.3853289236225917e-06,
"loss": 1.3644397258758545,
"step": 921
},
{
"epoch": 1.9369747899159664,
"grad_norm": 12.06507650622523,
"learning_rate": 3.37376461455185e-06,
"loss": 1.405503273010254,
"step": 922
},
{
"epoch": 1.9390756302521008,
"grad_norm": 16.537370916012062,
"learning_rate": 3.362210025714222e-06,
"loss": 1.1844987869262695,
"step": 923
},
{
"epoch": 1.9411764705882353,
"grad_norm": 10.411642432611085,
"learning_rate": 3.350665226173078e-06,
"loss": 2.294912815093994,
"step": 924
},
{
"epoch": 1.9432773109243697,
"grad_norm": 7.385187019711569,
"learning_rate": 3.339130284933276e-06,
"loss": 1.3060452938079834,
"step": 925
},
{
"epoch": 1.9453781512605042,
"grad_norm": 11.928015027901997,
"learning_rate": 3.327605270940751e-06,
"loss": 1.5017865896224976,
"step": 926
},
{
"epoch": 1.9474789915966386,
"grad_norm": 23.754968850349012,
"learning_rate": 3.316090253082101e-06,
"loss": 1.9816479682922363,
"step": 927
},
{
"epoch": 1.949579831932773,
"grad_norm": 15.3357371335637,
"learning_rate": 3.304585300184173e-06,
"loss": 1.4715440273284912,
"step": 928
},
{
"epoch": 1.9516806722689075,
"grad_norm": 12.706896195884193,
"learning_rate": 3.293090481013661e-06,
"loss": 1.321998953819275,
"step": 929
},
{
"epoch": 1.9537815126050422,
"grad_norm": 8.254920710769508,
"learning_rate": 3.28160586427668e-06,
"loss": 1.1600078344345093,
"step": 930
},
{
"epoch": 1.9558823529411766,
"grad_norm": 12.971411985932859,
"learning_rate": 3.2701315186183692e-06,
"loss": 1.5981496572494507,
"step": 931
},
{
"epoch": 1.957983193277311,
"grad_norm": 9.707338087138305,
"learning_rate": 3.258667512622475e-06,
"loss": 1.9018357992172241,
"step": 932
},
{
"epoch": 1.9600840336134455,
"grad_norm": 19.34885988489593,
"learning_rate": 3.2472139148109416e-06,
"loss": 1.175397515296936,
"step": 933
},
{
"epoch": 1.96218487394958,
"grad_norm": 14.470424387669999,
"learning_rate": 3.2357707936435013e-06,
"loss": 0.5444597005844116,
"step": 934
},
{
"epoch": 1.9642857142857144,
"grad_norm": 5.567855687640852,
"learning_rate": 3.224338217517269e-06,
"loss": 1.3773345947265625,
"step": 935
},
{
"epoch": 1.9663865546218489,
"grad_norm": 14.622402074305704,
"learning_rate": 3.212916254766326e-06,
"loss": 2.028517007827759,
"step": 936
},
{
"epoch": 1.9684873949579833,
"grad_norm": 9.79269410710096,
"learning_rate": 3.20150497366132e-06,
"loss": 0.9947667121887207,
"step": 937
},
{
"epoch": 1.9705882352941178,
"grad_norm": 27.334085978635635,
"learning_rate": 3.190104442409052e-06,
"loss": 1.7532271146774292,
"step": 938
},
{
"epoch": 1.9726890756302522,
"grad_norm": 12.841320289359894,
"learning_rate": 3.1787147291520675e-06,
"loss": 2.114809036254883,
"step": 939
},
{
"epoch": 1.9747899159663866,
"grad_norm": 11.483734843753465,
"learning_rate": 3.1673359019682538e-06,
"loss": 2.2796754837036133,
"step": 940
},
{
"epoch": 1.976890756302521,
"grad_norm": 11.440585157823008,
"learning_rate": 3.1559680288704297e-06,
"loss": 1.172208547592163,
"step": 941
},
{
"epoch": 1.9789915966386555,
"grad_norm": 17.223673892561905,
"learning_rate": 3.1446111778059405e-06,
"loss": 1.1454124450683594,
"step": 942
},
{
"epoch": 1.98109243697479,
"grad_norm": 9.23862037561014,
"learning_rate": 3.1332654166562494e-06,
"loss": 1.6078896522521973,
"step": 943
},
{
"epoch": 1.9831932773109244,
"grad_norm": 8.824526781008897,
"learning_rate": 3.1219308132365365e-06,
"loss": 1.2369673252105713,
"step": 944
},
{
"epoch": 1.9852941176470589,
"grad_norm": 10.697868664174909,
"learning_rate": 3.110607435295289e-06,
"loss": 1.075582504272461,
"step": 945
},
{
"epoch": 1.9873949579831933,
"grad_norm": 10.407621014847015,
"learning_rate": 3.099295350513898e-06,
"loss": 0.9495413899421692,
"step": 946
},
{
"epoch": 1.9894957983193278,
"grad_norm": 12.92768742395414,
"learning_rate": 3.087994626506254e-06,
"loss": 1.3577098846435547,
"step": 947
},
{
"epoch": 1.9915966386554622,
"grad_norm": 15.824553960297969,
"learning_rate": 3.0767053308183416e-06,
"loss": 1.229673981666565,
"step": 948
},
{
"epoch": 1.9936974789915967,
"grad_norm": 10.163559398603317,
"learning_rate": 3.0654275309278382e-06,
"loss": 1.3727761507034302,
"step": 949
},
{
"epoch": 1.995798319327731,
"grad_norm": 24.18617513536731,
"learning_rate": 3.0541612942437095e-06,
"loss": 1.2849650382995605,
"step": 950
},
{
"epoch": 1.9978991596638656,
"grad_norm": 11.306358671227525,
"learning_rate": 3.0429066881058036e-06,
"loss": 1.644538164138794,
"step": 951
},
{
"epoch": 2.0,
"grad_norm": 8.518728725517601,
"learning_rate": 3.031663779784454e-06,
"loss": 1.627841591835022,
"step": 952
},
{
"epoch": 2.0021008403361344,
"grad_norm": 7.931423247551538,
"learning_rate": 3.020432636480074e-06,
"loss": 0.2781870365142822,
"step": 953
},
{
"epoch": 2.004201680672269,
"grad_norm": 10.005905501706337,
"learning_rate": 3.0092133253227563e-06,
"loss": 0.4595562815666199,
"step": 954
},
{
"epoch": 2.0063025210084033,
"grad_norm": 11.609856477894333,
"learning_rate": 2.9980059133718687e-06,
"loss": 0.5985803604125977,
"step": 955
},
{
"epoch": 2.008403361344538,
"grad_norm": 9.586217141888886,
"learning_rate": 2.986810467615659e-06,
"loss": 0.41152679920196533,
"step": 956
},
{
"epoch": 2.0105042016806722,
"grad_norm": 7.487148279080336,
"learning_rate": 2.9756270549708497e-06,
"loss": 0.2737478017807007,
"step": 957
},
{
"epoch": 2.0126050420168067,
"grad_norm": 7.91452475392683,
"learning_rate": 2.9644557422822406e-06,
"loss": 0.3578256368637085,
"step": 958
},
{
"epoch": 2.014705882352941,
"grad_norm": 8.457279529419324,
"learning_rate": 2.9532965963223076e-06,
"loss": 0.3519413471221924,
"step": 959
},
{
"epoch": 2.0168067226890756,
"grad_norm": 12.98619129259147,
"learning_rate": 2.9421496837908036e-06,
"loss": 0.5450835227966309,
"step": 960
},
{
"epoch": 2.01890756302521,
"grad_norm": 10.10649833997759,
"learning_rate": 2.9310150713143637e-06,
"loss": 0.7118933796882629,
"step": 961
},
{
"epoch": 2.0210084033613445,
"grad_norm": 14.582945225729798,
"learning_rate": 2.9198928254461e-06,
"loss": 0.6735545992851257,
"step": 962
},
{
"epoch": 2.023109243697479,
"grad_norm": 13.612285721248329,
"learning_rate": 2.908783012665209e-06,
"loss": 0.6521182060241699,
"step": 963
},
{
"epoch": 2.0252100840336134,
"grad_norm": 9.78372244316324,
"learning_rate": 2.8976856993765766e-06,
"loss": 0.476604163646698,
"step": 964
},
{
"epoch": 2.027310924369748,
"grad_norm": 12.44882078007661,
"learning_rate": 2.8866009519103705e-06,
"loss": 0.46952176094055176,
"step": 965
},
{
"epoch": 2.0294117647058822,
"grad_norm": 9.57528793268096,
"learning_rate": 2.875528836521658e-06,
"loss": 0.4453829526901245,
"step": 966
},
{
"epoch": 2.0315126050420167,
"grad_norm": 10.439598731466846,
"learning_rate": 2.864469419389997e-06,
"loss": 0.2944750189781189,
"step": 967
},
{
"epoch": 2.033613445378151,
"grad_norm": 11.844927445149192,
"learning_rate": 2.8534227666190484e-06,
"loss": 0.8550271391868591,
"step": 968
},
{
"epoch": 2.0357142857142856,
"grad_norm": 13.15280870383674,
"learning_rate": 2.8423889442361797e-06,
"loss": 0.38192200660705566,
"step": 969
},
{
"epoch": 2.03781512605042,
"grad_norm": 8.57118977839964,
"learning_rate": 2.831368018192071e-06,
"loss": 0.7316254377365112,
"step": 970
},
{
"epoch": 2.0399159663865545,
"grad_norm": 11.697813667138346,
"learning_rate": 2.8203600543603116e-06,
"loss": 0.7615312337875366,
"step": 971
},
{
"epoch": 2.042016806722689,
"grad_norm": 15.849948895683397,
"learning_rate": 2.809365118537024e-06,
"loss": 0.8274880647659302,
"step": 972
},
{
"epoch": 2.0441176470588234,
"grad_norm": 10.693659421700279,
"learning_rate": 2.7983832764404517e-06,
"loss": 0.31469643115997314,
"step": 973
},
{
"epoch": 2.046218487394958,
"grad_norm": 11.44650988759124,
"learning_rate": 2.787414593710583e-06,
"loss": 0.20855772495269775,
"step": 974
},
{
"epoch": 2.0483193277310923,
"grad_norm": 15.81504400596588,
"learning_rate": 2.7764591359087415e-06,
"loss": 0.6759412884712219,
"step": 975
},
{
"epoch": 2.0504201680672267,
"grad_norm": 10.204964393759596,
"learning_rate": 2.7655169685172146e-06,
"loss": 0.4555593430995941,
"step": 976
},
{
"epoch": 2.052521008403361,
"grad_norm": 11.261244778956014,
"learning_rate": 2.7545881569388404e-06,
"loss": 0.22477459907531738,
"step": 977
},
{
"epoch": 2.0546218487394956,
"grad_norm": 12.246362257874592,
"learning_rate": 2.7436727664966368e-06,
"loss": 0.47387319803237915,
"step": 978
},
{
"epoch": 2.05672268907563,
"grad_norm": 8.866095217298547,
"learning_rate": 2.7327708624333936e-06,
"loss": 0.46857523918151855,
"step": 979
},
{
"epoch": 2.0588235294117645,
"grad_norm": 13.695082205363835,
"learning_rate": 2.7218825099112966e-06,
"loss": 0.4427967071533203,
"step": 980
},
{
"epoch": 2.060924369747899,
"grad_norm": 14.011986042608353,
"learning_rate": 2.7110077740115315e-06,
"loss": 1.3617768287658691,
"step": 981
},
{
"epoch": 2.0630252100840334,
"grad_norm": 11.10530101134504,
"learning_rate": 2.7001467197338905e-06,
"loss": 0.8060270547866821,
"step": 982
},
{
"epoch": 2.0651260504201683,
"grad_norm": 11.494347595831918,
"learning_rate": 2.6892994119963965e-06,
"loss": 0.29366880655288696,
"step": 983
},
{
"epoch": 2.0672268907563027,
"grad_norm": 11.085826364505666,
"learning_rate": 2.678465915634899e-06,
"loss": 0.40074852108955383,
"step": 984
},
{
"epoch": 2.069327731092437,
"grad_norm": 12.536178143522665,
"learning_rate": 2.667646295402704e-06,
"loss": 0.4710817337036133,
"step": 985
},
{
"epoch": 2.0714285714285716,
"grad_norm": 10.305745046176337,
"learning_rate": 2.656840615970169e-06,
"loss": 0.37437137961387634,
"step": 986
},
{
"epoch": 2.073529411764706,
"grad_norm": 19.03565667772653,
"learning_rate": 2.646048941924333e-06,
"loss": 0.9739346504211426,
"step": 987
},
{
"epoch": 2.0756302521008405,
"grad_norm": 14.96080509908609,
"learning_rate": 2.635271337768517e-06,
"loss": 0.6326197981834412,
"step": 988
},
{
"epoch": 2.077731092436975,
"grad_norm": 7.840013094660732,
"learning_rate": 2.6245078679219503e-06,
"loss": 0.15397483110427856,
"step": 989
},
{
"epoch": 2.0798319327731094,
"grad_norm": 14.418579873307118,
"learning_rate": 2.613758596719373e-06,
"loss": 0.5905511379241943,
"step": 990
},
{
"epoch": 2.081932773109244,
"grad_norm": 11.365880088991135,
"learning_rate": 2.603023588410662e-06,
"loss": 0.588984489440918,
"step": 991
},
{
"epoch": 2.0840336134453783,
"grad_norm": 8.467121557746795,
"learning_rate": 2.5923029071604443e-06,
"loss": 0.33690521121025085,
"step": 992
},
{
"epoch": 2.0861344537815127,
"grad_norm": 12.160131630042047,
"learning_rate": 2.5815966170477065e-06,
"loss": 0.23294681310653687,
"step": 993
},
{
"epoch": 2.088235294117647,
"grad_norm": 10.365330226343618,
"learning_rate": 2.5709047820654236e-06,
"loss": 0.4404110908508301,
"step": 994
},
{
"epoch": 2.0903361344537816,
"grad_norm": 11.818988939924239,
"learning_rate": 2.5602274661201643e-06,
"loss": 0.37340593338012695,
"step": 995
},
{
"epoch": 2.092436974789916,
"grad_norm": 8.208877260345274,
"learning_rate": 2.549564733031722e-06,
"loss": 0.3671455979347229,
"step": 996
},
{
"epoch": 2.0945378151260505,
"grad_norm": 7.093067310603152,
"learning_rate": 2.538916646532718e-06,
"loss": 0.6218878030776978,
"step": 997
},
{
"epoch": 2.096638655462185,
"grad_norm": 18.77124675473162,
"learning_rate": 2.528283270268238e-06,
"loss": 1.2778301239013672,
"step": 998
},
{
"epoch": 2.0987394957983194,
"grad_norm": 11.063294944827689,
"learning_rate": 2.517664667795434e-06,
"loss": 0.6543454527854919,
"step": 999
},
{
"epoch": 2.100840336134454,
"grad_norm": 14.644880561077354,
"learning_rate": 2.5070609025831605e-06,
"loss": 0.42762574553489685,
"step": 1000
},
{
"epoch": 2.1029411764705883,
"grad_norm": 9.207415458189004,
"learning_rate": 2.49647203801158e-06,
"loss": 0.40861833095550537,
"step": 1001
},
{
"epoch": 2.1050420168067228,
"grad_norm": 13.2306061568643,
"learning_rate": 2.4858981373718006e-06,
"loss": 0.6941218376159668,
"step": 1002
},
{
"epoch": 2.107142857142857,
"grad_norm": 7.498871262137285,
"learning_rate": 2.47533926386548e-06,
"loss": 0.1615523397922516,
"step": 1003
},
{
"epoch": 2.1092436974789917,
"grad_norm": 10.133323758514624,
"learning_rate": 2.4647954806044633e-06,
"loss": 0.30699750781059265,
"step": 1004
},
{
"epoch": 2.111344537815126,
"grad_norm": 7.757159399213717,
"learning_rate": 2.454266850610398e-06,
"loss": 0.27435654401779175,
"step": 1005
},
{
"epoch": 2.1134453781512605,
"grad_norm": 12.86368889886839,
"learning_rate": 2.443753436814354e-06,
"loss": 0.6352673172950745,
"step": 1006
},
{
"epoch": 2.115546218487395,
"grad_norm": 10.035026243076201,
"learning_rate": 2.433255302056458e-06,
"loss": 0.3478729724884033,
"step": 1007
},
{
"epoch": 2.1176470588235294,
"grad_norm": 24.70500681984219,
"learning_rate": 2.4227725090855063e-06,
"loss": 0.3971726894378662,
"step": 1008
},
{
"epoch": 2.119747899159664,
"grad_norm": 10.231694956778009,
"learning_rate": 2.412305120558599e-06,
"loss": 0.9241357445716858,
"step": 1009
},
{
"epoch": 2.1218487394957983,
"grad_norm": 10.754074840458836,
"learning_rate": 2.40185319904076e-06,
"loss": 0.2883678674697876,
"step": 1010
},
{
"epoch": 2.1239495798319328,
"grad_norm": 11.61442466478921,
"learning_rate": 2.391416807004568e-06,
"loss": 0.39812758564949036,
"step": 1011
},
{
"epoch": 2.1260504201680672,
"grad_norm": 12.063207487307261,
"learning_rate": 2.3809960068297732e-06,
"loss": 0.6487483978271484,
"step": 1012
},
{
"epoch": 2.1281512605042017,
"grad_norm": 10.36458033229305,
"learning_rate": 2.370590860802938e-06,
"loss": 0.44781216979026794,
"step": 1013
},
{
"epoch": 2.130252100840336,
"grad_norm": 12.86609548868944,
"learning_rate": 2.3602014311170524e-06,
"loss": 0.3241298496723175,
"step": 1014
},
{
"epoch": 2.1323529411764706,
"grad_norm": 15.003708633549396,
"learning_rate": 2.3498277798711725e-06,
"loss": 0.4608106315135956,
"step": 1015
},
{
"epoch": 2.134453781512605,
"grad_norm": 12.622777494736392,
"learning_rate": 2.3394699690700395e-06,
"loss": 0.5967488884925842,
"step": 1016
},
{
"epoch": 2.1365546218487395,
"grad_norm": 12.188684362144896,
"learning_rate": 2.3291280606237186e-06,
"loss": 0.4074782729148865,
"step": 1017
},
{
"epoch": 2.138655462184874,
"grad_norm": 8.220918786617895,
"learning_rate": 2.3188021163472206e-06,
"loss": 0.354820191860199,
"step": 1018
},
{
"epoch": 2.1407563025210083,
"grad_norm": 11.553371341140592,
"learning_rate": 2.308492197960141e-06,
"loss": 0.23287059366703033,
"step": 1019
},
{
"epoch": 2.142857142857143,
"grad_norm": 14.317771847578689,
"learning_rate": 2.2981983670862796e-06,
"loss": 1.0973201990127563,
"step": 1020
},
{
"epoch": 2.1449579831932772,
"grad_norm": 15.607772284187238,
"learning_rate": 2.2879206852532854e-06,
"loss": 0.4452645778656006,
"step": 1021
},
{
"epoch": 2.1470588235294117,
"grad_norm": 7.00439488336282,
"learning_rate": 2.2776592138922806e-06,
"loss": 0.273881733417511,
"step": 1022
},
{
"epoch": 2.149159663865546,
"grad_norm": 7.187420218160357,
"learning_rate": 2.2674140143374904e-06,
"loss": 0.20633578300476074,
"step": 1023
},
{
"epoch": 2.1512605042016806,
"grad_norm": 13.420920542298727,
"learning_rate": 2.2571851478258903e-06,
"loss": 0.38969674706459045,
"step": 1024
},
{
"epoch": 2.153361344537815,
"grad_norm": 10.472493973829605,
"learning_rate": 2.2469726754968207e-06,
"loss": 0.26989856362342834,
"step": 1025
},
{
"epoch": 2.1554621848739495,
"grad_norm": 7.949292792396312,
"learning_rate": 2.236776658391641e-06,
"loss": 0.5260115265846252,
"step": 1026
},
{
"epoch": 2.157563025210084,
"grad_norm": 10.053195823647455,
"learning_rate": 2.2265971574533474e-06,
"loss": 0.2469472587108612,
"step": 1027
},
{
"epoch": 2.1596638655462184,
"grad_norm": 10.2298360545254,
"learning_rate": 2.2164342335262244e-06,
"loss": 0.201723113656044,
"step": 1028
},
{
"epoch": 2.161764705882353,
"grad_norm": 8.12616695246335,
"learning_rate": 2.2062879473554654e-06,
"loss": 0.6355183124542236,
"step": 1029
},
{
"epoch": 2.1638655462184873,
"grad_norm": 8.264174037140188,
"learning_rate": 2.1961583595868253e-06,
"loss": 0.32272863388061523,
"step": 1030
},
{
"epoch": 2.1659663865546217,
"grad_norm": 7.57735519623744,
"learning_rate": 2.186045530766244e-06,
"loss": 0.24386917054653168,
"step": 1031
},
{
"epoch": 2.168067226890756,
"grad_norm": 14.627933253501514,
"learning_rate": 2.1759495213394965e-06,
"loss": 0.482686847448349,
"step": 1032
},
{
"epoch": 2.1701680672268906,
"grad_norm": 11.019581985915595,
"learning_rate": 2.165870391651819e-06,
"loss": 0.5142661333084106,
"step": 1033
},
{
"epoch": 2.172268907563025,
"grad_norm": 5.2143700025854605,
"learning_rate": 2.155808201947563e-06,
"loss": 0.21703539788722992,
"step": 1034
},
{
"epoch": 2.1743697478991595,
"grad_norm": 8.891172137251035,
"learning_rate": 2.145763012369824e-06,
"loss": 0.4068147540092468,
"step": 1035
},
{
"epoch": 2.176470588235294,
"grad_norm": 8.535335024751161,
"learning_rate": 2.1357348829600816e-06,
"loss": 0.5949288606643677,
"step": 1036
},
{
"epoch": 2.1785714285714284,
"grad_norm": 9.215827064680754,
"learning_rate": 2.125723873657852e-06,
"loss": 0.44353166222572327,
"step": 1037
},
{
"epoch": 2.180672268907563,
"grad_norm": 10.942670044267897,
"learning_rate": 2.115730044300313e-06,
"loss": 0.4212431013584137,
"step": 1038
},
{
"epoch": 2.1827731092436973,
"grad_norm": 9.670977047855832,
"learning_rate": 2.105753454621966e-06,
"loss": 0.37279778718948364,
"step": 1039
},
{
"epoch": 2.184873949579832,
"grad_norm": 15.451697195363522,
"learning_rate": 2.095794164254259e-06,
"loss": 0.6137001514434814,
"step": 1040
},
{
"epoch": 2.1869747899159666,
"grad_norm": 9.221135375907481,
"learning_rate": 2.0858522327252467e-06,
"loss": 0.20706136524677277,
"step": 1041
},
{
"epoch": 2.189075630252101,
"grad_norm": 7.176753088694497,
"learning_rate": 2.0759277194592208e-06,
"loss": 0.3732944130897522,
"step": 1042
},
{
"epoch": 2.1911764705882355,
"grad_norm": 6.567221722351546,
"learning_rate": 2.06602068377637e-06,
"loss": 0.11849310249090195,
"step": 1043
},
{
"epoch": 2.19327731092437,
"grad_norm": 8.709699534320997,
"learning_rate": 2.0561311848924082e-06,
"loss": 0.35089147090911865,
"step": 1044
},
{
"epoch": 2.1953781512605044,
"grad_norm": 6.892712538403868,
"learning_rate": 2.0462592819182377e-06,
"loss": 0.3482816219329834,
"step": 1045
},
{
"epoch": 2.197478991596639,
"grad_norm": 11.886398314518281,
"learning_rate": 2.0364050338595792e-06,
"loss": 0.9048193097114563,
"step": 1046
},
{
"epoch": 2.1995798319327733,
"grad_norm": 9.38373013746351,
"learning_rate": 2.0265684996166345e-06,
"loss": 0.34331268072128296,
"step": 1047
},
{
"epoch": 2.2016806722689077,
"grad_norm": 13.372941805785942,
"learning_rate": 2.0167497379837254e-06,
"loss": 0.35536718368530273,
"step": 1048
},
{
"epoch": 2.203781512605042,
"grad_norm": 7.556671458015662,
"learning_rate": 2.0069488076489445e-06,
"loss": 0.20954403281211853,
"step": 1049
},
{
"epoch": 2.2058823529411766,
"grad_norm": 8.35211924521852,
"learning_rate": 1.997165767193801e-06,
"loss": 0.5290908813476562,
"step": 1050
},
{
"epoch": 2.207983193277311,
"grad_norm": 10.477600594311985,
"learning_rate": 1.9874006750928783e-06,
"loss": 0.44289880990982056,
"step": 1051
},
{
"epoch": 2.2100840336134455,
"grad_norm": 8.191084415042441,
"learning_rate": 1.97765358971348e-06,
"loss": 0.48035284876823425,
"step": 1052
},
{
"epoch": 2.21218487394958,
"grad_norm": 14.892166225942573,
"learning_rate": 1.967924569315275e-06,
"loss": 0.2514810562133789,
"step": 1053
},
{
"epoch": 2.2142857142857144,
"grad_norm": 9.73259092640212,
"learning_rate": 1.958213672049964e-06,
"loss": 0.9599279165267944,
"step": 1054
},
{
"epoch": 2.216386554621849,
"grad_norm": 10.01655023470503,
"learning_rate": 1.9485209559609148e-06,
"loss": 0.30860060453414917,
"step": 1055
},
{
"epoch": 2.2184873949579833,
"grad_norm": 21.410996670654146,
"learning_rate": 1.9388464789828316e-06,
"loss": 0.7747633457183838,
"step": 1056
},
{
"epoch": 2.2205882352941178,
"grad_norm": 14.431755517939498,
"learning_rate": 1.9291902989413935e-06,
"loss": 0.3529064655303955,
"step": 1057
},
{
"epoch": 2.222689075630252,
"grad_norm": 19.684041196466477,
"learning_rate": 1.9195524735529237e-06,
"loss": 1.0967960357666016,
"step": 1058
},
{
"epoch": 2.2247899159663866,
"grad_norm": 9.812143417300405,
"learning_rate": 1.909933060424029e-06,
"loss": 0.700248122215271,
"step": 1059
},
{
"epoch": 2.226890756302521,
"grad_norm": 10.765309787627796,
"learning_rate": 1.9003321170512728e-06,
"loss": 0.9177491068840027,
"step": 1060
},
{
"epoch": 2.2289915966386555,
"grad_norm": 12.811209076397098,
"learning_rate": 1.890749700820813e-06,
"loss": 0.543596625328064,
"step": 1061
},
{
"epoch": 2.23109243697479,
"grad_norm": 17.31969106411562,
"learning_rate": 1.8811858690080764e-06,
"loss": 0.7324357032775879,
"step": 1062
},
{
"epoch": 2.2331932773109244,
"grad_norm": 9.418477503451474,
"learning_rate": 1.8716406787774e-06,
"loss": 0.4075426459312439,
"step": 1063
},
{
"epoch": 2.235294117647059,
"grad_norm": 7.9458980097838605,
"learning_rate": 1.862114187181705e-06,
"loss": 0.39563894271850586,
"step": 1064
},
{
"epoch": 2.2373949579831933,
"grad_norm": 8.299260491259234,
"learning_rate": 1.8526064511621455e-06,
"loss": 0.37604600191116333,
"step": 1065
},
{
"epoch": 2.2394957983193278,
"grad_norm": 10.435458479716717,
"learning_rate": 1.843117527547768e-06,
"loss": 0.6682062745094299,
"step": 1066
},
{
"epoch": 2.241596638655462,
"grad_norm": 8.776734857977067,
"learning_rate": 1.8336474730551807e-06,
"loss": 0.19220635294914246,
"step": 1067
},
{
"epoch": 2.2436974789915967,
"grad_norm": 14.521651377727974,
"learning_rate": 1.8241963442882005e-06,
"loss": 0.27735865116119385,
"step": 1068
},
{
"epoch": 2.245798319327731,
"grad_norm": 8.651493755796526,
"learning_rate": 1.8147641977375313e-06,
"loss": 0.41572022438049316,
"step": 1069
},
{
"epoch": 2.2478991596638656,
"grad_norm": 7.20135853576087,
"learning_rate": 1.8053510897804105e-06,
"loss": 0.25049227476119995,
"step": 1070
},
{
"epoch": 2.25,
"grad_norm": 8.485627286621954,
"learning_rate": 1.7959570766802847e-06,
"loss": 0.17869159579277039,
"step": 1071
},
{
"epoch": 2.2521008403361344,
"grad_norm": 8.831664553556859,
"learning_rate": 1.786582214586462e-06,
"loss": 0.2621746361255646,
"step": 1072
},
{
"epoch": 2.254201680672269,
"grad_norm": 13.640791806331189,
"learning_rate": 1.77722655953379e-06,
"loss": 0.33446362614631653,
"step": 1073
},
{
"epoch": 2.2563025210084033,
"grad_norm": 5.657158630793571,
"learning_rate": 1.7678901674423044e-06,
"loss": 0.17267954349517822,
"step": 1074
},
{
"epoch": 2.258403361344538,
"grad_norm": 11.516922535812704,
"learning_rate": 1.7585730941169105e-06,
"loss": 0.5281901955604553,
"step": 1075
},
{
"epoch": 2.2605042016806722,
"grad_norm": 11.298403957574713,
"learning_rate": 1.7492753952470415e-06,
"loss": 0.2754780352115631,
"step": 1076
},
{
"epoch": 2.2626050420168067,
"grad_norm": 10.413722402153681,
"learning_rate": 1.739997126406322e-06,
"loss": 0.3246016502380371,
"step": 1077
},
{
"epoch": 2.264705882352941,
"grad_norm": 14.097971965363062,
"learning_rate": 1.7307383430522474e-06,
"loss": 0.6660511493682861,
"step": 1078
},
{
"epoch": 2.2668067226890756,
"grad_norm": 22.503701517732946,
"learning_rate": 1.7214991005258386e-06,
"loss": 1.2165361642837524,
"step": 1079
},
{
"epoch": 2.26890756302521,
"grad_norm": 8.328219817576464,
"learning_rate": 1.7122794540513265e-06,
"loss": 0.18396508693695068,
"step": 1080
},
{
"epoch": 2.2710084033613445,
"grad_norm": 7.768308930354123,
"learning_rate": 1.703079458735805e-06,
"loss": 0.42018991708755493,
"step": 1081
},
{
"epoch": 2.273109243697479,
"grad_norm": 9.610477928803583,
"learning_rate": 1.6938991695689184e-06,
"loss": 0.38192903995513916,
"step": 1082
},
{
"epoch": 2.2752100840336134,
"grad_norm": 9.57071965935329,
"learning_rate": 1.684738641422517e-06,
"loss": 0.4953494966030121,
"step": 1083
},
{
"epoch": 2.277310924369748,
"grad_norm": 11.62580762547179,
"learning_rate": 1.6755979290503437e-06,
"loss": 0.5324037075042725,
"step": 1084
},
{
"epoch": 2.2794117647058822,
"grad_norm": 9.119930665905265,
"learning_rate": 1.666477087087694e-06,
"loss": 0.6618460416793823,
"step": 1085
},
{
"epoch": 2.2815126050420167,
"grad_norm": 12.668770516893803,
"learning_rate": 1.6573761700511004e-06,
"loss": 0.29154300689697266,
"step": 1086
},
{
"epoch": 2.283613445378151,
"grad_norm": 10.126878534173718,
"learning_rate": 1.6482952323379958e-06,
"loss": 0.39994263648986816,
"step": 1087
},
{
"epoch": 2.2857142857142856,
"grad_norm": 8.084921146733947,
"learning_rate": 1.639234328226399e-06,
"loss": 0.2049681693315506,
"step": 1088
},
{
"epoch": 2.28781512605042,
"grad_norm": 9.167757841002748,
"learning_rate": 1.6301935118745826e-06,
"loss": 0.35848674178123474,
"step": 1089
},
{
"epoch": 2.2899159663865545,
"grad_norm": 12.543365522318467,
"learning_rate": 1.621172837320754e-06,
"loss": 0.4794918894767761,
"step": 1090
},
{
"epoch": 2.292016806722689,
"grad_norm": 6.873717233986044,
"learning_rate": 1.6121723584827259e-06,
"loss": 0.3671627342700958,
"step": 1091
},
{
"epoch": 2.2941176470588234,
"grad_norm": 9.315544619619539,
"learning_rate": 1.6031921291576048e-06,
"loss": 0.25063830614089966,
"step": 1092
},
{
"epoch": 2.296218487394958,
"grad_norm": 11.618408926786485,
"learning_rate": 1.5942322030214547e-06,
"loss": 0.7581193447113037,
"step": 1093
},
{
"epoch": 2.2983193277310923,
"grad_norm": 7.9613247000723595,
"learning_rate": 1.5852926336289926e-06,
"loss": 0.4217086434364319,
"step": 1094
},
{
"epoch": 2.3004201680672267,
"grad_norm": 10.341036096752598,
"learning_rate": 1.5763734744132587e-06,
"loss": 0.5018645524978638,
"step": 1095
},
{
"epoch": 2.302521008403361,
"grad_norm": 14.166467122386207,
"learning_rate": 1.5674747786852935e-06,
"loss": 0.5745636224746704,
"step": 1096
},
{
"epoch": 2.3046218487394956,
"grad_norm": 9.77165887856765,
"learning_rate": 1.5585965996338314e-06,
"loss": 0.9145222902297974,
"step": 1097
},
{
"epoch": 2.30672268907563,
"grad_norm": 15.937224453039251,
"learning_rate": 1.5497389903249705e-06,
"loss": 0.4312666058540344,
"step": 1098
},
{
"epoch": 2.3088235294117645,
"grad_norm": 8.945920679970577,
"learning_rate": 1.5409020037018652e-06,
"loss": 0.4121660590171814,
"step": 1099
},
{
"epoch": 2.310924369747899,
"grad_norm": 10.839281933281265,
"learning_rate": 1.5320856925843997e-06,
"loss": 0.8646482825279236,
"step": 1100
},
{
"epoch": 2.3130252100840334,
"grad_norm": 12.806561724880765,
"learning_rate": 1.5232901096688847e-06,
"loss": 0.784586489200592,
"step": 1101
},
{
"epoch": 2.315126050420168,
"grad_norm": 10.817682905964707,
"learning_rate": 1.5145153075277286e-06,
"loss": 0.9424635171890259,
"step": 1102
},
{
"epoch": 2.3172268907563023,
"grad_norm": 8.922023653272449,
"learning_rate": 1.505761338609137e-06,
"loss": 0.28385645151138306,
"step": 1103
},
{
"epoch": 2.3193277310924367,
"grad_norm": 15.30593506620364,
"learning_rate": 1.4970282552367854e-06,
"loss": 0.6689031720161438,
"step": 1104
},
{
"epoch": 2.3214285714285716,
"grad_norm": 10.05546946420467,
"learning_rate": 1.4883161096095189e-06,
"loss": 0.691364586353302,
"step": 1105
},
{
"epoch": 2.323529411764706,
"grad_norm": 13.976863852979069,
"learning_rate": 1.4796249538010354e-06,
"loss": 0.23520073294639587,
"step": 1106
},
{
"epoch": 2.3256302521008405,
"grad_norm": 9.578643377397341,
"learning_rate": 1.4709548397595674e-06,
"loss": 0.4271107316017151,
"step": 1107
},
{
"epoch": 2.327731092436975,
"grad_norm": 16.17388877757899,
"learning_rate": 1.4623058193075852e-06,
"loss": 0.9280604720115662,
"step": 1108
},
{
"epoch": 2.3298319327731094,
"grad_norm": 13.041308775276805,
"learning_rate": 1.453677944141474e-06,
"loss": 0.33376407623291016,
"step": 1109
},
{
"epoch": 2.331932773109244,
"grad_norm": 13.186142451412863,
"learning_rate": 1.4450712658312356e-06,
"loss": 0.7442219853401184,
"step": 1110
},
{
"epoch": 2.3340336134453783,
"grad_norm": 10.969810510823187,
"learning_rate": 1.43648583582017e-06,
"loss": 1.27920663356781,
"step": 1111
},
{
"epoch": 2.3361344537815127,
"grad_norm": 22.653518753891586,
"learning_rate": 1.4279217054245793e-06,
"loss": 0.6456579566001892,
"step": 1112
},
{
"epoch": 2.338235294117647,
"grad_norm": 13.638307761366974,
"learning_rate": 1.4193789258334485e-06,
"loss": 1.1350394487380981,
"step": 1113
},
{
"epoch": 2.3403361344537816,
"grad_norm": 10.59397199917471,
"learning_rate": 1.4108575481081522e-06,
"loss": 0.5290108919143677,
"step": 1114
},
{
"epoch": 2.342436974789916,
"grad_norm": 9.100247445169298,
"learning_rate": 1.4023576231821362e-06,
"loss": 0.2833002209663391,
"step": 1115
},
{
"epoch": 2.3445378151260505,
"grad_norm": 18.49442431345445,
"learning_rate": 1.3938792018606278e-06,
"loss": 0.37826409935951233,
"step": 1116
},
{
"epoch": 2.346638655462185,
"grad_norm": 12.477810112402349,
"learning_rate": 1.3854223348203171e-06,
"loss": 0.3945717215538025,
"step": 1117
},
{
"epoch": 2.3487394957983194,
"grad_norm": 8.789544191123422,
"learning_rate": 1.376987072609065e-06,
"loss": 0.31352269649505615,
"step": 1118
},
{
"epoch": 2.350840336134454,
"grad_norm": 14.219313270123468,
"learning_rate": 1.368573465645599e-06,
"loss": 0.8024647235870361,
"step": 1119
},
{
"epoch": 2.3529411764705883,
"grad_norm": 10.007349065084831,
"learning_rate": 1.360181564219204e-06,
"loss": 0.7791054248809814,
"step": 1120
},
{
"epoch": 2.3550420168067228,
"grad_norm": 12.213364584526106,
"learning_rate": 1.351811418489436e-06,
"loss": 0.37381619215011597,
"step": 1121
},
{
"epoch": 2.357142857142857,
"grad_norm": 10.713765677783302,
"learning_rate": 1.3434630784858067e-06,
"loss": 0.3184419870376587,
"step": 1122
},
{
"epoch": 2.3592436974789917,
"grad_norm": 11.470256693930569,
"learning_rate": 1.335136594107498e-06,
"loss": 0.3431350886821747,
"step": 1123
},
{
"epoch": 2.361344537815126,
"grad_norm": 9.119684880351647,
"learning_rate": 1.3268320151230518e-06,
"loss": 0.4296434819698334,
"step": 1124
},
{
"epoch": 2.3634453781512605,
"grad_norm": 10.866853294417046,
"learning_rate": 1.3185493911700854e-06,
"loss": 0.48791950941085815,
"step": 1125
},
{
"epoch": 2.365546218487395,
"grad_norm": 9.540925370722046,
"learning_rate": 1.3102887717549812e-06,
"loss": 0.29711413383483887,
"step": 1126
},
{
"epoch": 2.3676470588235294,
"grad_norm": 11.627989144711366,
"learning_rate": 1.302050206252602e-06,
"loss": 0.39902636408805847,
"step": 1127
},
{
"epoch": 2.369747899159664,
"grad_norm": 6.365770038684127,
"learning_rate": 1.2938337439059868e-06,
"loss": 0.2864948511123657,
"step": 1128
},
{
"epoch": 2.3718487394957983,
"grad_norm": 12.606248234313094,
"learning_rate": 1.2856394338260691e-06,
"loss": 0.42151930928230286,
"step": 1129
},
{
"epoch": 2.3739495798319328,
"grad_norm": 8.544922775672411,
"learning_rate": 1.2774673249913656e-06,
"loss": 0.330949604511261,
"step": 1130
},
{
"epoch": 2.3760504201680672,
"grad_norm": 12.681787759512487,
"learning_rate": 1.2693174662477003e-06,
"loss": 0.832221508026123,
"step": 1131
},
{
"epoch": 2.3781512605042017,
"grad_norm": 16.15687539830067,
"learning_rate": 1.2611899063079002e-06,
"loss": 0.3243201971054077,
"step": 1132
},
{
"epoch": 2.380252100840336,
"grad_norm": 11.42137338593432,
"learning_rate": 1.253084693751514e-06,
"loss": 0.4209938049316406,
"step": 1133
},
{
"epoch": 2.3823529411764706,
"grad_norm": 10.49566833203582,
"learning_rate": 1.245001877024512e-06,
"loss": 0.1905173659324646,
"step": 1134
},
{
"epoch": 2.384453781512605,
"grad_norm": 9.325292405896798,
"learning_rate": 1.2369415044390055e-06,
"loss": 0.31655293703079224,
"step": 1135
},
{
"epoch": 2.3865546218487395,
"grad_norm": 12.150405014710023,
"learning_rate": 1.228903624172954e-06,
"loss": 0.2780379354953766,
"step": 1136
},
{
"epoch": 2.388655462184874,
"grad_norm": 7.132176058282011,
"learning_rate": 1.220888284269874e-06,
"loss": 0.5738459825515747,
"step": 1137
},
{
"epoch": 2.3907563025210083,
"grad_norm": 9.199984669814489,
"learning_rate": 1.2128955326385595e-06,
"loss": 0.4594503343105316,
"step": 1138
},
{
"epoch": 2.392857142857143,
"grad_norm": 246.0490199481034,
"learning_rate": 1.2049254170527857e-06,
"loss": 1.6502771377563477,
"step": 1139
},
{
"epoch": 2.3949579831932772,
"grad_norm": 10.645480745934366,
"learning_rate": 1.196977985151036e-06,
"loss": 0.7063793540000916,
"step": 1140
},
{
"epoch": 2.3970588235294117,
"grad_norm": 12.466682957005606,
"learning_rate": 1.1890532844362035e-06,
"loss": 0.4885460138320923,
"step": 1141
},
{
"epoch": 2.399159663865546,
"grad_norm": 8.222098406246245,
"learning_rate": 1.1811513622753196e-06,
"loss": 0.29537534713745117,
"step": 1142
},
{
"epoch": 2.4012605042016806,
"grad_norm": 10.08444013945275,
"learning_rate": 1.1732722658992597e-06,
"loss": 0.6734664440155029,
"step": 1143
},
{
"epoch": 2.403361344537815,
"grad_norm": 12.85839873964936,
"learning_rate": 1.1654160424024718e-06,
"loss": 0.39790263772010803,
"step": 1144
},
{
"epoch": 2.4054621848739495,
"grad_norm": 19.32478545248631,
"learning_rate": 1.1575827387426846e-06,
"loss": 0.2750331163406372,
"step": 1145
},
{
"epoch": 2.407563025210084,
"grad_norm": 9.972826363554564,
"learning_rate": 1.149772401740637e-06,
"loss": 0.44170406460762024,
"step": 1146
},
{
"epoch": 2.4096638655462184,
"grad_norm": 7.314481031395291,
"learning_rate": 1.1419850780797864e-06,
"loss": 0.19013899564743042,
"step": 1147
},
{
"epoch": 2.411764705882353,
"grad_norm": 7.432016042351664,
"learning_rate": 1.1342208143060423e-06,
"loss": 0.4140137732028961,
"step": 1148
},
{
"epoch": 2.4138655462184873,
"grad_norm": 11.414471382112064,
"learning_rate": 1.1264796568274811e-06,
"loss": 0.4861386716365814,
"step": 1149
},
{
"epoch": 2.4159663865546217,
"grad_norm": 13.684580354320987,
"learning_rate": 1.118761651914065e-06,
"loss": 0.3487178683280945,
"step": 1150
},
{
"epoch": 2.418067226890756,
"grad_norm": 8.714659720362214,
"learning_rate": 1.1110668456973761e-06,
"loss": 0.6119335889816284,
"step": 1151
},
{
"epoch": 2.4201680672268906,
"grad_norm": 10.585809423186294,
"learning_rate": 1.10339528417033e-06,
"loss": 0.24830467998981476,
"step": 1152
},
{
"epoch": 2.422268907563025,
"grad_norm": 6.6530689382799375,
"learning_rate": 1.0957470131869102e-06,
"loss": 0.20413950085639954,
"step": 1153
},
{
"epoch": 2.4243697478991595,
"grad_norm": 11.585229233250407,
"learning_rate": 1.088122078461884e-06,
"loss": 0.7759865522384644,
"step": 1154
},
{
"epoch": 2.426470588235294,
"grad_norm": 14.183804098321202,
"learning_rate": 1.0805205255705403e-06,
"loss": 0.9713194370269775,
"step": 1155
},
{
"epoch": 2.4285714285714284,
"grad_norm": 11.392423008755229,
"learning_rate": 1.0729423999484062e-06,
"loss": 0.3234805464744568,
"step": 1156
},
{
"epoch": 2.4306722689075633,
"grad_norm": 7.504753087219636,
"learning_rate": 1.0653877468909857e-06,
"loss": 0.2364063262939453,
"step": 1157
},
{
"epoch": 2.4327731092436977,
"grad_norm": 9.823099282463206,
"learning_rate": 1.0578566115534794e-06,
"loss": 0.4705219268798828,
"step": 1158
},
{
"epoch": 2.434873949579832,
"grad_norm": 9.451122750213175,
"learning_rate": 1.0503490389505244e-06,
"loss": 0.26277682185173035,
"step": 1159
},
{
"epoch": 2.4369747899159666,
"grad_norm": 9.336273451144258,
"learning_rate": 1.0428650739559138e-06,
"loss": 0.13882672786712646,
"step": 1160
},
{
"epoch": 2.439075630252101,
"grad_norm": 7.52407325404656,
"learning_rate": 1.0354047613023404e-06,
"loss": 0.5188834071159363,
"step": 1161
},
{
"epoch": 2.4411764705882355,
"grad_norm": 7.853041816369125,
"learning_rate": 1.0279681455811219e-06,
"loss": 0.24887529015541077,
"step": 1162
},
{
"epoch": 2.44327731092437,
"grad_norm": 10.755975449643415,
"learning_rate": 1.0205552712419343e-06,
"loss": 0.28220975399017334,
"step": 1163
},
{
"epoch": 2.4453781512605044,
"grad_norm": 10.623188982430918,
"learning_rate": 1.013166182592551e-06,
"loss": 0.24789491295814514,
"step": 1164
},
{
"epoch": 2.447478991596639,
"grad_norm": 10.265380274150749,
"learning_rate": 1.0058009237985721e-06,
"loss": 0.7892224788665771,
"step": 1165
},
{
"epoch": 2.4495798319327733,
"grad_norm": 11.896169947706998,
"learning_rate": 9.98459538883167e-07,
"loss": 0.26245754957199097,
"step": 1166
},
{
"epoch": 2.4516806722689077,
"grad_norm": 11.049089295820975,
"learning_rate": 9.911420717268023e-07,
"loss": 0.27979156374931335,
"step": 1167
},
{
"epoch": 2.453781512605042,
"grad_norm": 13.265137697114756,
"learning_rate": 9.838485660669906e-07,
"loss": 0.7934341430664062,
"step": 1168
},
{
"epoch": 2.4558823529411766,
"grad_norm": 10.018047542365926,
"learning_rate": 9.765790654980195e-07,
"loss": 0.45289355516433716,
"step": 1169
},
{
"epoch": 2.457983193277311,
"grad_norm": 13.567130161558774,
"learning_rate": 9.693336134706988e-07,
"loss": 0.992337703704834,
"step": 1170
},
{
"epoch": 2.4600840336134455,
"grad_norm": 9.387778498410693,
"learning_rate": 9.621122532920908e-07,
"loss": 0.29417842626571655,
"step": 1171
},
{
"epoch": 2.46218487394958,
"grad_norm": 9.114348977338564,
"learning_rate": 9.549150281252633e-07,
"loss": 0.5845852494239807,
"step": 1172
},
{
"epoch": 2.4642857142857144,
"grad_norm": 9.288921226395173,
"learning_rate": 9.477419809890215e-07,
"loss": 0.22582799196243286,
"step": 1173
},
{
"epoch": 2.466386554621849,
"grad_norm": 13.268912659944744,
"learning_rate": 9.405931547576591e-07,
"loss": 0.26232588291168213,
"step": 1174
},
{
"epoch": 2.4684873949579833,
"grad_norm": 9.072509999987034,
"learning_rate": 9.334685921606946e-07,
"loss": 0.9084593057632446,
"step": 1175
},
{
"epoch": 2.4705882352941178,
"grad_norm": 11.285010838093699,
"learning_rate": 9.26368335782622e-07,
"loss": 0.8386296629905701,
"step": 1176
},
{
"epoch": 2.472689075630252,
"grad_norm": 11.02721308505799,
"learning_rate": 9.192924280626514e-07,
"loss": 1.0152020454406738,
"step": 1177
},
{
"epoch": 2.4747899159663866,
"grad_norm": 12.924069581096365,
"learning_rate": 9.122409112944591e-07,
"loss": 0.42396751046180725,
"step": 1178
},
{
"epoch": 2.476890756302521,
"grad_norm": 13.79023717049261,
"learning_rate": 9.052138276259348e-07,
"loss": 0.3439130485057831,
"step": 1179
},
{
"epoch": 2.4789915966386555,
"grad_norm": 8.00992187627695,
"learning_rate": 8.982112190589237e-07,
"loss": 0.21849340200424194,
"step": 1180
},
{
"epoch": 2.48109243697479,
"grad_norm": 17.238112347826142,
"learning_rate": 8.912331274489855e-07,
"loss": 1.047693133354187,
"step": 1181
},
{
"epoch": 2.4831932773109244,
"grad_norm": 10.977700782429032,
"learning_rate": 8.842795945051335e-07,
"loss": 0.4458342492580414,
"step": 1182
},
{
"epoch": 2.485294117647059,
"grad_norm": 6.2173703827542735,
"learning_rate": 8.773506617895944e-07,
"loss": 0.26556795835494995,
"step": 1183
},
{
"epoch": 2.4873949579831933,
"grad_norm": 14.598955541616366,
"learning_rate": 8.704463707175526e-07,
"loss": 0.8663069605827332,
"step": 1184
},
{
"epoch": 2.4894957983193278,
"grad_norm": 9.599096123627477,
"learning_rate": 8.6356676255691e-07,
"loss": 0.7863715291023254,
"step": 1185
},
{
"epoch": 2.491596638655462,
"grad_norm": 7.756695128139413,
"learning_rate": 8.567118784280309e-07,
"loss": 0.2747763395309448,
"step": 1186
},
{
"epoch": 2.4936974789915967,
"grad_norm": 15.296434795066423,
"learning_rate": 8.498817593035053e-07,
"loss": 0.22008158266544342,
"step": 1187
},
{
"epoch": 2.495798319327731,
"grad_norm": 42.195018093662426,
"learning_rate": 8.430764460078938e-07,
"loss": 0.7790160179138184,
"step": 1188
},
{
"epoch": 2.4978991596638656,
"grad_norm": 14.930505610933327,
"learning_rate": 8.362959792174941e-07,
"loss": 0.3692745864391327,
"step": 1189
},
{
"epoch": 2.5,
"grad_norm": 15.547844843931736,
"learning_rate": 8.295403994600921e-07,
"loss": 0.5012900829315186,
"step": 1190
},
{
"epoch": 2.5021008403361344,
"grad_norm": 13.218952734739705,
"learning_rate": 8.228097471147167e-07,
"loss": 0.4049416780471802,
"step": 1191
},
{
"epoch": 2.504201680672269,
"grad_norm": 11.42318009744243,
"learning_rate": 8.161040624114075e-07,
"loss": 0.14171475172042847,
"step": 1192
},
{
"epoch": 2.5063025210084033,
"grad_norm": 8.26466575159723,
"learning_rate": 8.094233854309647e-07,
"loss": 0.32759952545166016,
"step": 1193
},
{
"epoch": 2.508403361344538,
"grad_norm": 13.359323997562882,
"learning_rate": 8.027677561047176e-07,
"loss": 0.5382500886917114,
"step": 1194
},
{
"epoch": 2.5105042016806722,
"grad_norm": 9.050399443504134,
"learning_rate": 7.961372142142776e-07,
"loss": 0.4815264940261841,
"step": 1195
},
{
"epoch": 2.5126050420168067,
"grad_norm": 8.2857361498368,
"learning_rate": 7.89531799391311e-07,
"loss": 0.28123000264167786,
"step": 1196
},
{
"epoch": 2.514705882352941,
"grad_norm": 14.813927596451204,
"learning_rate": 7.829515511172897e-07,
"loss": 0.5116557478904724,
"step": 1197
},
{
"epoch": 2.5168067226890756,
"grad_norm": 11.672590724543431,
"learning_rate": 7.763965087232678e-07,
"loss": 0.4502016603946686,
"step": 1198
},
{
"epoch": 2.51890756302521,
"grad_norm": 8.420294235923025,
"learning_rate": 7.698667113896346e-07,
"loss": 0.34997278451919556,
"step": 1199
},
{
"epoch": 2.5210084033613445,
"grad_norm": 4.856432012218632,
"learning_rate": 7.633621981458916e-07,
"loss": 0.15743517875671387,
"step": 1200
},
{
"epoch": 2.523109243697479,
"grad_norm": 10.116924629282346,
"learning_rate": 7.568830078704092e-07,
"loss": 0.4513791799545288,
"step": 1201
},
{
"epoch": 2.5252100840336134,
"grad_norm": 10.448219454906289,
"learning_rate": 7.504291792902024e-07,
"loss": 0.5203551054000854,
"step": 1202
},
{
"epoch": 2.527310924369748,
"grad_norm": 7.4896542285298,
"learning_rate": 7.440007509806946e-07,
"loss": 0.5805743932723999,
"step": 1203
},
{
"epoch": 2.5294117647058822,
"grad_norm": 6.637043733478462,
"learning_rate": 7.375977613654861e-07,
"loss": 0.21151217818260193,
"step": 1204
},
{
"epoch": 2.5315126050420167,
"grad_norm": 11.756396934264371,
"learning_rate": 7.312202487161318e-07,
"loss": 0.4486454725265503,
"step": 1205
},
{
"epoch": 2.533613445378151,
"grad_norm": 13.216341095384697,
"learning_rate": 7.248682511519006e-07,
"loss": 0.8350504040718079,
"step": 1206
},
{
"epoch": 2.5357142857142856,
"grad_norm": 14.368316188442714,
"learning_rate": 7.18541806639561e-07,
"loss": 0.37657079100608826,
"step": 1207
},
{
"epoch": 2.53781512605042,
"grad_norm": 10.572863577964558,
"learning_rate": 7.122409529931412e-07,
"loss": 0.5544061660766602,
"step": 1208
},
{
"epoch": 2.5399159663865545,
"grad_norm": 13.009489309703797,
"learning_rate": 7.059657278737136e-07,
"loss": 0.8755850791931152,
"step": 1209
},
{
"epoch": 2.542016806722689,
"grad_norm": 10.419835233671352,
"learning_rate": 6.997161687891635e-07,
"loss": 0.6084367036819458,
"step": 1210
},
{
"epoch": 2.5441176470588234,
"grad_norm": 7.424045502482636,
"learning_rate": 6.934923130939692e-07,
"loss": 0.3528558015823364,
"step": 1211
},
{
"epoch": 2.546218487394958,
"grad_norm": 22.05326914016899,
"learning_rate": 6.872941979889708e-07,
"loss": 0.3760122060775757,
"step": 1212
},
{
"epoch": 2.5483193277310923,
"grad_norm": 8.437103819513496,
"learning_rate": 6.811218605211606e-07,
"loss": 0.3798169195652008,
"step": 1213
},
{
"epoch": 2.5504201680672267,
"grad_norm": 15.105682353848836,
"learning_rate": 6.749753375834467e-07,
"loss": 0.20516347885131836,
"step": 1214
},
{
"epoch": 2.552521008403361,
"grad_norm": 15.328640967464176,
"learning_rate": 6.688546659144479e-07,
"loss": 0.39129936695098877,
"step": 1215
},
{
"epoch": 2.5546218487394956,
"grad_norm": 18.19539322746799,
"learning_rate": 6.627598820982595e-07,
"loss": 0.5815962553024292,
"step": 1216
},
{
"epoch": 2.55672268907563,
"grad_norm": 10.358040499956887,
"learning_rate": 6.566910225642475e-07,
"loss": 0.2462518960237503,
"step": 1217
},
{
"epoch": 2.5588235294117645,
"grad_norm": 23.882665351929745,
"learning_rate": 6.50648123586819e-07,
"loss": 0.7295534610748291,
"step": 1218
},
{
"epoch": 2.560924369747899,
"grad_norm": 11.419325337575849,
"learning_rate": 6.446312212852162e-07,
"loss": 0.4088057577610016,
"step": 1219
},
{
"epoch": 2.5630252100840334,
"grad_norm": 18.506668669014132,
"learning_rate": 6.386403516232948e-07,
"loss": 0.6498621106147766,
"step": 1220
},
{
"epoch": 2.565126050420168,
"grad_norm": 12.707186533109224,
"learning_rate": 6.326755504093063e-07,
"loss": 0.3554389476776123,
"step": 1221
},
{
"epoch": 2.5672268907563023,
"grad_norm": 12.462287833643975,
"learning_rate": 6.267368532956919e-07,
"loss": 1.3259708881378174,
"step": 1222
},
{
"epoch": 2.5693277310924367,
"grad_norm": 8.69258882253335,
"learning_rate": 6.208242957788613e-07,
"loss": 0.4336357116699219,
"step": 1223
},
{
"epoch": 2.571428571428571,
"grad_norm": 10.998188149878677,
"learning_rate": 6.14937913198988e-07,
"loss": 0.6199144124984741,
"step": 1224
},
{
"epoch": 2.5735294117647056,
"grad_norm": 20.134120954604086,
"learning_rate": 6.090777407397902e-07,
"loss": 1.075969934463501,
"step": 1225
},
{
"epoch": 2.57563025210084,
"grad_norm": 9.646318558023589,
"learning_rate": 6.032438134283286e-07,
"loss": 0.5996450185775757,
"step": 1226
},
{
"epoch": 2.5777310924369745,
"grad_norm": 10.633484513814087,
"learning_rate": 5.974361661347889e-07,
"loss": 0.37859058380126953,
"step": 1227
},
{
"epoch": 2.5798319327731094,
"grad_norm": 18.160538186398977,
"learning_rate": 5.916548335722822e-07,
"loss": 0.3595309853553772,
"step": 1228
},
{
"epoch": 2.581932773109244,
"grad_norm": 12.156397479975382,
"learning_rate": 5.858998502966273e-07,
"loss": 0.31986016035079956,
"step": 1229
},
{
"epoch": 2.5840336134453783,
"grad_norm": 12.635254524437713,
"learning_rate": 5.801712507061563e-07,
"loss": 0.3975721597671509,
"step": 1230
},
{
"epoch": 2.5861344537815127,
"grad_norm": 10.900463191925608,
"learning_rate": 5.74469069041495e-07,
"loss": 0.6717185974121094,
"step": 1231
},
{
"epoch": 2.588235294117647,
"grad_norm": 10.60292331277609,
"learning_rate": 5.687933393853718e-07,
"loss": 0.6171470880508423,
"step": 1232
},
{
"epoch": 2.5903361344537816,
"grad_norm": 11.817453815932138,
"learning_rate": 5.631440956624057e-07,
"loss": 0.47931092977523804,
"step": 1233
},
{
"epoch": 2.592436974789916,
"grad_norm": 14.65524897977516,
"learning_rate": 5.575213716389039e-07,
"loss": 0.44013679027557373,
"step": 1234
},
{
"epoch": 2.5945378151260505,
"grad_norm": 14.640686063418055,
"learning_rate": 5.519252009226639e-07,
"loss": 0.515785276889801,
"step": 1235
},
{
"epoch": 2.596638655462185,
"grad_norm": 9.51325634200356,
"learning_rate": 5.463556169627687e-07,
"loss": 0.3664918541908264,
"step": 1236
},
{
"epoch": 2.5987394957983194,
"grad_norm": 9.591516923545466,
"learning_rate": 5.408126530493918e-07,
"loss": 0.3711666762828827,
"step": 1237
},
{
"epoch": 2.600840336134454,
"grad_norm": 7.071071092917334,
"learning_rate": 5.352963423135893e-07,
"loss": 0.12698325514793396,
"step": 1238
},
{
"epoch": 2.6029411764705883,
"grad_norm": 8.042424735857201,
"learning_rate": 5.298067177271144e-07,
"loss": 0.3730424642562866,
"step": 1239
},
{
"epoch": 2.6050420168067228,
"grad_norm": 9.069780325522164,
"learning_rate": 5.243438121022077e-07,
"loss": 0.6243601441383362,
"step": 1240
},
{
"epoch": 2.607142857142857,
"grad_norm": 7.246887997362519,
"learning_rate": 5.18907658091411e-07,
"loss": 0.18001016974449158,
"step": 1241
},
{
"epoch": 2.6092436974789917,
"grad_norm": 15.652638965395807,
"learning_rate": 5.134982881873646e-07,
"loss": 0.6635949611663818,
"step": 1242
},
{
"epoch": 2.611344537815126,
"grad_norm": 9.642543803196963,
"learning_rate": 5.081157347226201e-07,
"loss": 0.4666215777397156,
"step": 1243
},
{
"epoch": 2.6134453781512605,
"grad_norm": 9.416633968819704,
"learning_rate": 5.027600298694397e-07,
"loss": 0.1682681143283844,
"step": 1244
},
{
"epoch": 2.615546218487395,
"grad_norm": 14.036854769880513,
"learning_rate": 4.974312056396113e-07,
"loss": 0.5077744722366333,
"step": 1245
},
{
"epoch": 2.6176470588235294,
"grad_norm": 11.722652840072532,
"learning_rate": 4.92129293884252e-07,
"loss": 0.44359397888183594,
"step": 1246
},
{
"epoch": 2.619747899159664,
"grad_norm": 15.585836072486865,
"learning_rate": 4.868543262936176e-07,
"loss": 1.2246967554092407,
"step": 1247
},
{
"epoch": 2.6218487394957983,
"grad_norm": 10.770044484279795,
"learning_rate": 4.816063343969196e-07,
"loss": 0.32194000482559204,
"step": 1248
},
{
"epoch": 2.6239495798319328,
"grad_norm": 11.639608924375384,
"learning_rate": 4.763853495621251e-07,
"loss": 0.5496278405189514,
"step": 1249
},
{
"epoch": 2.6260504201680672,
"grad_norm": 6.671350027648182,
"learning_rate": 4.7119140299578424e-07,
"loss": 0.21257492899894714,
"step": 1250
},
{
"epoch": 2.6281512605042017,
"grad_norm": 12.713785072488509,
"learning_rate": 4.660245257428297e-07,
"loss": 0.3104386329650879,
"step": 1251
},
{
"epoch": 2.630252100840336,
"grad_norm": 14.031766333020213,
"learning_rate": 4.6088474868640045e-07,
"loss": 0.8334522843360901,
"step": 1252
},
{
"epoch": 2.6323529411764706,
"grad_norm": 9.251230979164895,
"learning_rate": 4.557721025476508e-07,
"loss": 0.29882583022117615,
"step": 1253
},
{
"epoch": 2.634453781512605,
"grad_norm": 10.2580288266136,
"learning_rate": 4.5068661788557345e-07,
"loss": 0.3209346830844879,
"step": 1254
},
{
"epoch": 2.6365546218487395,
"grad_norm": 5.604118390936418,
"learning_rate": 4.4562832509680963e-07,
"loss": 0.15333116054534912,
"step": 1255
},
{
"epoch": 2.638655462184874,
"grad_norm": 19.802213443985696,
"learning_rate": 4.4059725441547464e-07,
"loss": 0.48582714796066284,
"step": 1256
},
{
"epoch": 2.6407563025210083,
"grad_norm": 11.514218359185726,
"learning_rate": 4.355934359129699e-07,
"loss": 0.4873425364494324,
"step": 1257
},
{
"epoch": 2.642857142857143,
"grad_norm": 11.373550533887446,
"learning_rate": 4.3061689949780995e-07,
"loss": 0.2611161768436432,
"step": 1258
},
{
"epoch": 2.6449579831932772,
"grad_norm": 13.616066692598451,
"learning_rate": 4.2566767491543706e-07,
"loss": 0.27621158957481384,
"step": 1259
},
{
"epoch": 2.6470588235294117,
"grad_norm": 13.034515066864026,
"learning_rate": 4.2074579174805173e-07,
"loss": 0.849486231803894,
"step": 1260
},
{
"epoch": 2.649159663865546,
"grad_norm": 11.86057949603211,
"learning_rate": 4.1585127941442536e-07,
"loss": 0.7652707099914551,
"step": 1261
},
{
"epoch": 2.6512605042016806,
"grad_norm": 9.803056978877574,
"learning_rate": 4.1098416716973457e-07,
"loss": 0.27856025099754333,
"step": 1262
},
{
"epoch": 2.653361344537815,
"grad_norm": 10.956379977903175,
"learning_rate": 4.0614448410538077e-07,
"loss": 0.3749684691429138,
"step": 1263
},
{
"epoch": 2.6554621848739495,
"grad_norm": 12.001506859449199,
"learning_rate": 4.01332259148815e-07,
"loss": 0.6064971685409546,
"step": 1264
},
{
"epoch": 2.657563025210084,
"grad_norm": 8.750382381092477,
"learning_rate": 3.965475210633718e-07,
"loss": 0.31089282035827637,
"step": 1265
},
{
"epoch": 2.6596638655462184,
"grad_norm": 11.062101410973414,
"learning_rate": 3.917902984480881e-07,
"loss": 0.3686492443084717,
"step": 1266
},
{
"epoch": 2.661764705882353,
"grad_norm": 9.181597675394137,
"learning_rate": 3.870606197375415e-07,
"loss": 0.5900052785873413,
"step": 1267
},
{
"epoch": 2.6638655462184873,
"grad_norm": 11.229435985209061,
"learning_rate": 3.823585132016711e-07,
"loss": 0.23156413435935974,
"step": 1268
},
{
"epoch": 2.6659663865546217,
"grad_norm": 14.580552525176778,
"learning_rate": 3.776840069456189e-07,
"loss": 1.1965575218200684,
"step": 1269
},
{
"epoch": 2.668067226890756,
"grad_norm": 11.440843191964541,
"learning_rate": 3.730371289095508e-07,
"loss": 0.5137308835983276,
"step": 1270
},
{
"epoch": 2.6701680672268906,
"grad_norm": 10.19938284065176,
"learning_rate": 3.6841790686849897e-07,
"loss": 0.2563337981700897,
"step": 1271
},
{
"epoch": 2.6722689075630255,
"grad_norm": 12.902940912955524,
"learning_rate": 3.6382636843218967e-07,
"loss": 0.5659809708595276,
"step": 1272
},
{
"epoch": 2.67436974789916,
"grad_norm": 10.012067877403453,
"learning_rate": 3.592625410448813e-07,
"loss": 0.4689119756221771,
"step": 1273
},
{
"epoch": 2.6764705882352944,
"grad_norm": 7.152049482781003,
"learning_rate": 3.5472645198520064e-07,
"loss": 0.623033881187439,
"step": 1274
},
{
"epoch": 2.678571428571429,
"grad_norm": 12.87568935637631,
"learning_rate": 3.502181283659756e-07,
"loss": 0.5805165767669678,
"step": 1275
},
{
"epoch": 2.6806722689075633,
"grad_norm": 9.41187997958309,
"learning_rate": 3.4573759713407927e-07,
"loss": 0.5375624299049377,
"step": 1276
},
{
"epoch": 2.6827731092436977,
"grad_norm": 9.91288200334237,
"learning_rate": 3.4128488507026327e-07,
"loss": 0.3185434341430664,
"step": 1277
},
{
"epoch": 2.684873949579832,
"grad_norm": 15.97191633077991,
"learning_rate": 3.3686001878900365e-07,
"loss": 2.561387538909912,
"step": 1278
},
{
"epoch": 2.6869747899159666,
"grad_norm": 15.172775416815085,
"learning_rate": 3.324630247383337e-07,
"loss": 0.5536858439445496,
"step": 1279
},
{
"epoch": 2.689075630252101,
"grad_norm": 8.67689739732767,
"learning_rate": 3.2809392919969483e-07,
"loss": 0.18657177686691284,
"step": 1280
},
{
"epoch": 2.6911764705882355,
"grad_norm": 12.844508734340609,
"learning_rate": 3.2375275828777253e-07,
"loss": 0.9441865086555481,
"step": 1281
},
{
"epoch": 2.69327731092437,
"grad_norm": 9.066706121878353,
"learning_rate": 3.194395379503451e-07,
"loss": 0.5320143103599548,
"step": 1282
},
{
"epoch": 2.6953781512605044,
"grad_norm": 8.980483143209002,
"learning_rate": 3.151542939681235e-07,
"loss": 0.5943700075149536,
"step": 1283
},
{
"epoch": 2.697478991596639,
"grad_norm": 54.073743939162,
"learning_rate": 3.108970519546034e-07,
"loss": 1.0508530139923096,
"step": 1284
},
{
"epoch": 2.6995798319327733,
"grad_norm": 10.409633106680213,
"learning_rate": 3.066678373559062e-07,
"loss": 0.3096291124820709,
"step": 1285
},
{
"epoch": 2.7016806722689077,
"grad_norm": 17.871716634928575,
"learning_rate": 3.0246667545063057e-07,
"loss": 1.133009910583496,
"step": 1286
},
{
"epoch": 2.703781512605042,
"grad_norm": 10.727929813588565,
"learning_rate": 2.9829359134970206e-07,
"loss": 0.3362637162208557,
"step": 1287
},
{
"epoch": 2.7058823529411766,
"grad_norm": 10.027811395629024,
"learning_rate": 2.9414860999621764e-07,
"loss": 0.9418044090270996,
"step": 1288
},
{
"epoch": 2.707983193277311,
"grad_norm": 8.072200583551933,
"learning_rate": 2.9003175616530264e-07,
"loss": 0.2674849033355713,
"step": 1289
},
{
"epoch": 2.7100840336134455,
"grad_norm": 7.662563052553184,
"learning_rate": 2.8594305446396245e-07,
"loss": 0.39476725459098816,
"step": 1290
},
{
"epoch": 2.71218487394958,
"grad_norm": 7.931717377933664,
"learning_rate": 2.818825293309274e-07,
"loss": 0.5461002588272095,
"step": 1291
},
{
"epoch": 2.7142857142857144,
"grad_norm": 9.069033415947747,
"learning_rate": 2.7785020503651783e-07,
"loss": 0.36206185817718506,
"step": 1292
},
{
"epoch": 2.716386554621849,
"grad_norm": 11.269854164923549,
"learning_rate": 2.7384610568249313e-07,
"loss": 0.33151179552078247,
"step": 1293
},
{
"epoch": 2.7184873949579833,
"grad_norm": 10.182183999098427,
"learning_rate": 2.698702552019045e-07,
"loss": 0.3465487062931061,
"step": 1294
},
{
"epoch": 2.7205882352941178,
"grad_norm": 12.712025830447253,
"learning_rate": 2.659226773589607e-07,
"loss": 0.22317005693912506,
"step": 1295
},
{
"epoch": 2.722689075630252,
"grad_norm": 11.83879887595397,
"learning_rate": 2.620033957488777e-07,
"loss": 0.34791454672813416,
"step": 1296
},
{
"epoch": 2.7247899159663866,
"grad_norm": 12.416917803129223,
"learning_rate": 2.581124337977425e-07,
"loss": 0.4211697578430176,
"step": 1297
},
{
"epoch": 2.726890756302521,
"grad_norm": 15.495534828622619,
"learning_rate": 2.542498147623701e-07,
"loss": 0.4095291495323181,
"step": 1298
},
{
"epoch": 2.7289915966386555,
"grad_norm": 6.6678037455089925,
"learning_rate": 2.50415561730169e-07,
"loss": 0.2518484592437744,
"step": 1299
},
{
"epoch": 2.73109243697479,
"grad_norm": 8.694983560441388,
"learning_rate": 2.4660969761899576e-07,
"loss": 0.21484610438346863,
"step": 1300
},
{
"epoch": 2.7331932773109244,
"grad_norm": 11.820763178851392,
"learning_rate": 2.428322451770276e-07,
"loss": 0.39412614703178406,
"step": 1301
},
{
"epoch": 2.735294117647059,
"grad_norm": 11.877133639126868,
"learning_rate": 2.3908322698261597e-07,
"loss": 0.34464430809020996,
"step": 1302
},
{
"epoch": 2.7373949579831933,
"grad_norm": 10.16702078484984,
"learning_rate": 2.3536266544416043e-07,
"loss": 0.5757449865341187,
"step": 1303
},
{
"epoch": 2.7394957983193278,
"grad_norm": 12.93026525257059,
"learning_rate": 2.3167058279997156e-07,
"loss": 0.7968210577964783,
"step": 1304
},
{
"epoch": 2.741596638655462,
"grad_norm": 10.635434378996248,
"learning_rate": 2.2800700111813456e-07,
"loss": 0.40927547216415405,
"step": 1305
},
{
"epoch": 2.7436974789915967,
"grad_norm": 9.037444336220418,
"learning_rate": 2.2437194229638415e-07,
"loss": 0.23368996381759644,
"step": 1306
},
{
"epoch": 2.745798319327731,
"grad_norm": 128.54979302169804,
"learning_rate": 2.2076542806196588e-07,
"loss": 0.7368482351303101,
"step": 1307
},
{
"epoch": 2.7478991596638656,
"grad_norm": 8.202648198989193,
"learning_rate": 2.17187479971514e-07,
"loss": 0.29558128118515015,
"step": 1308
},
{
"epoch": 2.75,
"grad_norm": 6.885187685182277,
"learning_rate": 2.136381194109166e-07,
"loss": 0.2764503061771393,
"step": 1309
},
{
"epoch": 2.7521008403361344,
"grad_norm": 8.730199073100707,
"learning_rate": 2.1011736759519286e-07,
"loss": 0.3793492615222931,
"step": 1310
},
{
"epoch": 2.754201680672269,
"grad_norm": 8.631276466119623,
"learning_rate": 2.0662524556835982e-07,
"loss": 0.5927262902259827,
"step": 1311
},
{
"epoch": 2.7563025210084033,
"grad_norm": 12.625981527108426,
"learning_rate": 2.0316177420331375e-07,
"loss": 0.4284164607524872,
"step": 1312
},
{
"epoch": 2.758403361344538,
"grad_norm": 9.329175719292097,
"learning_rate": 1.997269742016994e-07,
"loss": 0.4722291827201843,
"step": 1313
},
{
"epoch": 2.7605042016806722,
"grad_norm": 17.532238777546283,
"learning_rate": 1.9632086609379041e-07,
"loss": 0.6627257466316223,
"step": 1314
},
{
"epoch": 2.7626050420168067,
"grad_norm": 9.92918276948977,
"learning_rate": 1.929434702383648e-07,
"loss": 0.42083340883255005,
"step": 1315
},
{
"epoch": 2.764705882352941,
"grad_norm": 9.589305807880846,
"learning_rate": 1.895948068225828e-07,
"loss": 0.39910781383514404,
"step": 1316
},
{
"epoch": 2.7668067226890756,
"grad_norm": 11.560419759358716,
"learning_rate": 1.862748958618682e-07,
"loss": 0.2765321731567383,
"step": 1317
},
{
"epoch": 2.76890756302521,
"grad_norm": 6.28366130869059,
"learning_rate": 1.8298375719978501e-07,
"loss": 0.08827929198741913,
"step": 1318
},
{
"epoch": 2.7710084033613445,
"grad_norm": 10.943149316905583,
"learning_rate": 1.797214105079248e-07,
"loss": 0.5753570795059204,
"step": 1319
},
{
"epoch": 2.773109243697479,
"grad_norm": 13.46621194548743,
"learning_rate": 1.7648787528578127e-07,
"loss": 0.7518602013587952,
"step": 1320
},
{
"epoch": 2.7752100840336134,
"grad_norm": 9.047171390898557,
"learning_rate": 1.732831708606425e-07,
"loss": 0.6446128487586975,
"step": 1321
},
{
"epoch": 2.777310924369748,
"grad_norm": 13.738460244304907,
"learning_rate": 1.7010731638746668e-07,
"loss": 0.4714201092720032,
"step": 1322
},
{
"epoch": 2.7794117647058822,
"grad_norm": 9.086358928536246,
"learning_rate": 1.669603308487755e-07,
"loss": 0.23203890025615692,
"step": 1323
},
{
"epoch": 2.7815126050420167,
"grad_norm": 25.798347784352785,
"learning_rate": 1.6384223305453417e-07,
"loss": 0.5102007389068604,
"step": 1324
},
{
"epoch": 2.783613445378151,
"grad_norm": 11.210651055014003,
"learning_rate": 1.6075304164204385e-07,
"loss": 0.45608770847320557,
"step": 1325
},
{
"epoch": 2.7857142857142856,
"grad_norm": 7.6934549954682465,
"learning_rate": 1.5769277507582725e-07,
"loss": 0.5190253257751465,
"step": 1326
},
{
"epoch": 2.78781512605042,
"grad_norm": 13.485529024983622,
"learning_rate": 1.5466145164751977e-07,
"loss": 0.5670579075813293,
"step": 1327
},
{
"epoch": 2.7899159663865545,
"grad_norm": 8.323766066498216,
"learning_rate": 1.5165908947575914e-07,
"loss": 0.4676046073436737,
"step": 1328
},
{
"epoch": 2.792016806722689,
"grad_norm": 11.23683105022603,
"learning_rate": 1.4868570650607816e-07,
"loss": 0.2914016544818878,
"step": 1329
},
{
"epoch": 2.7941176470588234,
"grad_norm": 17.258281880666775,
"learning_rate": 1.4574132051079658e-07,
"loss": 1.312021017074585,
"step": 1330
},
{
"epoch": 2.796218487394958,
"grad_norm": 9.249285303088671,
"learning_rate": 1.4282594908891666e-07,
"loss": 0.3117330074310303,
"step": 1331
},
{
"epoch": 2.7983193277310923,
"grad_norm": 9.999090012754882,
"learning_rate": 1.3993960966601328e-07,
"loss": 0.2705899775028229,
"step": 1332
},
{
"epoch": 2.8004201680672267,
"grad_norm": 9.383011281190877,
"learning_rate": 1.3708231949413676e-07,
"loss": 0.2621600031852722,
"step": 1333
},
{
"epoch": 2.802521008403361,
"grad_norm": 11.854334740139995,
"learning_rate": 1.342540956517041e-07,
"loss": 0.40849626064300537,
"step": 1334
},
{
"epoch": 2.8046218487394956,
"grad_norm": 10.05169136975745,
"learning_rate": 1.3145495504339856e-07,
"loss": 0.2958400845527649,
"step": 1335
},
{
"epoch": 2.80672268907563,
"grad_norm": 13.23608329431821,
"learning_rate": 1.2868491440007015e-07,
"loss": 0.7148715257644653,
"step": 1336
},
{
"epoch": 2.8088235294117645,
"grad_norm": 11.059610717001991,
"learning_rate": 1.2594399027863302e-07,
"loss": 0.5344212055206299,
"step": 1337
},
{
"epoch": 2.810924369747899,
"grad_norm": 9.850144807315097,
"learning_rate": 1.232321990619695e-07,
"loss": 0.3390062749385834,
"step": 1338
},
{
"epoch": 2.8130252100840334,
"grad_norm": 12.782218028007712,
"learning_rate": 1.205495569588283e-07,
"loss": 0.6602462530136108,
"step": 1339
},
{
"epoch": 2.815126050420168,
"grad_norm": 9.16202056000073,
"learning_rate": 1.1789608000373209e-07,
"loss": 0.2165951430797577,
"step": 1340
},
{
"epoch": 2.8172268907563023,
"grad_norm": 9.639282432785762,
"learning_rate": 1.1527178405687845e-07,
"loss": 0.33785703778266907,
"step": 1341
},
{
"epoch": 2.8193277310924367,
"grad_norm": 16.018225078825093,
"learning_rate": 1.1267668480404559e-07,
"loss": 0.49403730034828186,
"step": 1342
},
{
"epoch": 2.821428571428571,
"grad_norm": 13.934169182843426,
"learning_rate": 1.1011079775649969e-07,
"loss": 0.5875406265258789,
"step": 1343
},
{
"epoch": 2.8235294117647056,
"grad_norm": 10.314949862812936,
"learning_rate": 1.0757413825090212e-07,
"loss": 0.4375740885734558,
"step": 1344
},
{
"epoch": 2.82563025210084,
"grad_norm": 11.258449104141572,
"learning_rate": 1.0506672144921515e-07,
"loss": 0.6797425746917725,
"step": 1345
},
{
"epoch": 2.8277310924369745,
"grad_norm": 10.94330965699598,
"learning_rate": 1.0258856233861524e-07,
"loss": 0.36085596680641174,
"step": 1346
},
{
"epoch": 2.8298319327731094,
"grad_norm": 9.174555872255727,
"learning_rate": 1.0013967573140216e-07,
"loss": 0.43387356400489807,
"step": 1347
},
{
"epoch": 2.831932773109244,
"grad_norm": 11.299994940843328,
"learning_rate": 9.77200762649072e-08,
"loss": 0.44897180795669556,
"step": 1348
},
{
"epoch": 2.8340336134453783,
"grad_norm": 10.295685178570979,
"learning_rate": 9.532977840141123e-08,
"loss": 0.22422294318675995,
"step": 1349
},
{
"epoch": 2.8361344537815127,
"grad_norm": 8.127889054633478,
"learning_rate": 9.29687964280529e-08,
"loss": 0.642038106918335,
"step": 1350
},
{
"epoch": 2.838235294117647,
"grad_norm": 11.9364629190832,
"learning_rate": 9.063714445674776e-08,
"loss": 0.8069763779640198,
"step": 1351
},
{
"epoch": 2.8403361344537816,
"grad_norm": 11.63460546823257,
"learning_rate": 8.833483642410101e-08,
"loss": 0.36828362941741943,
"step": 1352
},
{
"epoch": 2.842436974789916,
"grad_norm": 10.042654306225293,
"learning_rate": 8.606188609132593e-08,
"loss": 0.3019287586212158,
"step": 1353
},
{
"epoch": 2.8445378151260505,
"grad_norm": 10.265644800483537,
"learning_rate": 8.381830704415839e-08,
"loss": 0.8440870046615601,
"step": 1354
},
{
"epoch": 2.846638655462185,
"grad_norm": 11.67889502498505,
"learning_rate": 8.160411269278079e-08,
"loss": 2.0406436920166016,
"step": 1355
},
{
"epoch": 2.8487394957983194,
"grad_norm": 11.659752416837614,
"learning_rate": 7.941931627173827e-08,
"loss": 0.23328936100006104,
"step": 1356
},
{
"epoch": 2.850840336134454,
"grad_norm": 14.52369655098527,
"learning_rate": 7.726393083985929e-08,
"loss": 0.552147626876831,
"step": 1357
},
{
"epoch": 2.8529411764705883,
"grad_norm": 8.467901286703713,
"learning_rate": 7.513796928018069e-08,
"loss": 0.38458627462387085,
"step": 1358
},
{
"epoch": 2.8550420168067228,
"grad_norm": 7.6124021321848,
"learning_rate": 7.30414442998667e-08,
"loss": 0.3594217300415039,
"step": 1359
},
{
"epoch": 2.857142857142857,
"grad_norm": 12.272607778978339,
"learning_rate": 7.097436843013783e-08,
"loss": 0.5628789067268372,
"step": 1360
},
{
"epoch": 2.8592436974789917,
"grad_norm": 9.345724512814346,
"learning_rate": 6.893675402618982e-08,
"loss": 0.7206631898880005,
"step": 1361
},
{
"epoch": 2.861344537815126,
"grad_norm": 13.88429967852116,
"learning_rate": 6.692861326712652e-08,
"loss": 0.8038681745529175,
"step": 1362
},
{
"epoch": 2.8634453781512605,
"grad_norm": 15.55581945591023,
"learning_rate": 6.494995815588101e-08,
"loss": 0.7214268445968628,
"step": 1363
},
{
"epoch": 2.865546218487395,
"grad_norm": 10.920690128080313,
"learning_rate": 6.300080051914792e-08,
"loss": 0.3757812976837158,
"step": 1364
},
{
"epoch": 2.8676470588235294,
"grad_norm": 9.02181459032139,
"learning_rate": 6.108115200731069e-08,
"loss": 0.7154731154441833,
"step": 1365
},
{
"epoch": 2.869747899159664,
"grad_norm": 12.825891467379778,
"learning_rate": 5.9191024094374384e-08,
"loss": 0.6805951595306396,
"step": 1366
},
{
"epoch": 2.8718487394957983,
"grad_norm": 10.058469083040828,
"learning_rate": 5.7330428077893575e-08,
"loss": 0.41078895330429077,
"step": 1367
},
{
"epoch": 2.8739495798319328,
"grad_norm": 8.36551151872813,
"learning_rate": 5.5499375078906793e-08,
"loss": 0.35648801922798157,
"step": 1368
},
{
"epoch": 2.8760504201680672,
"grad_norm": 18.110033778975207,
"learning_rate": 5.369787604186993e-08,
"loss": 0.3897348642349243,
"step": 1369
},
{
"epoch": 2.8781512605042017,
"grad_norm": 8.750407331993259,
"learning_rate": 5.192594173459242e-08,
"loss": 0.613540530204773,
"step": 1370
},
{
"epoch": 2.880252100840336,
"grad_norm": 8.528139704622195,
"learning_rate": 5.018358274816892e-08,
"loss": 0.4445531964302063,
"step": 1371
},
{
"epoch": 2.8823529411764706,
"grad_norm": 11.859807560110708,
"learning_rate": 4.847080949691996e-08,
"loss": 0.5488522052764893,
"step": 1372
},
{
"epoch": 2.884453781512605,
"grad_norm": 10.23407853457865,
"learning_rate": 4.6787632218326385e-08,
"loss": 0.5596367716789246,
"step": 1373
},
{
"epoch": 2.8865546218487395,
"grad_norm": 11.296256406092558,
"learning_rate": 4.513406097297224e-08,
"loss": 0.38018864393234253,
"step": 1374
},
{
"epoch": 2.888655462184874,
"grad_norm": 12.156451974202069,
"learning_rate": 4.351010564447977e-08,
"loss": 0.661139726638794,
"step": 1375
},
{
"epoch": 2.8907563025210083,
"grad_norm": 7.3170273092091485,
"learning_rate": 4.1915775939454506e-08,
"loss": 0.2944487929344177,
"step": 1376
},
{
"epoch": 2.892857142857143,
"grad_norm": 9.331145503425732,
"learning_rate": 4.035108138742416e-08,
"loss": 0.23486556112766266,
"step": 1377
},
{
"epoch": 2.8949579831932772,
"grad_norm": 11.863919468865829,
"learning_rate": 3.881603134078482e-08,
"loss": 0.3030620813369751,
"step": 1378
},
{
"epoch": 2.8970588235294117,
"grad_norm": 12.573022210864796,
"learning_rate": 3.731063497474152e-08,
"loss": 0.3213701546192169,
"step": 1379
},
{
"epoch": 2.899159663865546,
"grad_norm": 6.674326596763006,
"learning_rate": 3.583490128725553e-08,
"loss": 0.22970488667488098,
"step": 1380
},
{
"epoch": 2.9012605042016806,
"grad_norm": 13.577742623896349,
"learning_rate": 3.4388839098992154e-08,
"loss": 0.32231050729751587,
"step": 1381
},
{
"epoch": 2.903361344537815,
"grad_norm": 16.538926889757885,
"learning_rate": 3.2972457053262466e-08,
"loss": 0.9544304609298706,
"step": 1382
},
{
"epoch": 2.9054621848739495,
"grad_norm": 20.22528777991979,
"learning_rate": 3.158576361597887e-08,
"loss": 0.7788558006286621,
"step": 1383
},
{
"epoch": 2.907563025210084,
"grad_norm": 16.130951952781945,
"learning_rate": 3.022876707559796e-08,
"loss": 0.2601931393146515,
"step": 1384
},
{
"epoch": 2.9096638655462184,
"grad_norm": 13.778242873434662,
"learning_rate": 2.890147554307665e-08,
"loss": 0.5957424640655518,
"step": 1385
},
{
"epoch": 2.911764705882353,
"grad_norm": 10.295105075509534,
"learning_rate": 2.7603896951817755e-08,
"loss": 0.2927376925945282,
"step": 1386
},
{
"epoch": 2.9138655462184873,
"grad_norm": 18.743122797679717,
"learning_rate": 2.633603905762838e-08,
"loss": 0.5990405082702637,
"step": 1387
},
{
"epoch": 2.9159663865546217,
"grad_norm": 14.887502358752755,
"learning_rate": 2.5097909438669964e-08,
"loss": 0.4513130784034729,
"step": 1388
},
{
"epoch": 2.918067226890756,
"grad_norm": 9.564277783357335,
"learning_rate": 2.3889515495413297e-08,
"loss": 0.6215352416038513,
"step": 1389
},
{
"epoch": 2.9201680672268906,
"grad_norm": 5.003008688132311,
"learning_rate": 2.2710864450596336e-08,
"loss": 0.33804643154144287,
"step": 1390
},
{
"epoch": 2.9222689075630255,
"grad_norm": 9.060022958520825,
"learning_rate": 2.1561963349178704e-08,
"loss": 0.4266011416912079,
"step": 1391
},
{
"epoch": 2.92436974789916,
"grad_norm": 9.49932040181115,
"learning_rate": 2.0442819058300588e-08,
"loss": 0.3738781809806824,
"step": 1392
},
{
"epoch": 2.9264705882352944,
"grad_norm": 11.474699381578137,
"learning_rate": 1.935343826724112e-08,
"loss": 0.26019287109375,
"step": 1393
},
{
"epoch": 2.928571428571429,
"grad_norm": 9.426307307224148,
"learning_rate": 1.8293827487380623e-08,
"loss": 0.3799281120300293,
"step": 1394
},
{
"epoch": 2.9306722689075633,
"grad_norm": 12.711822585165105,
"learning_rate": 1.726399305215787e-08,
"loss": 0.25459083914756775,
"step": 1395
},
{
"epoch": 2.9327731092436977,
"grad_norm": 12.88439286989085,
"learning_rate": 1.626394111703622e-08,
"loss": 0.4746205806732178,
"step": 1396
},
{
"epoch": 2.934873949579832,
"grad_norm": 15.570716719123634,
"learning_rate": 1.5293677659463104e-08,
"loss": 0.4622001647949219,
"step": 1397
},
{
"epoch": 2.9369747899159666,
"grad_norm": 6.94964938645385,
"learning_rate": 1.4353208478837256e-08,
"loss": 0.18047931790351868,
"step": 1398
},
{
"epoch": 2.939075630252101,
"grad_norm": 10.000416990177895,
"learning_rate": 1.3442539196472647e-08,
"loss": 0.37007540464401245,
"step": 1399
},
{
"epoch": 2.9411764705882355,
"grad_norm": 8.931832554567432,
"learning_rate": 1.2561675255564621e-08,
"loss": 0.7158060073852539,
"step": 1400
},
{
"epoch": 2.94327731092437,
"grad_norm": 11.017696540531707,
"learning_rate": 1.1710621921159904e-08,
"loss": 0.9123420119285583,
"step": 1401
},
{
"epoch": 2.9453781512605044,
"grad_norm": 13.459702577958145,
"learning_rate": 1.0889384280119985e-08,
"loss": 1.1057486534118652,
"step": 1402
},
{
"epoch": 2.947478991596639,
"grad_norm": 11.44996683672279,
"learning_rate": 1.009796724109613e-08,
"loss": 0.36926376819610596,
"step": 1403
},
{
"epoch": 2.9495798319327733,
"grad_norm": 8.32543080681241,
"learning_rate": 9.336375534497732e-09,
"loss": 0.5240511298179626,
"step": 1404
},
{
"epoch": 2.9516806722689077,
"grad_norm": 16.41897161685657,
"learning_rate": 8.60461371246235e-09,
"loss": 1.0361064672470093,
"step": 1405
},
{
"epoch": 2.953781512605042,
"grad_norm": 15.488903163881536,
"learning_rate": 7.902686148831273e-09,
"loss": 0.7314852476119995,
"step": 1406
},
{
"epoch": 2.9558823529411766,
"grad_norm": 10.609488526695282,
"learning_rate": 7.230597039123433e-09,
"loss": 0.5929103493690491,
"step": 1407
},
{
"epoch": 2.957983193277311,
"grad_norm": 9.545516911394982,
"learning_rate": 6.588350400507093e-09,
"loss": 0.24979953467845917,
"step": 1408
},
{
"epoch": 2.9600840336134455,
"grad_norm": 16.112825448357878,
"learning_rate": 5.975950071779313e-09,
"loss": 0.810958206653595,
"step": 1409
},
{
"epoch": 2.96218487394958,
"grad_norm": 8.6763799711489,
"learning_rate": 5.393399713341518e-09,
"loss": 0.4567590355873108,
"step": 1410
},
{
"epoch": 2.9642857142857144,
"grad_norm": 10.068385761335634,
"learning_rate": 4.8407028071773e-09,
"loss": 0.34989726543426514,
"step": 1411
},
{
"epoch": 2.966386554621849,
"grad_norm": 11.30284408928835,
"learning_rate": 4.317862656831873e-09,
"loss": 0.3826170563697815,
"step": 1412
},
{
"epoch": 2.9684873949579833,
"grad_norm": 13.404023548287954,
"learning_rate": 3.8248823873932026e-09,
"loss": 0.25103145837783813,
"step": 1413
},
{
"epoch": 2.9705882352941178,
"grad_norm": 10.423177049027613,
"learning_rate": 3.361764945473134e-09,
"loss": 0.33963871002197266,
"step": 1414
},
{
"epoch": 2.972689075630252,
"grad_norm": 9.701343246515489,
"learning_rate": 2.928513099187402e-09,
"loss": 0.5596168637275696,
"step": 1415
},
{
"epoch": 2.9747899159663866,
"grad_norm": 8.852327800983687,
"learning_rate": 2.52512943814176e-09,
"loss": 0.3114224374294281,
"step": 1416
},
{
"epoch": 2.976890756302521,
"grad_norm": 8.400624424787871,
"learning_rate": 2.151616373417542e-09,
"loss": 0.5350728631019592,
"step": 1417
},
{
"epoch": 2.9789915966386555,
"grad_norm": 10.794481012917993,
"learning_rate": 1.8079761375522365e-09,
"loss": 0.6644730567932129,
"step": 1418
},
{
"epoch": 2.98109243697479,
"grad_norm": 9.282496929164791,
"learning_rate": 1.4942107845317132e-09,
"loss": 0.2426847219467163,
"step": 1419
},
{
"epoch": 2.9831932773109244,
"grad_norm": 9.113139352861424,
"learning_rate": 1.210322189774682e-09,
"loss": 0.2127893567085266,
"step": 1420
},
{
"epoch": 2.985294117647059,
"grad_norm": 10.249299245135052,
"learning_rate": 9.563120501221434e-10,
"loss": 0.31507742404937744,
"step": 1421
},
{
"epoch": 2.9873949579831933,
"grad_norm": 10.643798027898825,
"learning_rate": 7.321818838279537e-10,
"loss": 0.49292629957199097,
"step": 1422
},
{
"epoch": 2.9894957983193278,
"grad_norm": 7.762285292055822,
"learning_rate": 5.379330305488317e-10,
"loss": 0.25357064604759216,
"step": 1423
},
{
"epoch": 2.991596638655462,
"grad_norm": 7.423869479037056,
"learning_rate": 3.735666513371428e-10,
"loss": 0.4229947328567505,
"step": 1424
},
{
"epoch": 2.9936974789915967,
"grad_norm": 10.794639890750766,
"learning_rate": 2.3908372863368223e-10,
"loss": 0.5679960250854492,
"step": 1425
},
{
"epoch": 2.995798319327731,
"grad_norm": 9.814210260546373,
"learning_rate": 1.344850662604591e-10,
"loss": 0.3406621515750885,
"step": 1426
},
{
"epoch": 2.9978991596638656,
"grad_norm": 7.52747077028302,
"learning_rate": 5.977128941903055e-11,
"loss": 0.3986052870750427,
"step": 1427
},
{
"epoch": 3.0,
"grad_norm": 8.379236977666347,
"learning_rate": 1.494284468384066e-11,
"loss": 0.49183082580566406,
"step": 1428
},
{
"epoch": 3.0,
"step": 1428,
"total_flos": 3902317486080.0,
"train_loss": 1.6620939874066776,
"train_runtime": 1853.0016,
"train_samples_per_second": 3.081,
"train_steps_per_second": 0.771
}
],
"logging_steps": 1,
"max_steps": 1428,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3902317486080.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}