10040 lines
258 KiB
JSON
10040 lines
258 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1428,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0021008403361344537,
|
|
"grad_norm": 46.99030458326992,
|
|
"learning_rate": 0.0,
|
|
"loss": 4.722589492797852,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.004201680672268907,
|
|
"grad_norm": 36.88474864262765,
|
|
"learning_rate": 6.993006993006993e-08,
|
|
"loss": 3.9686050415039062,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0063025210084033615,
|
|
"grad_norm": 33.07457670969746,
|
|
"learning_rate": 1.3986013986013987e-07,
|
|
"loss": 4.086915969848633,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.008403361344537815,
|
|
"grad_norm": 31.625791708920385,
|
|
"learning_rate": 2.097902097902098e-07,
|
|
"loss": 4.020754814147949,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.01050420168067227,
|
|
"grad_norm": 43.0872077594366,
|
|
"learning_rate": 2.7972027972027973e-07,
|
|
"loss": 4.11073112487793,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.012605042016806723,
|
|
"grad_norm": 41.05815920768766,
|
|
"learning_rate": 3.496503496503497e-07,
|
|
"loss": 4.068889141082764,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.014705882352941176,
|
|
"grad_norm": 38.60187500165403,
|
|
"learning_rate": 4.195804195804196e-07,
|
|
"loss": 3.9590301513671875,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.01680672268907563,
|
|
"grad_norm": 39.6727205000689,
|
|
"learning_rate": 4.895104895104896e-07,
|
|
"loss": 3.7929787635803223,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.018907563025210083,
|
|
"grad_norm": 35.21152216885091,
|
|
"learning_rate": 5.594405594405595e-07,
|
|
"loss": 4.345971584320068,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.02100840336134454,
|
|
"grad_norm": 32.94311862999745,
|
|
"learning_rate": 6.293706293706295e-07,
|
|
"loss": 4.204797744750977,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.023109243697478993,
|
|
"grad_norm": 35.708967266069514,
|
|
"learning_rate": 6.993006993006994e-07,
|
|
"loss": 4.2924957275390625,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.025210084033613446,
|
|
"grad_norm": 35.068164747297715,
|
|
"learning_rate": 7.692307692307694e-07,
|
|
"loss": 3.6519017219543457,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.0273109243697479,
|
|
"grad_norm": 36.012069511225576,
|
|
"learning_rate": 8.391608391608393e-07,
|
|
"loss": 3.5902950763702393,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.029411764705882353,
|
|
"grad_norm": 31.883494522724174,
|
|
"learning_rate": 9.090909090909091e-07,
|
|
"loss": 3.751192092895508,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.031512605042016806,
|
|
"grad_norm": 28.67530148457089,
|
|
"learning_rate": 9.790209790209791e-07,
|
|
"loss": 4.330526351928711,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.03361344537815126,
|
|
"grad_norm": 36.87930031460684,
|
|
"learning_rate": 1.0489510489510491e-06,
|
|
"loss": 3.7747349739074707,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.03571428571428571,
|
|
"grad_norm": 34.94283148044396,
|
|
"learning_rate": 1.118881118881119e-06,
|
|
"loss": 3.6174468994140625,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.037815126050420166,
|
|
"grad_norm": 26.489554272646977,
|
|
"learning_rate": 1.188811188811189e-06,
|
|
"loss": 3.4348971843719482,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.03991596638655462,
|
|
"grad_norm": 25.603886661513403,
|
|
"learning_rate": 1.258741258741259e-06,
|
|
"loss": 3.4862470626831055,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.04201680672268908,
|
|
"grad_norm": 22.404814809342252,
|
|
"learning_rate": 1.3286713286713287e-06,
|
|
"loss": 3.7471625804901123,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.04411764705882353,
|
|
"grad_norm": 21.15277638751192,
|
|
"learning_rate": 1.3986013986013987e-06,
|
|
"loss": 3.6562182903289795,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.046218487394957986,
|
|
"grad_norm": 20.270881263670095,
|
|
"learning_rate": 1.4685314685314685e-06,
|
|
"loss": 2.944753408432007,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.04831932773109244,
|
|
"grad_norm": 22.812966816264836,
|
|
"learning_rate": 1.5384615384615387e-06,
|
|
"loss": 4.277539253234863,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.05042016806722689,
|
|
"grad_norm": 21.380979723581284,
|
|
"learning_rate": 1.6083916083916085e-06,
|
|
"loss": 4.103379726409912,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.052521008403361345,
|
|
"grad_norm": 13.548338819677783,
|
|
"learning_rate": 1.6783216783216785e-06,
|
|
"loss": 3.516192674636841,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0546218487394958,
|
|
"grad_norm": 16.574184790133323,
|
|
"learning_rate": 1.7482517482517483e-06,
|
|
"loss": 3.054426908493042,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.05672268907563025,
|
|
"grad_norm": 14.817846975349166,
|
|
"learning_rate": 1.8181818181818183e-06,
|
|
"loss": 3.598344564437866,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.058823529411764705,
|
|
"grad_norm": 13.230202987729585,
|
|
"learning_rate": 1.888111888111888e-06,
|
|
"loss": 2.1566905975341797,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.06092436974789916,
|
|
"grad_norm": 19.612310968262104,
|
|
"learning_rate": 1.9580419580419583e-06,
|
|
"loss": 2.7493889331817627,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.06302521008403361,
|
|
"grad_norm": 18.334666543367657,
|
|
"learning_rate": 2.027972027972028e-06,
|
|
"loss": 3.7484190464019775,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.06512605042016807,
|
|
"grad_norm": 15.812972082251932,
|
|
"learning_rate": 2.0979020979020983e-06,
|
|
"loss": 3.4763312339782715,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.06722689075630252,
|
|
"grad_norm": 18.340243585590446,
|
|
"learning_rate": 2.167832167832168e-06,
|
|
"loss": 4.1537184715271,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.06932773109243698,
|
|
"grad_norm": 9.470566142580898,
|
|
"learning_rate": 2.237762237762238e-06,
|
|
"loss": 3.949978828430176,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.07142857142857142,
|
|
"grad_norm": 15.047162012043515,
|
|
"learning_rate": 2.307692307692308e-06,
|
|
"loss": 3.095123291015625,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.07352941176470588,
|
|
"grad_norm": 15.619490386855553,
|
|
"learning_rate": 2.377622377622378e-06,
|
|
"loss": 3.6232047080993652,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.07563025210084033,
|
|
"grad_norm": 12.996399577415676,
|
|
"learning_rate": 2.4475524475524477e-06,
|
|
"loss": 3.801804304122925,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.07773109243697479,
|
|
"grad_norm": 7.924270548246447,
|
|
"learning_rate": 2.517482517482518e-06,
|
|
"loss": 2.909287452697754,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.07983193277310924,
|
|
"grad_norm": 10.838167134028488,
|
|
"learning_rate": 2.5874125874125877e-06,
|
|
"loss": 3.283078670501709,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.0819327731092437,
|
|
"grad_norm": 12.236334826312302,
|
|
"learning_rate": 2.6573426573426574e-06,
|
|
"loss": 2.9224965572357178,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.08403361344537816,
|
|
"grad_norm": 10.59808836361908,
|
|
"learning_rate": 2.7272727272727272e-06,
|
|
"loss": 3.591977119445801,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0861344537815126,
|
|
"grad_norm": 9.295669805450128,
|
|
"learning_rate": 2.7972027972027974e-06,
|
|
"loss": 3.0213565826416016,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.08823529411764706,
|
|
"grad_norm": 12.563423620415891,
|
|
"learning_rate": 2.8671328671328672e-06,
|
|
"loss": 2.9183509349823,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.09033613445378151,
|
|
"grad_norm": 13.858660538396043,
|
|
"learning_rate": 2.937062937062937e-06,
|
|
"loss": 3.4748919010162354,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.09243697478991597,
|
|
"grad_norm": 11.043040598415395,
|
|
"learning_rate": 3.006993006993007e-06,
|
|
"loss": 3.53951096534729,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.09453781512605042,
|
|
"grad_norm": 12.201335477546305,
|
|
"learning_rate": 3.0769230769230774e-06,
|
|
"loss": 3.2075607776641846,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.09663865546218488,
|
|
"grad_norm": 20.281483402633803,
|
|
"learning_rate": 3.1468531468531472e-06,
|
|
"loss": 3.2893571853637695,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.09873949579831932,
|
|
"grad_norm": 10.008468666819498,
|
|
"learning_rate": 3.216783216783217e-06,
|
|
"loss": 3.47295880317688,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.10084033613445378,
|
|
"grad_norm": 11.407538553004894,
|
|
"learning_rate": 3.286713286713287e-06,
|
|
"loss": 3.5495269298553467,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.10294117647058823,
|
|
"grad_norm": 16.252417297798132,
|
|
"learning_rate": 3.356643356643357e-06,
|
|
"loss": 3.218782901763916,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.10504201680672269,
|
|
"grad_norm": 8.752146553121406,
|
|
"learning_rate": 3.426573426573427e-06,
|
|
"loss": 2.612854242324829,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.10714285714285714,
|
|
"grad_norm": 14.782692853689836,
|
|
"learning_rate": 3.4965034965034966e-06,
|
|
"loss": 3.0805444717407227,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.1092436974789916,
|
|
"grad_norm": 14.664178996815842,
|
|
"learning_rate": 3.566433566433567e-06,
|
|
"loss": 3.1539719104766846,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.11134453781512606,
|
|
"grad_norm": 13.158498079025986,
|
|
"learning_rate": 3.6363636363636366e-06,
|
|
"loss": 3.5745811462402344,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.1134453781512605,
|
|
"grad_norm": 9.661944205457672,
|
|
"learning_rate": 3.7062937062937064e-06,
|
|
"loss": 3.033264398574829,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.11554621848739496,
|
|
"grad_norm": 8.534767379388418,
|
|
"learning_rate": 3.776223776223776e-06,
|
|
"loss": 2.5727319717407227,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 10.446726865588245,
|
|
"learning_rate": 3.846153846153847e-06,
|
|
"loss": 3.4801394939422607,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.11974789915966387,
|
|
"grad_norm": 11.510497882977212,
|
|
"learning_rate": 3.916083916083917e-06,
|
|
"loss": 3.253239631652832,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.12184873949579832,
|
|
"grad_norm": 12.480969279334285,
|
|
"learning_rate": 3.986013986013986e-06,
|
|
"loss": 3.0049266815185547,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.12394957983193278,
|
|
"grad_norm": 10.926998541566615,
|
|
"learning_rate": 4.055944055944056e-06,
|
|
"loss": 3.13586688041687,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.12605042016806722,
|
|
"grad_norm": 15.080151132986066,
|
|
"learning_rate": 4.125874125874127e-06,
|
|
"loss": 3.5970468521118164,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.12815126050420167,
|
|
"grad_norm": 11.40302094802426,
|
|
"learning_rate": 4.195804195804197e-06,
|
|
"loss": 3.0423130989074707,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.13025210084033614,
|
|
"grad_norm": 27.877774734458356,
|
|
"learning_rate": 4.265734265734266e-06,
|
|
"loss": 3.270495891571045,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.1323529411764706,
|
|
"grad_norm": 14.517043785366944,
|
|
"learning_rate": 4.335664335664336e-06,
|
|
"loss": 3.3109726905822754,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.13445378151260504,
|
|
"grad_norm": 9.703645186786849,
|
|
"learning_rate": 4.405594405594406e-06,
|
|
"loss": 2.8192973136901855,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.13655462184873948,
|
|
"grad_norm": 7.165620671720677,
|
|
"learning_rate": 4.475524475524476e-06,
|
|
"loss": 2.6368956565856934,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.13865546218487396,
|
|
"grad_norm": 8.390508554521247,
|
|
"learning_rate": 4.5454545454545455e-06,
|
|
"loss": 3.2420871257781982,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.1407563025210084,
|
|
"grad_norm": 13.495244668273582,
|
|
"learning_rate": 4.615384615384616e-06,
|
|
"loss": 3.4662106037139893,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 8.727556576037161,
|
|
"learning_rate": 4.685314685314686e-06,
|
|
"loss": 2.52485728263855,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.14495798319327732,
|
|
"grad_norm": 7.972842185352863,
|
|
"learning_rate": 4.755244755244756e-06,
|
|
"loss": 2.94364595413208,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.14705882352941177,
|
|
"grad_norm": 9.543376366698592,
|
|
"learning_rate": 4.8251748251748255e-06,
|
|
"loss": 3.080875873565674,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.14915966386554622,
|
|
"grad_norm": 9.776294776088129,
|
|
"learning_rate": 4.895104895104895e-06,
|
|
"loss": 2.779900550842285,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.15126050420168066,
|
|
"grad_norm": 13.30903798143632,
|
|
"learning_rate": 4.965034965034965e-06,
|
|
"loss": 2.5541608333587646,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.15336134453781514,
|
|
"grad_norm": 15.821626595005261,
|
|
"learning_rate": 5.034965034965036e-06,
|
|
"loss": 3.3032145500183105,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.15546218487394958,
|
|
"grad_norm": 16.2233191932233,
|
|
"learning_rate": 5.1048951048951055e-06,
|
|
"loss": 3.302570104598999,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.15756302521008403,
|
|
"grad_norm": 12.108052548372182,
|
|
"learning_rate": 5.174825174825175e-06,
|
|
"loss": 3.084743022918701,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.15966386554621848,
|
|
"grad_norm": 12.123207907469205,
|
|
"learning_rate": 5.244755244755245e-06,
|
|
"loss": 2.839994430541992,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.16176470588235295,
|
|
"grad_norm": 27.162201978657112,
|
|
"learning_rate": 5.314685314685315e-06,
|
|
"loss": 2.8261585235595703,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.1638655462184874,
|
|
"grad_norm": 9.26220027446702,
|
|
"learning_rate": 5.384615384615385e-06,
|
|
"loss": 3.022369861602783,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.16596638655462184,
|
|
"grad_norm": 34.01052504369158,
|
|
"learning_rate": 5.4545454545454545e-06,
|
|
"loss": 3.11270809173584,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.16806722689075632,
|
|
"grad_norm": 10.318191696420305,
|
|
"learning_rate": 5.524475524475524e-06,
|
|
"loss": 2.8419973850250244,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.17016806722689076,
|
|
"grad_norm": 8.227880656419073,
|
|
"learning_rate": 5.594405594405595e-06,
|
|
"loss": 3.14296555519104,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.1722689075630252,
|
|
"grad_norm": 9.40271889928186,
|
|
"learning_rate": 5.664335664335665e-06,
|
|
"loss": 2.8033950328826904,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.17436974789915966,
|
|
"grad_norm": 9.95443701525972,
|
|
"learning_rate": 5.7342657342657345e-06,
|
|
"loss": 3.087614059448242,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.17647058823529413,
|
|
"grad_norm": 9.899264562788812,
|
|
"learning_rate": 5.804195804195804e-06,
|
|
"loss": 2.7504851818084717,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.17857142857142858,
|
|
"grad_norm": 14.065492890913543,
|
|
"learning_rate": 5.874125874125874e-06,
|
|
"loss": 2.701443672180176,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.18067226890756302,
|
|
"grad_norm": 12.602747808400954,
|
|
"learning_rate": 5.944055944055944e-06,
|
|
"loss": 2.8965351581573486,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.18277310924369747,
|
|
"grad_norm": 10.478287423381614,
|
|
"learning_rate": 6.013986013986014e-06,
|
|
"loss": 2.9607667922973633,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.18487394957983194,
|
|
"grad_norm": 25.168903954415445,
|
|
"learning_rate": 6.083916083916085e-06,
|
|
"loss": 3.2360849380493164,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1869747899159664,
|
|
"grad_norm": 9.706474901305377,
|
|
"learning_rate": 6.153846153846155e-06,
|
|
"loss": 3.146829605102539,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.18907563025210083,
|
|
"grad_norm": 16.976357238619705,
|
|
"learning_rate": 6.223776223776225e-06,
|
|
"loss": 3.017669200897217,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.19117647058823528,
|
|
"grad_norm": 22.76924701111939,
|
|
"learning_rate": 6.2937062937062944e-06,
|
|
"loss": 3.4739527702331543,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.19327731092436976,
|
|
"grad_norm": 10.061400086768733,
|
|
"learning_rate": 6.363636363636364e-06,
|
|
"loss": 2.8482136726379395,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.1953781512605042,
|
|
"grad_norm": 12.711145684012218,
|
|
"learning_rate": 6.433566433566434e-06,
|
|
"loss": 2.7700202465057373,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.19747899159663865,
|
|
"grad_norm": 19.156479556141598,
|
|
"learning_rate": 6.503496503496504e-06,
|
|
"loss": 3.109806537628174,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.19957983193277312,
|
|
"grad_norm": 7.981944912040402,
|
|
"learning_rate": 6.573426573426574e-06,
|
|
"loss": 2.9637131690979004,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.20168067226890757,
|
|
"grad_norm": 12.84352715723152,
|
|
"learning_rate": 6.643356643356644e-06,
|
|
"loss": 2.846522808074951,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.20378151260504201,
|
|
"grad_norm": 17.72741270084134,
|
|
"learning_rate": 6.713286713286714e-06,
|
|
"loss": 3.3485140800476074,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.20588235294117646,
|
|
"grad_norm": 13.533003488049717,
|
|
"learning_rate": 6.783216783216784e-06,
|
|
"loss": 3.014303207397461,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.20798319327731093,
|
|
"grad_norm": 8.522856642426069,
|
|
"learning_rate": 6.853146853146854e-06,
|
|
"loss": 2.6768596172332764,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.21008403361344538,
|
|
"grad_norm": 28.089483697370316,
|
|
"learning_rate": 6.923076923076923e-06,
|
|
"loss": 2.9336276054382324,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.21218487394957983,
|
|
"grad_norm": 10.595985912398088,
|
|
"learning_rate": 6.993006993006993e-06,
|
|
"loss": 3.103717803955078,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.21428571428571427,
|
|
"grad_norm": 9.980132596619391,
|
|
"learning_rate": 7.062937062937063e-06,
|
|
"loss": 2.7759556770324707,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.21638655462184875,
|
|
"grad_norm": 9.86026405652693,
|
|
"learning_rate": 7.132867132867134e-06,
|
|
"loss": 2.586292266845703,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.2184873949579832,
|
|
"grad_norm": 15.473317115776915,
|
|
"learning_rate": 7.202797202797203e-06,
|
|
"loss": 3.109880208969116,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.22058823529411764,
|
|
"grad_norm": 12.647734541011893,
|
|
"learning_rate": 7.272727272727273e-06,
|
|
"loss": 2.7075915336608887,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.22268907563025211,
|
|
"grad_norm": 13.506863668083897,
|
|
"learning_rate": 7.342657342657343e-06,
|
|
"loss": 3.034566879272461,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.22478991596638656,
|
|
"grad_norm": 10.413965863492892,
|
|
"learning_rate": 7.412587412587413e-06,
|
|
"loss": 2.3479254245758057,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.226890756302521,
|
|
"grad_norm": 11.563038534176888,
|
|
"learning_rate": 7.4825174825174825e-06,
|
|
"loss": 2.9856462478637695,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.22899159663865545,
|
|
"grad_norm": 12.710111708974129,
|
|
"learning_rate": 7.552447552447552e-06,
|
|
"loss": 3.462696075439453,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.23109243697478993,
|
|
"grad_norm": 11.076816381042432,
|
|
"learning_rate": 7.622377622377622e-06,
|
|
"loss": 3.270888566970825,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.23319327731092437,
|
|
"grad_norm": 11.212503376143607,
|
|
"learning_rate": 7.692307692307694e-06,
|
|
"loss": 3.048227310180664,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 12.400573941878047,
|
|
"learning_rate": 7.762237762237763e-06,
|
|
"loss": 3.2194204330444336,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.23739495798319327,
|
|
"grad_norm": 15.219804312233611,
|
|
"learning_rate": 7.832167832167833e-06,
|
|
"loss": 3.6809778213500977,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.23949579831932774,
|
|
"grad_norm": 14.217254787332546,
|
|
"learning_rate": 7.902097902097902e-06,
|
|
"loss": 2.912044048309326,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.2415966386554622,
|
|
"grad_norm": 16.623326169718574,
|
|
"learning_rate": 7.972027972027973e-06,
|
|
"loss": 3.1021275520324707,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.24369747899159663,
|
|
"grad_norm": 9.18687643151976,
|
|
"learning_rate": 8.041958041958042e-06,
|
|
"loss": 3.089829683303833,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.24579831932773108,
|
|
"grad_norm": 16.772079088582387,
|
|
"learning_rate": 8.111888111888112e-06,
|
|
"loss": 3.4016504287719727,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.24789915966386555,
|
|
"grad_norm": 68.62004336442155,
|
|
"learning_rate": 8.181818181818183e-06,
|
|
"loss": 3.502598285675049,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 9.091493777708147,
|
|
"learning_rate": 8.251748251748254e-06,
|
|
"loss": 3.0750184059143066,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.25210084033613445,
|
|
"grad_norm": 13.607326745790957,
|
|
"learning_rate": 8.321678321678323e-06,
|
|
"loss": 2.8168656826019287,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.2542016806722689,
|
|
"grad_norm": 8.683002515816812,
|
|
"learning_rate": 8.391608391608393e-06,
|
|
"loss": 2.3565826416015625,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.25630252100840334,
|
|
"grad_norm": 23.36617403509855,
|
|
"learning_rate": 8.461538461538462e-06,
|
|
"loss": 2.972810745239258,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.25840336134453784,
|
|
"grad_norm": 8.174637732136828,
|
|
"learning_rate": 8.531468531468533e-06,
|
|
"loss": 2.9700140953063965,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.2605042016806723,
|
|
"grad_norm": 10.47900789596826,
|
|
"learning_rate": 8.601398601398602e-06,
|
|
"loss": 3.0162484645843506,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.26260504201680673,
|
|
"grad_norm": 16.076782876444586,
|
|
"learning_rate": 8.671328671328672e-06,
|
|
"loss": 3.109422445297241,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.2647058823529412,
|
|
"grad_norm": 33.666522199585756,
|
|
"learning_rate": 8.741258741258743e-06,
|
|
"loss": 2.6802124977111816,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.2668067226890756,
|
|
"grad_norm": 12.537622047835336,
|
|
"learning_rate": 8.811188811188812e-06,
|
|
"loss": 2.6609840393066406,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.2689075630252101,
|
|
"grad_norm": 11.767487098574284,
|
|
"learning_rate": 8.881118881118883e-06,
|
|
"loss": 3.0896430015563965,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.2710084033613445,
|
|
"grad_norm": 7.737680571917604,
|
|
"learning_rate": 8.951048951048951e-06,
|
|
"loss": 3.2370247840881348,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.27310924369747897,
|
|
"grad_norm": 13.8395022595692,
|
|
"learning_rate": 9.020979020979022e-06,
|
|
"loss": 2.8461947441101074,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.27521008403361347,
|
|
"grad_norm": 27.87627626250655,
|
|
"learning_rate": 9.090909090909091e-06,
|
|
"loss": 3.480252742767334,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.2773109243697479,
|
|
"grad_norm": 8.067136701179228,
|
|
"learning_rate": 9.160839160839162e-06,
|
|
"loss": 2.8424923419952393,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.27941176470588236,
|
|
"grad_norm": 12.474203656062087,
|
|
"learning_rate": 9.230769230769232e-06,
|
|
"loss": 3.4489340782165527,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.2815126050420168,
|
|
"grad_norm": 9.56092760411321,
|
|
"learning_rate": 9.300699300699301e-06,
|
|
"loss": 2.48683500289917,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.28361344537815125,
|
|
"grad_norm": 10.45857911102664,
|
|
"learning_rate": 9.370629370629372e-06,
|
|
"loss": 2.975668430328369,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 9.79706691198192,
|
|
"learning_rate": 9.44055944055944e-06,
|
|
"loss": 3.163745403289795,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.28781512605042014,
|
|
"grad_norm": 16.663615728677826,
|
|
"learning_rate": 9.510489510489511e-06,
|
|
"loss": 3.3047399520874023,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.28991596638655465,
|
|
"grad_norm": 10.093105336690149,
|
|
"learning_rate": 9.58041958041958e-06,
|
|
"loss": 2.901014804840088,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.2920168067226891,
|
|
"grad_norm": 10.712099293339499,
|
|
"learning_rate": 9.650349650349651e-06,
|
|
"loss": 2.4749934673309326,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 12.306067699743261,
|
|
"learning_rate": 9.72027972027972e-06,
|
|
"loss": 2.735682964324951,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.296218487394958,
|
|
"grad_norm": 8.899689488937057,
|
|
"learning_rate": 9.79020979020979e-06,
|
|
"loss": 1.6851799488067627,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.29831932773109243,
|
|
"grad_norm": 15.412895468970188,
|
|
"learning_rate": 9.860139860139861e-06,
|
|
"loss": 2.4892358779907227,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.3004201680672269,
|
|
"grad_norm": 13.718632928552148,
|
|
"learning_rate": 9.93006993006993e-06,
|
|
"loss": 3.152186870574951,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.3025210084033613,
|
|
"grad_norm": 9.63303607414013,
|
|
"learning_rate": 1e-05,
|
|
"loss": 2.4623451232910156,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.30462184873949577,
|
|
"grad_norm": 9.020782417307544,
|
|
"learning_rate": 9.999985057155316e-06,
|
|
"loss": 2.3573660850524902,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.3067226890756303,
|
|
"grad_norm": 15.431206065267094,
|
|
"learning_rate": 9.999940228710581e-06,
|
|
"loss": 3.248166561126709,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.3088235294117647,
|
|
"grad_norm": 9.624481227031932,
|
|
"learning_rate": 9.99986551493374e-06,
|
|
"loss": 3.073438882827759,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.31092436974789917,
|
|
"grad_norm": 12.24535420873494,
|
|
"learning_rate": 9.999760916271368e-06,
|
|
"loss": 3.175532579421997,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.3130252100840336,
|
|
"grad_norm": 8.43469444061833,
|
|
"learning_rate": 9.999626433348664e-06,
|
|
"loss": 2.2849655151367188,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.31512605042016806,
|
|
"grad_norm": 13.307775899632185,
|
|
"learning_rate": 9.999462066969451e-06,
|
|
"loss": 2.7922751903533936,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3172268907563025,
|
|
"grad_norm": 11.454291564861384,
|
|
"learning_rate": 9.999267818116173e-06,
|
|
"loss": 3.03188419342041,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.31932773109243695,
|
|
"grad_norm": 16.712527557096042,
|
|
"learning_rate": 9.999043687949878e-06,
|
|
"loss": 3.3826239109039307,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.32142857142857145,
|
|
"grad_norm": 22.000641429064785,
|
|
"learning_rate": 9.998789677810226e-06,
|
|
"loss": 3.103822708129883,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3235294117647059,
|
|
"grad_norm": 11.206024089957094,
|
|
"learning_rate": 9.998505789215469e-06,
|
|
"loss": 2.633566379547119,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.32563025210084034,
|
|
"grad_norm": 17.0566593574694,
|
|
"learning_rate": 9.998192023862448e-06,
|
|
"loss": 2.937821388244629,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3277310924369748,
|
|
"grad_norm": 10.638495096316019,
|
|
"learning_rate": 9.997848383626583e-06,
|
|
"loss": 3.0057592391967773,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.32983193277310924,
|
|
"grad_norm": 13.891998906384215,
|
|
"learning_rate": 9.997474870561858e-06,
|
|
"loss": 3.4198083877563477,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.3319327731092437,
|
|
"grad_norm": 7.77313705300237,
|
|
"learning_rate": 9.997071486900813e-06,
|
|
"loss": 2.748509407043457,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.33403361344537813,
|
|
"grad_norm": 11.432910137348301,
|
|
"learning_rate": 9.996638235054527e-06,
|
|
"loss": 3.3422679901123047,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.33613445378151263,
|
|
"grad_norm": 9.407520098068266,
|
|
"learning_rate": 9.996175117612608e-06,
|
|
"loss": 3.2214763164520264,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.3382352941176471,
|
|
"grad_norm": 9.207535688673886,
|
|
"learning_rate": 9.99568213734317e-06,
|
|
"loss": 2.5538628101348877,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.3403361344537815,
|
|
"grad_norm": 10.755277234400435,
|
|
"learning_rate": 9.995159297192824e-06,
|
|
"loss": 2.781787872314453,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.34243697478991597,
|
|
"grad_norm": 13.097444619561006,
|
|
"learning_rate": 9.99460660028666e-06,
|
|
"loss": 3.3784282207489014,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.3445378151260504,
|
|
"grad_norm": 9.022751200279867,
|
|
"learning_rate": 9.994024049928222e-06,
|
|
"loss": 3.2824249267578125,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.34663865546218486,
|
|
"grad_norm": 7.521996605994801,
|
|
"learning_rate": 9.993411649599494e-06,
|
|
"loss": 2.589594841003418,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.3487394957983193,
|
|
"grad_norm": 19.97411284039417,
|
|
"learning_rate": 9.992769402960878e-06,
|
|
"loss": 3.7193164825439453,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.35084033613445376,
|
|
"grad_norm": 8.085906363590569,
|
|
"learning_rate": 9.99209731385117e-06,
|
|
"loss": 2.823063611984253,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 11.903719879092119,
|
|
"learning_rate": 9.99139538628754e-06,
|
|
"loss": 3.1389951705932617,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.3550420168067227,
|
|
"grad_norm": 21.923086009804106,
|
|
"learning_rate": 9.990663624465504e-06,
|
|
"loss": 2.9536495208740234,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 13.50586631618126,
|
|
"learning_rate": 9.989902032758904e-06,
|
|
"loss": 2.6355466842651367,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3592436974789916,
|
|
"grad_norm": 5.578596081707914,
|
|
"learning_rate": 9.989110615719882e-06,
|
|
"loss": 1.1800763607025146,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.36134453781512604,
|
|
"grad_norm": 16.042050675579503,
|
|
"learning_rate": 9.988289378078842e-06,
|
|
"loss": 2.679232358932495,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.3634453781512605,
|
|
"grad_norm": 13.382798743317503,
|
|
"learning_rate": 9.987438324744437e-06,
|
|
"loss": 2.3583908081054688,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.36554621848739494,
|
|
"grad_norm": 9.108315025108485,
|
|
"learning_rate": 9.986557460803527e-06,
|
|
"loss": 2.748077392578125,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.36764705882352944,
|
|
"grad_norm": 9.67015040715346,
|
|
"learning_rate": 9.985646791521165e-06,
|
|
"loss": 3.2660067081451416,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.3697478991596639,
|
|
"grad_norm": 11.678263700428246,
|
|
"learning_rate": 9.984706322340539e-06,
|
|
"loss": 2.9270148277282715,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.37184873949579833,
|
|
"grad_norm": 9.845183071879623,
|
|
"learning_rate": 9.983736058882965e-06,
|
|
"loss": 2.455327033996582,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.3739495798319328,
|
|
"grad_norm": 9.630887189931224,
|
|
"learning_rate": 9.982736006947842e-06,
|
|
"loss": 3.171403169631958,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.3760504201680672,
|
|
"grad_norm": 7.217019473795253,
|
|
"learning_rate": 9.98170617251262e-06,
|
|
"loss": 2.6023473739624023,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.37815126050420167,
|
|
"grad_norm": 7.438957257707156,
|
|
"learning_rate": 9.98064656173276e-06,
|
|
"loss": 2.7492432594299316,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.3802521008403361,
|
|
"grad_norm": 12.314225953456766,
|
|
"learning_rate": 9.979557180941702e-06,
|
|
"loss": 3.520758628845215,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.38235294117647056,
|
|
"grad_norm": 7.754983445761027,
|
|
"learning_rate": 9.978438036650822e-06,
|
|
"loss": 2.7245442867279053,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.38445378151260506,
|
|
"grad_norm": 15.124443991385633,
|
|
"learning_rate": 9.977289135549404e-06,
|
|
"loss": 2.790768623352051,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.3865546218487395,
|
|
"grad_norm": 15.169071975047261,
|
|
"learning_rate": 9.976110484504587e-06,
|
|
"loss": 2.5588126182556152,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.38865546218487396,
|
|
"grad_norm": 18.50299410182784,
|
|
"learning_rate": 9.974902090561331e-06,
|
|
"loss": 3.0367865562438965,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.3907563025210084,
|
|
"grad_norm": 12.853534690634186,
|
|
"learning_rate": 9.973663960942373e-06,
|
|
"loss": 3.1013669967651367,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.39285714285714285,
|
|
"grad_norm": 11.962180171730763,
|
|
"learning_rate": 9.972396103048184e-06,
|
|
"loss": 2.678436279296875,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.3949579831932773,
|
|
"grad_norm": 14.345031935763927,
|
|
"learning_rate": 9.971098524456925e-06,
|
|
"loss": 2.866910696029663,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.39705882352941174,
|
|
"grad_norm": 24.927874908872194,
|
|
"learning_rate": 9.969771232924404e-06,
|
|
"loss": 2.6690807342529297,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.39915966386554624,
|
|
"grad_norm": 13.232716463146705,
|
|
"learning_rate": 9.968414236384022e-06,
|
|
"loss": 2.615846633911133,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4012605042016807,
|
|
"grad_norm": 19.24597028348177,
|
|
"learning_rate": 9.967027542946739e-06,
|
|
"loss": 3.197604179382324,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.40336134453781514,
|
|
"grad_norm": 19.57923793430777,
|
|
"learning_rate": 9.965611160901008e-06,
|
|
"loss": 1.584808349609375,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.4054621848739496,
|
|
"grad_norm": 9.313854254132917,
|
|
"learning_rate": 9.964165098712745e-06,
|
|
"loss": 2.7913365364074707,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.40756302521008403,
|
|
"grad_norm": 15.764914604292455,
|
|
"learning_rate": 9.962689365025259e-06,
|
|
"loss": 3.42575740814209,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.4096638655462185,
|
|
"grad_norm": 9.662424511151881,
|
|
"learning_rate": 9.961183968659217e-06,
|
|
"loss": 2.6931188106536865,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.4117647058823529,
|
|
"grad_norm": 13.117904635638109,
|
|
"learning_rate": 9.959648918612576e-06,
|
|
"loss": 2.4463605880737305,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.41386554621848737,
|
|
"grad_norm": 8.434614198562612,
|
|
"learning_rate": 9.958084224060547e-06,
|
|
"loss": 2.647773265838623,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.41596638655462187,
|
|
"grad_norm": 26.520590112059157,
|
|
"learning_rate": 9.956489894355521e-06,
|
|
"loss": 2.660770893096924,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.4180672268907563,
|
|
"grad_norm": 28.510323184410662,
|
|
"learning_rate": 9.954865939027028e-06,
|
|
"loss": 3.627254009246826,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.42016806722689076,
|
|
"grad_norm": 7.679364921262506,
|
|
"learning_rate": 9.953212367781675e-06,
|
|
"loss": 2.683685779571533,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.4222689075630252,
|
|
"grad_norm": 13.123862369544378,
|
|
"learning_rate": 9.95152919050308e-06,
|
|
"loss": 2.7249388694763184,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.42436974789915966,
|
|
"grad_norm": 6.985824973864478,
|
|
"learning_rate": 9.949816417251831e-06,
|
|
"loss": 2.933401107788086,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.4264705882352941,
|
|
"grad_norm": 13.569070375050062,
|
|
"learning_rate": 9.948074058265409e-06,
|
|
"loss": 3.5457630157470703,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 7.335673995298351,
|
|
"learning_rate": 9.94630212395813e-06,
|
|
"loss": 2.483736038208008,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.43067226890756305,
|
|
"grad_norm": 73.9554577496319,
|
|
"learning_rate": 9.944500624921094e-06,
|
|
"loss": 2.470374584197998,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.4327731092436975,
|
|
"grad_norm": 11.27254717083641,
|
|
"learning_rate": 9.942669571922108e-06,
|
|
"loss": 3.2255494594573975,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.43487394957983194,
|
|
"grad_norm": 11.257221010364708,
|
|
"learning_rate": 9.940808975905627e-06,
|
|
"loss": 3.4820542335510254,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.4369747899159664,
|
|
"grad_norm": 16.32933603207297,
|
|
"learning_rate": 9.93891884799269e-06,
|
|
"loss": 3.218539237976074,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.43907563025210083,
|
|
"grad_norm": 27.30232213883322,
|
|
"learning_rate": 9.936999199480854e-06,
|
|
"loss": 2.8428990840911865,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.4411764705882353,
|
|
"grad_norm": 8.340720464987514,
|
|
"learning_rate": 9.935050041844121e-06,
|
|
"loss": 3.661019802093506,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4432773109243697,
|
|
"grad_norm": 18.410105558121085,
|
|
"learning_rate": 9.933071386732874e-06,
|
|
"loss": 3.330902338027954,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.44537815126050423,
|
|
"grad_norm": 10.649860943280096,
|
|
"learning_rate": 9.931063245973812e-06,
|
|
"loss": 2.7754883766174316,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.4474789915966387,
|
|
"grad_norm": 13.898816541841864,
|
|
"learning_rate": 9.929025631569864e-06,
|
|
"loss": 2.3284661769866943,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.4495798319327731,
|
|
"grad_norm": 11.170546252681195,
|
|
"learning_rate": 9.926958555700134e-06,
|
|
"loss": 2.599228858947754,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.45168067226890757,
|
|
"grad_norm": 11.76779841923458,
|
|
"learning_rate": 9.924862030719821e-06,
|
|
"loss": 3.174004077911377,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.453781512605042,
|
|
"grad_norm": 12.943887425672324,
|
|
"learning_rate": 9.922736069160141e-06,
|
|
"loss": 2.7390694618225098,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.45588235294117646,
|
|
"grad_norm": 11.55413590289726,
|
|
"learning_rate": 9.920580683728263e-06,
|
|
"loss": 2.7388081550598145,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.4579831932773109,
|
|
"grad_norm": 22.67934856569803,
|
|
"learning_rate": 9.918395887307219e-06,
|
|
"loss": 2.4359140396118164,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.46008403361344535,
|
|
"grad_norm": 16.89880489289811,
|
|
"learning_rate": 9.916181692955841e-06,
|
|
"loss": 2.9688220024108887,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.46218487394957986,
|
|
"grad_norm": 19.071787734842648,
|
|
"learning_rate": 9.913938113908675e-06,
|
|
"loss": 3.1534006595611572,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4642857142857143,
|
|
"grad_norm": 15.85242809267351,
|
|
"learning_rate": 9.9116651635759e-06,
|
|
"loss": 2.618938684463501,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.46638655462184875,
|
|
"grad_norm": 16.618677645763935,
|
|
"learning_rate": 9.909362855543253e-06,
|
|
"loss": 2.844968318939209,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.4684873949579832,
|
|
"grad_norm": 10.671971882677827,
|
|
"learning_rate": 9.907031203571948e-06,
|
|
"loss": 2.4792628288269043,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 19.241816484552377,
|
|
"learning_rate": 9.90467022159859e-06,
|
|
"loss": 2.894502639770508,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.4726890756302521,
|
|
"grad_norm": 23.569212607106817,
|
|
"learning_rate": 9.902279923735093e-06,
|
|
"loss": 2.792015552520752,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.47478991596638653,
|
|
"grad_norm": 9.680153288005078,
|
|
"learning_rate": 9.899860324268599e-06,
|
|
"loss": 2.9171247482299805,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.47689075630252103,
|
|
"grad_norm": 12.955899131578942,
|
|
"learning_rate": 9.897411437661386e-06,
|
|
"loss": 2.560214042663574,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.4789915966386555,
|
|
"grad_norm": 8.404115741492017,
|
|
"learning_rate": 9.894933278550785e-06,
|
|
"loss": 3.2796883583068848,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.4810924369747899,
|
|
"grad_norm": 8.318847487560761,
|
|
"learning_rate": 9.8924258617491e-06,
|
|
"loss": 3.0324971675872803,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.4831932773109244,
|
|
"grad_norm": 18.51611171900766,
|
|
"learning_rate": 9.8898892022435e-06,
|
|
"loss": 3.3899683952331543,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4852941176470588,
|
|
"grad_norm": 11.1091069250737,
|
|
"learning_rate": 9.887323315195956e-06,
|
|
"loss": 2.742903709411621,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.48739495798319327,
|
|
"grad_norm": 7.80795476246885,
|
|
"learning_rate": 9.884728215943122e-06,
|
|
"loss": 3.230966806411743,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.4894957983193277,
|
|
"grad_norm": 13.293388527053166,
|
|
"learning_rate": 9.882103919996268e-06,
|
|
"loss": 2.8818302154541016,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.49159663865546216,
|
|
"grad_norm": 6.043647907341577,
|
|
"learning_rate": 9.879450443041172e-06,
|
|
"loss": 2.358765125274658,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.49369747899159666,
|
|
"grad_norm": 8.169920329828493,
|
|
"learning_rate": 9.876767800938032e-06,
|
|
"loss": 3.0420098304748535,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.4957983193277311,
|
|
"grad_norm": 18.681067639331463,
|
|
"learning_rate": 9.874056009721367e-06,
|
|
"loss": 2.9595160484313965,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.49789915966386555,
|
|
"grad_norm": 7.792831708992119,
|
|
"learning_rate": 9.87131508559993e-06,
|
|
"loss": 2.9571242332458496,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 13.533405695746444,
|
|
"learning_rate": 9.868545044956603e-06,
|
|
"loss": 2.798694610595703,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.5021008403361344,
|
|
"grad_norm": 7.855798585235136,
|
|
"learning_rate": 9.865745904348296e-06,
|
|
"loss": 2.9430432319641113,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.5042016806722689,
|
|
"grad_norm": 12.596029584158895,
|
|
"learning_rate": 9.862917680505863e-06,
|
|
"loss": 3.065462112426758,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5063025210084033,
|
|
"grad_norm": 11.793626015707394,
|
|
"learning_rate": 9.860060390333988e-06,
|
|
"loss": 3.8562116622924805,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.5084033613445378,
|
|
"grad_norm": 8.660405200484282,
|
|
"learning_rate": 9.857174050911085e-06,
|
|
"loss": 2.645123243331909,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.5105042016806722,
|
|
"grad_norm": 11.950071539791612,
|
|
"learning_rate": 9.854258679489203e-06,
|
|
"loss": 2.500267744064331,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.5126050420168067,
|
|
"grad_norm": 14.029861713702717,
|
|
"learning_rate": 9.851314293493923e-06,
|
|
"loss": 2.553537368774414,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.5147058823529411,
|
|
"grad_norm": 21.40352382596275,
|
|
"learning_rate": 9.848340910524243e-06,
|
|
"loss": 2.694528102874756,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.5168067226890757,
|
|
"grad_norm": 11.756867034830558,
|
|
"learning_rate": 9.845338548352482e-06,
|
|
"loss": 3.2089271545410156,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.5189075630252101,
|
|
"grad_norm": 13.513723971793041,
|
|
"learning_rate": 9.842307224924174e-06,
|
|
"loss": 2.443826198577881,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.5210084033613446,
|
|
"grad_norm": 8.839705225157738,
|
|
"learning_rate": 9.839246958357957e-06,
|
|
"loss": 2.9329233169555664,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.523109243697479,
|
|
"grad_norm": 14.107087922274081,
|
|
"learning_rate": 9.836157766945467e-06,
|
|
"loss": 2.5171399116516113,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.5252100840336135,
|
|
"grad_norm": 8.285926532283062,
|
|
"learning_rate": 9.833039669151225e-06,
|
|
"loss": 3.0069408416748047,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5273109243697479,
|
|
"grad_norm": 9.58371718621674,
|
|
"learning_rate": 9.829892683612535e-06,
|
|
"loss": 2.5816359519958496,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.5294117647058824,
|
|
"grad_norm": 20.00310864922347,
|
|
"learning_rate": 9.826716829139358e-06,
|
|
"loss": 2.3982670307159424,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.5315126050420168,
|
|
"grad_norm": 10.648220658525108,
|
|
"learning_rate": 9.82351212471422e-06,
|
|
"loss": 2.975574016571045,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.5336134453781513,
|
|
"grad_norm": 17.551242772865887,
|
|
"learning_rate": 9.820278589492076e-06,
|
|
"loss": 2.4827775955200195,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.5357142857142857,
|
|
"grad_norm": 17.09184171751482,
|
|
"learning_rate": 9.817016242800215e-06,
|
|
"loss": 2.690033197402954,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.5378151260504201,
|
|
"grad_norm": 14.722560106056354,
|
|
"learning_rate": 9.813725104138133e-06,
|
|
"loss": 3.346949338912964,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.5399159663865546,
|
|
"grad_norm": 17.505076110573757,
|
|
"learning_rate": 9.810405193177418e-06,
|
|
"loss": 2.6791281700134277,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.542016806722689,
|
|
"grad_norm": 17.808558357662132,
|
|
"learning_rate": 9.807056529761637e-06,
|
|
"loss": 2.853158950805664,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.5441176470588235,
|
|
"grad_norm": 20.31515982195739,
|
|
"learning_rate": 9.80367913390621e-06,
|
|
"loss": 3.1636295318603516,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.5462184873949579,
|
|
"grad_norm": 12.64467693447632,
|
|
"learning_rate": 9.800273025798302e-06,
|
|
"loss": 2.5055313110351562,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5483193277310925,
|
|
"grad_norm": 9.570949964132296,
|
|
"learning_rate": 9.796838225796688e-06,
|
|
"loss": 2.9986414909362793,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.5504201680672269,
|
|
"grad_norm": 14.832124263006255,
|
|
"learning_rate": 9.793374754431642e-06,
|
|
"loss": 2.419975757598877,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.5525210084033614,
|
|
"grad_norm": 22.395098332172758,
|
|
"learning_rate": 9.789882632404809e-06,
|
|
"loss": 3.0301923751831055,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.5546218487394958,
|
|
"grad_norm": 8.680917615796206,
|
|
"learning_rate": 9.786361880589084e-06,
|
|
"loss": 2.846034526824951,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.5567226890756303,
|
|
"grad_norm": 16.17159732015871,
|
|
"learning_rate": 9.782812520028487e-06,
|
|
"loss": 3.250943183898926,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.5588235294117647,
|
|
"grad_norm": 12.800181347711561,
|
|
"learning_rate": 9.779234571938034e-06,
|
|
"loss": 2.5069515705108643,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.5609243697478992,
|
|
"grad_norm": 20.58760178113823,
|
|
"learning_rate": 9.775628057703616e-06,
|
|
"loss": 2.2883377075195312,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.5630252100840336,
|
|
"grad_norm": 19.3122933281468,
|
|
"learning_rate": 9.771992998881865e-06,
|
|
"loss": 1.8844149112701416,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.5651260504201681,
|
|
"grad_norm": 11.766785955468544,
|
|
"learning_rate": 9.768329417200029e-06,
|
|
"loss": 2.608553409576416,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.5672268907563025,
|
|
"grad_norm": 9.015634942296078,
|
|
"learning_rate": 9.76463733455584e-06,
|
|
"loss": 2.8849685192108154,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.569327731092437,
|
|
"grad_norm": 10.71605416834433,
|
|
"learning_rate": 9.760916773017386e-06,
|
|
"loss": 2.83829402923584,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 8.102503833940233,
|
|
"learning_rate": 9.757167754822974e-06,
|
|
"loss": 2.6053004264831543,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.5735294117647058,
|
|
"grad_norm": 18.62135736056985,
|
|
"learning_rate": 9.753390302381006e-06,
|
|
"loss": 2.8338804244995117,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.5756302521008403,
|
|
"grad_norm": 34.82348840659483,
|
|
"learning_rate": 9.749584438269833e-06,
|
|
"loss": 2.979978084564209,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.5777310924369747,
|
|
"grad_norm": 6.726547081859168,
|
|
"learning_rate": 9.74575018523763e-06,
|
|
"loss": 1.8241777420043945,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.5798319327731093,
|
|
"grad_norm": 12.206262847267514,
|
|
"learning_rate": 9.741887566202259e-06,
|
|
"loss": 3.2140274047851562,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.5819327731092437,
|
|
"grad_norm": 43.432328207654045,
|
|
"learning_rate": 9.737996604251124e-06,
|
|
"loss": 3.074397325515747,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.5840336134453782,
|
|
"grad_norm": 19.844157721727896,
|
|
"learning_rate": 9.73407732264104e-06,
|
|
"loss": 2.527010679244995,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.5861344537815126,
|
|
"grad_norm": 10.112570131000647,
|
|
"learning_rate": 9.730129744798096e-06,
|
|
"loss": 2.6019768714904785,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 216.6539557731807,
|
|
"learning_rate": 9.726153894317508e-06,
|
|
"loss": 2.848952293395996,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5903361344537815,
|
|
"grad_norm": 18.730728554973695,
|
|
"learning_rate": 9.722149794963483e-06,
|
|
"loss": 3.120556354522705,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.592436974789916,
|
|
"grad_norm": 7.236837867364418,
|
|
"learning_rate": 9.718117470669072e-06,
|
|
"loss": 2.8926405906677246,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.5945378151260504,
|
|
"grad_norm": 8.247663007399707,
|
|
"learning_rate": 9.714056945536039e-06,
|
|
"loss": 3.2854347229003906,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.5966386554621849,
|
|
"grad_norm": 7.729125572796969,
|
|
"learning_rate": 9.709968243834698e-06,
|
|
"loss": 2.856870651245117,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.5987394957983193,
|
|
"grad_norm": 20.951434970442865,
|
|
"learning_rate": 9.705851390003783e-06,
|
|
"loss": 3.3881802558898926,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.6008403361344538,
|
|
"grad_norm": 8.671814837426174,
|
|
"learning_rate": 9.7017064086503e-06,
|
|
"loss": 2.6102542877197266,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.6029411764705882,
|
|
"grad_norm": 8.644019718162792,
|
|
"learning_rate": 9.697533324549371e-06,
|
|
"loss": 2.7697243690490723,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.6050420168067226,
|
|
"grad_norm": 12.279613000984195,
|
|
"learning_rate": 9.693332162644095e-06,
|
|
"loss": 2.568695545196533,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.6071428571428571,
|
|
"grad_norm": 13.384358670021655,
|
|
"learning_rate": 9.689102948045398e-06,
|
|
"loss": 2.922543525695801,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.6092436974789915,
|
|
"grad_norm": 15.250277694133263,
|
|
"learning_rate": 9.684845706031878e-06,
|
|
"loss": 3.1011314392089844,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6113445378151261,
|
|
"grad_norm": 9.840291260984259,
|
|
"learning_rate": 9.680560462049657e-06,
|
|
"loss": 2.627528429031372,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.6134453781512605,
|
|
"grad_norm": 13.648735567431437,
|
|
"learning_rate": 9.676247241712228e-06,
|
|
"loss": 2.8417811393737793,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.615546218487395,
|
|
"grad_norm": 8.931356705581003,
|
|
"learning_rate": 9.671906070800307e-06,
|
|
"loss": 2.3787314891815186,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.6176470588235294,
|
|
"grad_norm": 7.6270227976464895,
|
|
"learning_rate": 9.667536975261667e-06,
|
|
"loss": 2.751317024230957,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.6197478991596639,
|
|
"grad_norm": 7.016417787785432,
|
|
"learning_rate": 9.663139981210998e-06,
|
|
"loss": 2.6910929679870605,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.6218487394957983,
|
|
"grad_norm": 11.206285204533946,
|
|
"learning_rate": 9.658715114929737e-06,
|
|
"loss": 2.801499366760254,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.6239495798319328,
|
|
"grad_norm": 18.427453742915965,
|
|
"learning_rate": 9.654262402865922e-06,
|
|
"loss": 2.885946273803711,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.6260504201680672,
|
|
"grad_norm": 13.114557836832477,
|
|
"learning_rate": 9.649781871634025e-06,
|
|
"loss": 3.1485133171081543,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.6281512605042017,
|
|
"grad_norm": 8.349893932720915,
|
|
"learning_rate": 9.6452735480148e-06,
|
|
"loss": 3.174015998840332,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.6302521008403361,
|
|
"grad_norm": 11.762326882141835,
|
|
"learning_rate": 9.64073745895512e-06,
|
|
"loss": 3.339445114135742,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6323529411764706,
|
|
"grad_norm": 7.555546243601591,
|
|
"learning_rate": 9.636173631567812e-06,
|
|
"loss": 2.9448843002319336,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.634453781512605,
|
|
"grad_norm": 5.95180683932207,
|
|
"learning_rate": 9.631582093131501e-06,
|
|
"loss": 2.6363561153411865,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.6365546218487395,
|
|
"grad_norm": 11.371944122058592,
|
|
"learning_rate": 9.62696287109045e-06,
|
|
"loss": 2.4621901512145996,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.6386554621848739,
|
|
"grad_norm": 11.742524524874973,
|
|
"learning_rate": 9.622315993054384e-06,
|
|
"loss": 2.8623251914978027,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.6407563025210085,
|
|
"grad_norm": 12.39315277601619,
|
|
"learning_rate": 9.61764148679833e-06,
|
|
"loss": 2.191575765609741,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.6428571428571429,
|
|
"grad_norm": 6.720496031064891,
|
|
"learning_rate": 9.61293938026246e-06,
|
|
"loss": 2.018388271331787,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.6449579831932774,
|
|
"grad_norm": 9.624398589362118,
|
|
"learning_rate": 9.608209701551913e-06,
|
|
"loss": 2.756854772567749,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.6470588235294118,
|
|
"grad_norm": 21.622075822614562,
|
|
"learning_rate": 9.60345247893663e-06,
|
|
"loss": 2.6668529510498047,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.6491596638655462,
|
|
"grad_norm": 14.926878160653533,
|
|
"learning_rate": 9.598667740851187e-06,
|
|
"loss": 2.6617343425750732,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.6512605042016807,
|
|
"grad_norm": 9.499741494871419,
|
|
"learning_rate": 9.59385551589462e-06,
|
|
"loss": 3.1460976600646973,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6533613445378151,
|
|
"grad_norm": 17.910724072364676,
|
|
"learning_rate": 9.589015832830267e-06,
|
|
"loss": 2.7566354274749756,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.6554621848739496,
|
|
"grad_norm": 15.059990921253526,
|
|
"learning_rate": 9.584148720585575e-06,
|
|
"loss": 3.3112881183624268,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.657563025210084,
|
|
"grad_norm": 12.27519040871759,
|
|
"learning_rate": 9.57925420825195e-06,
|
|
"loss": 2.8563618659973145,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.6596638655462185,
|
|
"grad_norm": 14.53433732237354,
|
|
"learning_rate": 9.574332325084564e-06,
|
|
"loss": 3.5544567108154297,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.6617647058823529,
|
|
"grad_norm": 9.662661721128384,
|
|
"learning_rate": 9.569383100502193e-06,
|
|
"loss": 2.924015998840332,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.6638655462184874,
|
|
"grad_norm": 11.360356839234715,
|
|
"learning_rate": 9.564406564087032e-06,
|
|
"loss": 2.7250008583068848,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.6659663865546218,
|
|
"grad_norm": 6.265433825569306,
|
|
"learning_rate": 9.559402745584527e-06,
|
|
"loss": 2.9229238033294678,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.6680672268907563,
|
|
"grad_norm": 10.23047238705242,
|
|
"learning_rate": 9.554371674903191e-06,
|
|
"loss": 3.4867515563964844,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.6701680672268907,
|
|
"grad_norm": 13.446172115002414,
|
|
"learning_rate": 9.549313382114427e-06,
|
|
"loss": 2.4049417972564697,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.6722689075630253,
|
|
"grad_norm": 14.135159230227343,
|
|
"learning_rate": 9.54422789745235e-06,
|
|
"loss": 3.1008338928222656,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6743697478991597,
|
|
"grad_norm": 9.140604791680513,
|
|
"learning_rate": 9.5391152513136e-06,
|
|
"loss": 2.6114342212677,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.6764705882352942,
|
|
"grad_norm": 9.779919855511938,
|
|
"learning_rate": 9.533975474257171e-06,
|
|
"loss": 2.7165164947509766,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.6785714285714286,
|
|
"grad_norm": 9.275622947642706,
|
|
"learning_rate": 9.528808597004216e-06,
|
|
"loss": 2.8122520446777344,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.680672268907563,
|
|
"grad_norm": 14.779154717919877,
|
|
"learning_rate": 9.523614650437876e-06,
|
|
"loss": 2.862661838531494,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.6827731092436975,
|
|
"grad_norm": 12.377273208842894,
|
|
"learning_rate": 9.518393665603084e-06,
|
|
"loss": 2.9812843799591064,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.6848739495798319,
|
|
"grad_norm": 11.010658732376989,
|
|
"learning_rate": 9.513145673706383e-06,
|
|
"loss": 2.9455337524414062,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.6869747899159664,
|
|
"grad_norm": 14.806340169845868,
|
|
"learning_rate": 9.507870706115749e-06,
|
|
"loss": 3.1577422618865967,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.6890756302521008,
|
|
"grad_norm": 13.616368197529665,
|
|
"learning_rate": 9.50256879436039e-06,
|
|
"loss": 2.545835018157959,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.6911764705882353,
|
|
"grad_norm": 8.987871015734585,
|
|
"learning_rate": 9.497239970130561e-06,
|
|
"loss": 2.559062957763672,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.6932773109243697,
|
|
"grad_norm": 8.587992072590101,
|
|
"learning_rate": 9.491884265277383e-06,
|
|
"loss": 2.932499647140503,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.6953781512605042,
|
|
"grad_norm": 7.463276523398998,
|
|
"learning_rate": 9.486501711812637e-06,
|
|
"loss": 2.967616558074951,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.6974789915966386,
|
|
"grad_norm": 14.475511521289118,
|
|
"learning_rate": 9.481092341908591e-06,
|
|
"loss": 2.4604697227478027,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.6995798319327731,
|
|
"grad_norm": 9.02600045542574,
|
|
"learning_rate": 9.475656187897794e-06,
|
|
"loss": 3.146969795227051,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.7016806722689075,
|
|
"grad_norm": 7.639638057540197,
|
|
"learning_rate": 9.470193282272886e-06,
|
|
"loss": 3.337083339691162,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.7037815126050421,
|
|
"grad_norm": 16.397307515268395,
|
|
"learning_rate": 9.464703657686412e-06,
|
|
"loss": 2.7829766273498535,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 11.048022152868258,
|
|
"learning_rate": 9.45918734695061e-06,
|
|
"loss": 3.095449447631836,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.707983193277311,
|
|
"grad_norm": 10.20138527201031,
|
|
"learning_rate": 9.453644383037232e-06,
|
|
"loss": 2.6790573596954346,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.7100840336134454,
|
|
"grad_norm": 9.93895349514755,
|
|
"learning_rate": 9.448074799077337e-06,
|
|
"loss": 2.9844274520874023,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.7121848739495799,
|
|
"grad_norm": 171.53953866617377,
|
|
"learning_rate": 9.442478628361098e-06,
|
|
"loss": 2.256910562515259,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 12.351676724137773,
|
|
"learning_rate": 9.436855904337596e-06,
|
|
"loss": 2.9464545249938965,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7163865546218487,
|
|
"grad_norm": 19.404123629754835,
|
|
"learning_rate": 9.43120666061463e-06,
|
|
"loss": 2.23644757270813,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.7184873949579832,
|
|
"grad_norm": 11.246236929808724,
|
|
"learning_rate": 9.425530930958507e-06,
|
|
"loss": 2.85072660446167,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.7205882352941176,
|
|
"grad_norm": 17.642986778414265,
|
|
"learning_rate": 9.419828749293845e-06,
|
|
"loss": 3.09238862991333,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.7226890756302521,
|
|
"grad_norm": 8.1418179714146,
|
|
"learning_rate": 9.414100149703373e-06,
|
|
"loss": 2.7548587322235107,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.7247899159663865,
|
|
"grad_norm": 11.258932741699391,
|
|
"learning_rate": 9.40834516642772e-06,
|
|
"loss": 2.487452507019043,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.726890756302521,
|
|
"grad_norm": 10.082639156310133,
|
|
"learning_rate": 9.402563833865213e-06,
|
|
"loss": 3.077296257019043,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.7289915966386554,
|
|
"grad_norm": 11.506257708160296,
|
|
"learning_rate": 9.396756186571672e-06,
|
|
"loss": 2.6188814640045166,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.7310924369747899,
|
|
"grad_norm": 11.743812268831451,
|
|
"learning_rate": 9.39092225926021e-06,
|
|
"loss": 3.150355815887451,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.7331932773109243,
|
|
"grad_norm": 10.613109994526992,
|
|
"learning_rate": 9.385062086801013e-06,
|
|
"loss": 2.6666879653930664,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 11.1137083326389,
|
|
"learning_rate": 9.379175704221139e-06,
|
|
"loss": 2.885680675506592,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7373949579831933,
|
|
"grad_norm": 10.845634322034954,
|
|
"learning_rate": 9.37326314670431e-06,
|
|
"loss": 2.948115110397339,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.7394957983193278,
|
|
"grad_norm": 20.12834911912162,
|
|
"learning_rate": 9.367324449590694e-06,
|
|
"loss": 2.743468761444092,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.7415966386554622,
|
|
"grad_norm": 10.20324180750042,
|
|
"learning_rate": 9.361359648376707e-06,
|
|
"loss": 3.0895063877105713,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.7436974789915967,
|
|
"grad_norm": 16.159497011872574,
|
|
"learning_rate": 9.355368778714784e-06,
|
|
"loss": 2.808818817138672,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.7457983193277311,
|
|
"grad_norm": 14.111122417184372,
|
|
"learning_rate": 9.349351876413181e-06,
|
|
"loss": 2.889227867126465,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.7478991596638656,
|
|
"grad_norm": 10.685634708452614,
|
|
"learning_rate": 9.343308977435754e-06,
|
|
"loss": 3.021900177001953,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 29.116347517828004,
|
|
"learning_rate": 9.337240117901742e-06,
|
|
"loss": 2.4112629890441895,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.7521008403361344,
|
|
"grad_norm": 8.824096565810732,
|
|
"learning_rate": 9.331145334085554e-06,
|
|
"loss": 2.898515224456787,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.7542016806722689,
|
|
"grad_norm": 21.430509149211513,
|
|
"learning_rate": 9.325024662416553e-06,
|
|
"loss": 2.683413028717041,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.7563025210084033,
|
|
"grad_norm": 10.321295794427858,
|
|
"learning_rate": 9.318878139478842e-06,
|
|
"loss": 2.890808582305908,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.7584033613445378,
|
|
"grad_norm": 20.795050786572304,
|
|
"learning_rate": 9.312705802011029e-06,
|
|
"loss": 2.9919955730438232,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.7605042016806722,
|
|
"grad_norm": 9.83932446467153,
|
|
"learning_rate": 9.306507686906033e-06,
|
|
"loss": 2.7725915908813477,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.7626050420168067,
|
|
"grad_norm": 9.916160263978837,
|
|
"learning_rate": 9.300283831210838e-06,
|
|
"loss": 2.9397757053375244,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.7647058823529411,
|
|
"grad_norm": 9.312450032530169,
|
|
"learning_rate": 9.294034272126286e-06,
|
|
"loss": 2.770698070526123,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.7668067226890757,
|
|
"grad_norm": 13.885653883484695,
|
|
"learning_rate": 9.28775904700686e-06,
|
|
"loss": 2.5156445503234863,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.7689075630252101,
|
|
"grad_norm": 13.084004538001976,
|
|
"learning_rate": 9.281458193360442e-06,
|
|
"loss": 2.597851276397705,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.7710084033613446,
|
|
"grad_norm": 17.679727525867335,
|
|
"learning_rate": 9.2751317488481e-06,
|
|
"loss": 2.4659290313720703,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.773109243697479,
|
|
"grad_norm": 9.199345804679885,
|
|
"learning_rate": 9.26877975128387e-06,
|
|
"loss": 3.0518131256103516,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.7752100840336135,
|
|
"grad_norm": 15.824344742656248,
|
|
"learning_rate": 9.262402238634514e-06,
|
|
"loss": 2.0272233486175537,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.7773109243697479,
|
|
"grad_norm": 9.874132429438818,
|
|
"learning_rate": 9.255999249019307e-06,
|
|
"loss": 2.282167911529541,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7794117647058824,
|
|
"grad_norm": 8.044430179764902,
|
|
"learning_rate": 9.2495708207098e-06,
|
|
"loss": 2.447831869125366,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.7815126050420168,
|
|
"grad_norm": 15.289268393319317,
|
|
"learning_rate": 9.243116992129593e-06,
|
|
"loss": 2.5548458099365234,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.7836134453781513,
|
|
"grad_norm": 18.576142639391133,
|
|
"learning_rate": 9.23663780185411e-06,
|
|
"loss": 2.2244365215301514,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.7857142857142857,
|
|
"grad_norm": 8.55234069521718,
|
|
"learning_rate": 9.230133288610366e-06,
|
|
"loss": 3.044992208480835,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.7878151260504201,
|
|
"grad_norm": 11.142079035862414,
|
|
"learning_rate": 9.223603491276733e-06,
|
|
"loss": 2.545569896697998,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.7899159663865546,
|
|
"grad_norm": 14.123674718701432,
|
|
"learning_rate": 9.217048448882711e-06,
|
|
"loss": 3.337583541870117,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.792016806722689,
|
|
"grad_norm": 16.146080651689587,
|
|
"learning_rate": 9.210468200608691e-06,
|
|
"loss": 3.1922380924224854,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.7941176470588235,
|
|
"grad_norm": 12.28600079308305,
|
|
"learning_rate": 9.203862785785724e-06,
|
|
"loss": 2.5922632217407227,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.7962184873949579,
|
|
"grad_norm": 18.04398024676097,
|
|
"learning_rate": 9.197232243895285e-06,
|
|
"loss": 2.876894474029541,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.7983193277310925,
|
|
"grad_norm": 13.494043036714963,
|
|
"learning_rate": 9.190576614569035e-06,
|
|
"loss": 2.7677531242370605,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.8004201680672269,
|
|
"grad_norm": 23.463052019031387,
|
|
"learning_rate": 9.183895937588594e-06,
|
|
"loss": 1.9870229959487915,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.8025210084033614,
|
|
"grad_norm": 7.7476580634838665,
|
|
"learning_rate": 9.177190252885285e-06,
|
|
"loss": 2.784242868423462,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.8046218487394958,
|
|
"grad_norm": 6.086395137680743,
|
|
"learning_rate": 9.17045960053991e-06,
|
|
"loss": 2.878697395324707,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.8067226890756303,
|
|
"grad_norm": 16.59316957110638,
|
|
"learning_rate": 9.163704020782507e-06,
|
|
"loss": 2.7685139179229736,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.8088235294117647,
|
|
"grad_norm": 15.470438153645851,
|
|
"learning_rate": 9.156923553992107e-06,
|
|
"loss": 2.8312299251556396,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.8109243697478992,
|
|
"grad_norm": 8.00902098985157,
|
|
"learning_rate": 9.150118240696497e-06,
|
|
"loss": 1.7165706157684326,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.8130252100840336,
|
|
"grad_norm": 14.0610194690077,
|
|
"learning_rate": 9.14328812157197e-06,
|
|
"loss": 3.451162815093994,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.8151260504201681,
|
|
"grad_norm": 10.174053809556211,
|
|
"learning_rate": 9.136433237443093e-06,
|
|
"loss": 3.455259084701538,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.8172268907563025,
|
|
"grad_norm": 14.076181600112081,
|
|
"learning_rate": 9.129553629282448e-06,
|
|
"loss": 3.3125205039978027,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.819327731092437,
|
|
"grad_norm": 11.760967038966463,
|
|
"learning_rate": 9.122649338210407e-06,
|
|
"loss": 3.175715923309326,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.8214285714285714,
|
|
"grad_norm": 12.215337173611072,
|
|
"learning_rate": 9.115720405494868e-06,
|
|
"loss": 3.426882743835449,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 16.884819154921146,
|
|
"learning_rate": 9.108766872551016e-06,
|
|
"loss": 2.693225860595703,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.8256302521008403,
|
|
"grad_norm": 11.991779005638564,
|
|
"learning_rate": 9.101788780941076e-06,
|
|
"loss": 2.8251726627349854,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.8277310924369747,
|
|
"grad_norm": 8.901523397999386,
|
|
"learning_rate": 9.094786172374066e-06,
|
|
"loss": 2.845076560974121,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.8298319327731093,
|
|
"grad_norm": 15.559813600064993,
|
|
"learning_rate": 9.087759088705541e-06,
|
|
"loss": 2.9212491512298584,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.8319327731092437,
|
|
"grad_norm": 12.334218057409931,
|
|
"learning_rate": 9.08070757193735e-06,
|
|
"loss": 2.752890110015869,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.8340336134453782,
|
|
"grad_norm": 20.040022595533,
|
|
"learning_rate": 9.07363166421738e-06,
|
|
"loss": 3.1292171478271484,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.8361344537815126,
|
|
"grad_norm": 9.339997691276547,
|
|
"learning_rate": 9.066531407839307e-06,
|
|
"loss": 2.2926840782165527,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.8382352941176471,
|
|
"grad_norm": 9.210411213235453,
|
|
"learning_rate": 9.059406845242343e-06,
|
|
"loss": 2.7644119262695312,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.8403361344537815,
|
|
"grad_norm": 13.484928949211756,
|
|
"learning_rate": 9.05225801901098e-06,
|
|
"loss": 2.9096150398254395,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.842436974789916,
|
|
"grad_norm": 21.901892899759964,
|
|
"learning_rate": 9.045084971874738e-06,
|
|
"loss": 4.536911964416504,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.8445378151260504,
|
|
"grad_norm": 8.027798710835631,
|
|
"learning_rate": 9.03788774670791e-06,
|
|
"loss": 3.3775062561035156,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.8466386554621849,
|
|
"grad_norm": 11.22841391004864,
|
|
"learning_rate": 9.030666386529303e-06,
|
|
"loss": 2.755703926086426,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.8487394957983193,
|
|
"grad_norm": 9.698938581529527,
|
|
"learning_rate": 9.023420934501981e-06,
|
|
"loss": 2.812281608581543,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.8508403361344538,
|
|
"grad_norm": 9.495702557416454,
|
|
"learning_rate": 9.01615143393301e-06,
|
|
"loss": 2.9015493392944336,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.8529411764705882,
|
|
"grad_norm": 8.59480884978166,
|
|
"learning_rate": 9.008857928273199e-06,
|
|
"loss": 2.8743391036987305,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.8550420168067226,
|
|
"grad_norm": 14.060855102265236,
|
|
"learning_rate": 9.001540461116835e-06,
|
|
"loss": 2.7400550842285156,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 9.670354596798553,
|
|
"learning_rate": 8.994199076201428e-06,
|
|
"loss": 3.788983106613159,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.8592436974789915,
|
|
"grad_norm": 10.094582977623446,
|
|
"learning_rate": 8.98683381740745e-06,
|
|
"loss": 2.426604747772217,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.8613445378151261,
|
|
"grad_norm": 6.42119276092813,
|
|
"learning_rate": 8.979444728758067e-06,
|
|
"loss": 2.467769145965576,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8634453781512605,
|
|
"grad_norm": 101.25120998420752,
|
|
"learning_rate": 8.97203185441888e-06,
|
|
"loss": 2.878884792327881,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.865546218487395,
|
|
"grad_norm": 10.063927366400284,
|
|
"learning_rate": 8.964595238697659e-06,
|
|
"loss": 3.323913812637329,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.8676470588235294,
|
|
"grad_norm": 8.176196947638319,
|
|
"learning_rate": 8.957134926044088e-06,
|
|
"loss": 2.2674732208251953,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.8697478991596639,
|
|
"grad_norm": 7.688045397272728,
|
|
"learning_rate": 8.949650961049479e-06,
|
|
"loss": 2.6359667778015137,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.8718487394957983,
|
|
"grad_norm": 12.061723837223782,
|
|
"learning_rate": 8.942143388446522e-06,
|
|
"loss": 4.3965678215026855,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.8739495798319328,
|
|
"grad_norm": 13.801014710596668,
|
|
"learning_rate": 8.934612253109017e-06,
|
|
"loss": 3.584599733352661,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.8760504201680672,
|
|
"grad_norm": 11.465324791085347,
|
|
"learning_rate": 8.927057600051594e-06,
|
|
"loss": 2.9781904220581055,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.8781512605042017,
|
|
"grad_norm": 32.19803137859573,
|
|
"learning_rate": 8.919479474429462e-06,
|
|
"loss": 3.3312220573425293,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.8802521008403361,
|
|
"grad_norm": 23.418640662777587,
|
|
"learning_rate": 8.911877921538117e-06,
|
|
"loss": 3.8054161071777344,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 7.760210305795623,
|
|
"learning_rate": 8.904252986813091e-06,
|
|
"loss": 2.8041489124298096,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.884453781512605,
|
|
"grad_norm": 13.790720201964906,
|
|
"learning_rate": 8.896604715829671e-06,
|
|
"loss": 2.8391265869140625,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.8865546218487395,
|
|
"grad_norm": 8.558877313925247,
|
|
"learning_rate": 8.888933154302626e-06,
|
|
"loss": 2.6835553646087646,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.8886554621848739,
|
|
"grad_norm": 21.689551042379083,
|
|
"learning_rate": 8.881238348085936e-06,
|
|
"loss": 2.6738481521606445,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.8907563025210085,
|
|
"grad_norm": 7.686758427886692,
|
|
"learning_rate": 8.87352034317252e-06,
|
|
"loss": 2.619101047515869,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.8928571428571429,
|
|
"grad_norm": 20.40695143594997,
|
|
"learning_rate": 8.865779185693957e-06,
|
|
"loss": 3.3444905281066895,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.8949579831932774,
|
|
"grad_norm": 27.431337065110313,
|
|
"learning_rate": 8.858014921920215e-06,
|
|
"loss": 2.1527421474456787,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.8970588235294118,
|
|
"grad_norm": 20.606507987678672,
|
|
"learning_rate": 8.850227598259365e-06,
|
|
"loss": 2.6689836978912354,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.8991596638655462,
|
|
"grad_norm": 8.968995022440353,
|
|
"learning_rate": 8.842417261257316e-06,
|
|
"loss": 3.0119547843933105,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.9012605042016807,
|
|
"grad_norm": 28.528232969469133,
|
|
"learning_rate": 8.83458395759753e-06,
|
|
"loss": 2.482861042022705,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.9033613445378151,
|
|
"grad_norm": 10.074031458183692,
|
|
"learning_rate": 8.826727734100742e-06,
|
|
"loss": 2.8982067108154297,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9054621848739496,
|
|
"grad_norm": 8.399253353390154,
|
|
"learning_rate": 8.818848637724681e-06,
|
|
"loss": 2.5004382133483887,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.907563025210084,
|
|
"grad_norm": 8.747805949968082,
|
|
"learning_rate": 8.810946715563798e-06,
|
|
"loss": 2.612011194229126,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.9096638655462185,
|
|
"grad_norm": 10.425702565789909,
|
|
"learning_rate": 8.803022014848966e-06,
|
|
"loss": 2.9700820446014404,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.9117647058823529,
|
|
"grad_norm": 11.029401754074971,
|
|
"learning_rate": 8.795074582947214e-06,
|
|
"loss": 3.248368263244629,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.9138655462184874,
|
|
"grad_norm": 9.336382488449228,
|
|
"learning_rate": 8.787104467361442e-06,
|
|
"loss": 2.993704319000244,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.9159663865546218,
|
|
"grad_norm": 13.327453834983904,
|
|
"learning_rate": 8.779111715730127e-06,
|
|
"loss": 2.6930155754089355,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.9180672268907563,
|
|
"grad_norm": 10.570908488031245,
|
|
"learning_rate": 8.771096375827047e-06,
|
|
"loss": 3.069434404373169,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.9201680672268907,
|
|
"grad_norm": 19.381962817436207,
|
|
"learning_rate": 8.763058495560994e-06,
|
|
"loss": 3.1358611583709717,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.9222689075630253,
|
|
"grad_norm": 18.418237048785702,
|
|
"learning_rate": 8.754998122975489e-06,
|
|
"loss": 3.2987184524536133,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.9243697478991597,
|
|
"grad_norm": 8.737803987239646,
|
|
"learning_rate": 8.746915306248488e-06,
|
|
"loss": 2.9279255867004395,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9264705882352942,
|
|
"grad_norm": 13.117095498271222,
|
|
"learning_rate": 8.7388100936921e-06,
|
|
"loss": 2.795942783355713,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.9285714285714286,
|
|
"grad_norm": 25.973728201733575,
|
|
"learning_rate": 8.730682533752301e-06,
|
|
"loss": 2.7590699195861816,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.930672268907563,
|
|
"grad_norm": 9.543199289400748,
|
|
"learning_rate": 8.722532675008635e-06,
|
|
"loss": 2.6571459770202637,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.9327731092436975,
|
|
"grad_norm": 10.69198569405724,
|
|
"learning_rate": 8.714360566173932e-06,
|
|
"loss": 2.7342920303344727,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.9348739495798319,
|
|
"grad_norm": 13.298135717649288,
|
|
"learning_rate": 8.706166256094013e-06,
|
|
"loss": 2.9492366313934326,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.9369747899159664,
|
|
"grad_norm": 18.5856782117513,
|
|
"learning_rate": 8.6979497937474e-06,
|
|
"loss": 2.937699317932129,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.9390756302521008,
|
|
"grad_norm": 10.292297569389804,
|
|
"learning_rate": 8.689711228245021e-06,
|
|
"loss": 3.23824405670166,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 19.90454431534383,
|
|
"learning_rate": 8.681450608829916e-06,
|
|
"loss": 2.542668581008911,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.9432773109243697,
|
|
"grad_norm": 14.413143934794212,
|
|
"learning_rate": 8.67316798487695e-06,
|
|
"loss": 3.257632255554199,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.9453781512605042,
|
|
"grad_norm": 10.80231465762936,
|
|
"learning_rate": 8.664863405892506e-06,
|
|
"loss": 2.7072958946228027,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9474789915966386,
|
|
"grad_norm": 18.020582485094227,
|
|
"learning_rate": 8.656536921514195e-06,
|
|
"loss": 2.532301664352417,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.9495798319327731,
|
|
"grad_norm": 12.503896279810512,
|
|
"learning_rate": 8.648188581510567e-06,
|
|
"loss": 2.726604461669922,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.9516806722689075,
|
|
"grad_norm": 18.785189447389097,
|
|
"learning_rate": 8.639818435780797e-06,
|
|
"loss": 2.516594886779785,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.9537815126050421,
|
|
"grad_norm": 12.0120687102085,
|
|
"learning_rate": 8.631426534354404e-06,
|
|
"loss": 2.7706644535064697,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.9558823529411765,
|
|
"grad_norm": 11.506720081337315,
|
|
"learning_rate": 8.623012927390936e-06,
|
|
"loss": 3.2427144050598145,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.957983193277311,
|
|
"grad_norm": 10.11083550503784,
|
|
"learning_rate": 8.614577665179684e-06,
|
|
"loss": 3.1202523708343506,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.9600840336134454,
|
|
"grad_norm": 15.945109216294865,
|
|
"learning_rate": 8.606120798139375e-06,
|
|
"loss": 2.6210598945617676,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.9621848739495799,
|
|
"grad_norm": 9.09618149788864,
|
|
"learning_rate": 8.597642376817865e-06,
|
|
"loss": 2.669271469116211,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.9642857142857143,
|
|
"grad_norm": 8.714640631605363,
|
|
"learning_rate": 8.589142451891849e-06,
|
|
"loss": 2.6489734649658203,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.9663865546218487,
|
|
"grad_norm": 7.855597298788909,
|
|
"learning_rate": 8.580621074166553e-06,
|
|
"loss": 3.10178804397583,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9684873949579832,
|
|
"grad_norm": 10.502691052340555,
|
|
"learning_rate": 8.572078294575423e-06,
|
|
"loss": 2.589158296585083,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.9705882352941176,
|
|
"grad_norm": 10.459968052493494,
|
|
"learning_rate": 8.56351416417983e-06,
|
|
"loss": 2.5543792247772217,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.9726890756302521,
|
|
"grad_norm": 12.885512846289808,
|
|
"learning_rate": 8.554928734168767e-06,
|
|
"loss": 2.65985369682312,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.9747899159663865,
|
|
"grad_norm": 9.639047199230617,
|
|
"learning_rate": 8.546322055858526e-06,
|
|
"loss": 3.0177440643310547,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.976890756302521,
|
|
"grad_norm": 9.494268049756599,
|
|
"learning_rate": 8.537694180692416e-06,
|
|
"loss": 2.2767248153686523,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.9789915966386554,
|
|
"grad_norm": 12.56887928459161,
|
|
"learning_rate": 8.529045160240433e-06,
|
|
"loss": 2.7835707664489746,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.9810924369747899,
|
|
"grad_norm": 10.580355179128095,
|
|
"learning_rate": 8.520375046198965e-06,
|
|
"loss": 2.4373722076416016,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.9831932773109243,
|
|
"grad_norm": 10.13582135951574,
|
|
"learning_rate": 8.51168389039048e-06,
|
|
"loss": 2.464303731918335,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.9852941176470589,
|
|
"grad_norm": 12.209700818401375,
|
|
"learning_rate": 8.502971744763216e-06,
|
|
"loss": 2.2609100341796875,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.9873949579831933,
|
|
"grad_norm": 21.359445929891656,
|
|
"learning_rate": 8.494238661390865e-06,
|
|
"loss": 3.0135858058929443,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9894957983193278,
|
|
"grad_norm": 15.087072293517004,
|
|
"learning_rate": 8.485484692472272e-06,
|
|
"loss": 2.770965099334717,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.9915966386554622,
|
|
"grad_norm": 8.181199645745421,
|
|
"learning_rate": 8.476709890331116e-06,
|
|
"loss": 2.6243722438812256,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.9936974789915967,
|
|
"grad_norm": 7.527423998031555,
|
|
"learning_rate": 8.467914307415601e-06,
|
|
"loss": 2.9319207668304443,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.9957983193277311,
|
|
"grad_norm": 9.424234237676545,
|
|
"learning_rate": 8.459097996298137e-06,
|
|
"loss": 3.0626072883605957,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.9978991596638656,
|
|
"grad_norm": 14.444274317338678,
|
|
"learning_rate": 8.45026100967503e-06,
|
|
"loss": 3.000889778137207,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 8.45019782867115,
|
|
"learning_rate": 8.441403400366169e-06,
|
|
"loss": 3.112825393676758,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.0021008403361344,
|
|
"grad_norm": 19.596775314152666,
|
|
"learning_rate": 8.432525221314708e-06,
|
|
"loss": 1.4137624502182007,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.004201680672269,
|
|
"grad_norm": 9.233023882113994,
|
|
"learning_rate": 8.423626525586744e-06,
|
|
"loss": 1.6808059215545654,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.0063025210084033,
|
|
"grad_norm": 9.789186389046735,
|
|
"learning_rate": 8.414707366371006e-06,
|
|
"loss": 1.8797330856323242,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.0084033613445378,
|
|
"grad_norm": 7.894274079237724,
|
|
"learning_rate": 8.405767796978546e-06,
|
|
"loss": 1.9548699855804443,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.0105042016806722,
|
|
"grad_norm": 11.882995555931503,
|
|
"learning_rate": 8.396807870842396e-06,
|
|
"loss": 1.5713114738464355,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.0126050420168067,
|
|
"grad_norm": 14.948396348319923,
|
|
"learning_rate": 8.387827641517274e-06,
|
|
"loss": 1.69504976272583,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.0147058823529411,
|
|
"grad_norm": 6.935744624929541,
|
|
"learning_rate": 8.378827162679248e-06,
|
|
"loss": 1.3813257217407227,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.0168067226890756,
|
|
"grad_norm": 9.50729885231966,
|
|
"learning_rate": 8.369806488125418e-06,
|
|
"loss": 2.4568567276000977,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.01890756302521,
|
|
"grad_norm": 11.62592077082348,
|
|
"learning_rate": 8.360765671773603e-06,
|
|
"loss": 2.602184534072876,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.0210084033613445,
|
|
"grad_norm": 15.469624436922395,
|
|
"learning_rate": 8.351704767662005e-06,
|
|
"loss": 1.8193070888519287,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.023109243697479,
|
|
"grad_norm": 12.389371131721145,
|
|
"learning_rate": 8.3426238299489e-06,
|
|
"loss": 1.4549766778945923,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.0252100840336134,
|
|
"grad_norm": 7.898711913261212,
|
|
"learning_rate": 8.333522912912308e-06,
|
|
"loss": 1.4681106805801392,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.0273109243697478,
|
|
"grad_norm": 14.553557605821632,
|
|
"learning_rate": 8.324402070949658e-06,
|
|
"loss": 1.4224164485931396,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.0294117647058822,
|
|
"grad_norm": 21.0322684953627,
|
|
"learning_rate": 8.315261358577485e-06,
|
|
"loss": 2.200676441192627,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.0315126050420167,
|
|
"grad_norm": 14.230965851092702,
|
|
"learning_rate": 8.306100830431085e-06,
|
|
"loss": 1.867397665977478,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.0336134453781514,
|
|
"grad_norm": 11.330315084805383,
|
|
"learning_rate": 8.296920541264197e-06,
|
|
"loss": 1.4270985126495361,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.0357142857142858,
|
|
"grad_norm": 11.452248734086307,
|
|
"learning_rate": 8.287720545948676e-06,
|
|
"loss": 1.464069128036499,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.0378151260504203,
|
|
"grad_norm": 18.476525141242952,
|
|
"learning_rate": 8.278500899474162e-06,
|
|
"loss": 1.192551612854004,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.0399159663865547,
|
|
"grad_norm": 13.695173322132312,
|
|
"learning_rate": 8.269261656947755e-06,
|
|
"loss": 2.367762327194214,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.0420168067226891,
|
|
"grad_norm": 12.101022572223535,
|
|
"learning_rate": 8.260002873593679e-06,
|
|
"loss": 1.6752372980117798,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.0441176470588236,
|
|
"grad_norm": 14.763270168918805,
|
|
"learning_rate": 8.25072460475296e-06,
|
|
"loss": 1.409712314605713,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.046218487394958,
|
|
"grad_norm": 12.622229054224464,
|
|
"learning_rate": 8.24142690588309e-06,
|
|
"loss": 1.6270588636398315,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.0483193277310925,
|
|
"grad_norm": 7.889964988601032,
|
|
"learning_rate": 8.232109832557696e-06,
|
|
"loss": 1.4294947385787964,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.050420168067227,
|
|
"grad_norm": 9.640341277497848,
|
|
"learning_rate": 8.222773440466213e-06,
|
|
"loss": 1.2340010404586792,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.0525210084033614,
|
|
"grad_norm": 9.361065825268032,
|
|
"learning_rate": 8.213417785413538e-06,
|
|
"loss": 1.451041340827942,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.0546218487394958,
|
|
"grad_norm": 10.851800895184763,
|
|
"learning_rate": 8.204042923319717e-06,
|
|
"loss": 0.8124719858169556,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.0567226890756303,
|
|
"grad_norm": 13.939415896202156,
|
|
"learning_rate": 8.19464891021959e-06,
|
|
"loss": 1.5310864448547363,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.0588235294117647,
|
|
"grad_norm": 12.545903899817956,
|
|
"learning_rate": 8.18523580226247e-06,
|
|
"loss": 1.2139228582382202,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.0609243697478992,
|
|
"grad_norm": 7.8688457688530455,
|
|
"learning_rate": 8.1758036557118e-06,
|
|
"loss": 1.3573241233825684,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.0630252100840336,
|
|
"grad_norm": 29.274148786110516,
|
|
"learning_rate": 8.166352526944821e-06,
|
|
"loss": 1.9899749755859375,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.065126050420168,
|
|
"grad_norm": 12.789841758713314,
|
|
"learning_rate": 8.156882472452232e-06,
|
|
"loss": 1.4103593826293945,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.0672268907563025,
|
|
"grad_norm": 11.46688535188232,
|
|
"learning_rate": 8.147393548837856e-06,
|
|
"loss": 1.227393627166748,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.069327731092437,
|
|
"grad_norm": 11.67493017233716,
|
|
"learning_rate": 8.137885812818296e-06,
|
|
"loss": 1.7060927152633667,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 13.183390423963338,
|
|
"learning_rate": 8.128359321222601e-06,
|
|
"loss": 1.890432357788086,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.0735294117647058,
|
|
"grad_norm": 8.769195455641308,
|
|
"learning_rate": 8.118814130991925e-06,
|
|
"loss": 1.8258857727050781,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.0756302521008403,
|
|
"grad_norm": 9.016866647141889,
|
|
"learning_rate": 8.109250299179188e-06,
|
|
"loss": 0.9584097862243652,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.0777310924369747,
|
|
"grad_norm": 8.866656672277916,
|
|
"learning_rate": 8.09966788294873e-06,
|
|
"loss": 1.4017150402069092,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.0798319327731092,
|
|
"grad_norm": 12.12920225890514,
|
|
"learning_rate": 8.090066939575972e-06,
|
|
"loss": 1.3034381866455078,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.0819327731092436,
|
|
"grad_norm": 11.169332765461306,
|
|
"learning_rate": 8.080447526447079e-06,
|
|
"loss": 1.0734150409698486,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.084033613445378,
|
|
"grad_norm": 15.988980575396647,
|
|
"learning_rate": 8.070809701058606e-06,
|
|
"loss": 0.8819087743759155,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.0861344537815125,
|
|
"grad_norm": 10.445041930863859,
|
|
"learning_rate": 8.061153521017169e-06,
|
|
"loss": 1.3253920078277588,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.088235294117647,
|
|
"grad_norm": 7.477532974278996,
|
|
"learning_rate": 8.051479044039086e-06,
|
|
"loss": 1.0912744998931885,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.0903361344537814,
|
|
"grad_norm": 15.386742532344485,
|
|
"learning_rate": 8.041786327950037e-06,
|
|
"loss": 1.6941767930984497,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.092436974789916,
|
|
"grad_norm": 13.631587045212196,
|
|
"learning_rate": 8.032075430684724e-06,
|
|
"loss": 1.058671236038208,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.0945378151260505,
|
|
"grad_norm": 9.174394889796707,
|
|
"learning_rate": 8.02234641028652e-06,
|
|
"loss": 1.1603420972824097,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.096638655462185,
|
|
"grad_norm": 18.009634618634845,
|
|
"learning_rate": 8.012599324907121e-06,
|
|
"loss": 1.4285218715667725,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.0987394957983194,
|
|
"grad_norm": 18.317588738929096,
|
|
"learning_rate": 8.0028342328062e-06,
|
|
"loss": 1.3041057586669922,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.1008403361344539,
|
|
"grad_norm": 17.245361771703262,
|
|
"learning_rate": 7.993051192351056e-06,
|
|
"loss": 2.329005718231201,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.1029411764705883,
|
|
"grad_norm": 5.466501144551759,
|
|
"learning_rate": 7.983250262016276e-06,
|
|
"loss": 0.7331016063690186,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.1050420168067228,
|
|
"grad_norm": 19.76792957260025,
|
|
"learning_rate": 7.973431500383366e-06,
|
|
"loss": 2.193528175354004,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.1071428571428572,
|
|
"grad_norm": 11.04973790435175,
|
|
"learning_rate": 7.963594966140423e-06,
|
|
"loss": 1.3245251178741455,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.1092436974789917,
|
|
"grad_norm": 14.50002827076454,
|
|
"learning_rate": 7.953740718081765e-06,
|
|
"loss": 1.1308670043945312,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.111344537815126,
|
|
"grad_norm": 8.457254255014693,
|
|
"learning_rate": 7.943868815107594e-06,
|
|
"loss": 1.3318034410476685,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.1134453781512605,
|
|
"grad_norm": 12.48006901565296,
|
|
"learning_rate": 7.933979316223632e-06,
|
|
"loss": 1.2564438581466675,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.115546218487395,
|
|
"grad_norm": 13.952521489657013,
|
|
"learning_rate": 7.92407228054078e-06,
|
|
"loss": 1.2420412302017212,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.1176470588235294,
|
|
"grad_norm": 11.927118732913993,
|
|
"learning_rate": 7.914147767274756e-06,
|
|
"loss": 1.9582582712173462,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.1197478991596639,
|
|
"grad_norm": 29.1836862977554,
|
|
"learning_rate": 7.904205835745744e-06,
|
|
"loss": 1.7057411670684814,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.1218487394957983,
|
|
"grad_norm": 8.77699695792644,
|
|
"learning_rate": 7.894246545378037e-06,
|
|
"loss": 1.810387134552002,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.1239495798319328,
|
|
"grad_norm": 11.812154757139437,
|
|
"learning_rate": 7.884269955699689e-06,
|
|
"loss": 1.6038577556610107,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.1260504201680672,
|
|
"grad_norm": 11.347334970124107,
|
|
"learning_rate": 7.874276126342151e-06,
|
|
"loss": 1.1410393714904785,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.1281512605042017,
|
|
"grad_norm": 23.969457981422316,
|
|
"learning_rate": 7.86426511703992e-06,
|
|
"loss": 2.28239369392395,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.1302521008403361,
|
|
"grad_norm": 11.3793937172999,
|
|
"learning_rate": 7.854236987630178e-06,
|
|
"loss": 2.1672444343566895,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.1323529411764706,
|
|
"grad_norm": 8.571185039369908,
|
|
"learning_rate": 7.844191798052438e-06,
|
|
"loss": 1.7712535858154297,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.134453781512605,
|
|
"grad_norm": 8.155743104110897,
|
|
"learning_rate": 7.834129608348183e-06,
|
|
"loss": 1.4109793901443481,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.1365546218487395,
|
|
"grad_norm": 12.006945471100122,
|
|
"learning_rate": 7.824050478660506e-06,
|
|
"loss": 1.4405725002288818,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.138655462184874,
|
|
"grad_norm": 16.24385934265993,
|
|
"learning_rate": 7.813954469233758e-06,
|
|
"loss": 2.2450976371765137,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.1407563025210083,
|
|
"grad_norm": 63.00358955157523,
|
|
"learning_rate": 7.803841640413177e-06,
|
|
"loss": 2.16367244720459,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 10.995277933527825,
|
|
"learning_rate": 7.793712052644535e-06,
|
|
"loss": 2.3919224739074707,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.1449579831932772,
|
|
"grad_norm": 9.931645247221951,
|
|
"learning_rate": 7.783565766473777e-06,
|
|
"loss": 1.4211726188659668,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.1470588235294117,
|
|
"grad_norm": 12.106564772704573,
|
|
"learning_rate": 7.773402842546654e-06,
|
|
"loss": 1.2502498626708984,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.1491596638655461,
|
|
"grad_norm": 8.144149987908426,
|
|
"learning_rate": 7.76322334160836e-06,
|
|
"loss": 1.423762321472168,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.1512605042016806,
|
|
"grad_norm": 10.152738619426868,
|
|
"learning_rate": 7.75302732450318e-06,
|
|
"loss": 1.1090279817581177,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.153361344537815,
|
|
"grad_norm": 11.024880610484013,
|
|
"learning_rate": 7.742814852174112e-06,
|
|
"loss": 1.0321426391601562,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.1554621848739495,
|
|
"grad_norm": 10.4112886492949,
|
|
"learning_rate": 7.73258598566251e-06,
|
|
"loss": 1.0928632020950317,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.157563025210084,
|
|
"grad_norm": 17.17079853756711,
|
|
"learning_rate": 7.72234078610772e-06,
|
|
"loss": 1.2369472980499268,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.1596638655462184,
|
|
"grad_norm": 12.662228894532866,
|
|
"learning_rate": 7.712079314746716e-06,
|
|
"loss": 1.2957392930984497,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.161764705882353,
|
|
"grad_norm": 8.967923305212855,
|
|
"learning_rate": 7.701801632913722e-06,
|
|
"loss": 1.6709070205688477,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.1638655462184875,
|
|
"grad_norm": 9.520057506790387,
|
|
"learning_rate": 7.691507802039861e-06,
|
|
"loss": 1.6091077327728271,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.165966386554622,
|
|
"grad_norm": 12.924582534581134,
|
|
"learning_rate": 7.68119788365278e-06,
|
|
"loss": 1.8003133535385132,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.1680672268907564,
|
|
"grad_norm": 8.027840739484652,
|
|
"learning_rate": 7.670871939376281e-06,
|
|
"loss": 1.0151593685150146,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.1701680672268908,
|
|
"grad_norm": 10.792867985796137,
|
|
"learning_rate": 7.660530030929961e-06,
|
|
"loss": 1.3084783554077148,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.1722689075630253,
|
|
"grad_norm": 10.336895443268714,
|
|
"learning_rate": 7.650172220128828e-06,
|
|
"loss": 1.3882572650909424,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.1743697478991597,
|
|
"grad_norm": 11.46121788240209,
|
|
"learning_rate": 7.639798568882947e-06,
|
|
"loss": 1.3919298648834229,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 11.442052901701038,
|
|
"learning_rate": 7.629409139197063e-06,
|
|
"loss": 1.3745830059051514,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.1785714285714286,
|
|
"grad_norm": 22.18812336562329,
|
|
"learning_rate": 7.619003993170226e-06,
|
|
"loss": 1.2964568138122559,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.180672268907563,
|
|
"grad_norm": 14.305068677598294,
|
|
"learning_rate": 7.608583192995433e-06,
|
|
"loss": 1.75518798828125,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.1827731092436975,
|
|
"grad_norm": 38.68186634407232,
|
|
"learning_rate": 7.598146800959238e-06,
|
|
"loss": 2.156588554382324,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.184873949579832,
|
|
"grad_norm": 15.824079652626462,
|
|
"learning_rate": 7.5876948794414015e-06,
|
|
"loss": 1.3602566719055176,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.1869747899159664,
|
|
"grad_norm": 13.134376075413467,
|
|
"learning_rate": 7.577227490914495e-06,
|
|
"loss": 1.5620733499526978,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.1890756302521008,
|
|
"grad_norm": 9.860309886809128,
|
|
"learning_rate": 7.5667446979435445e-06,
|
|
"loss": 0.971282422542572,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.1911764705882353,
|
|
"grad_norm": 11.617960075857892,
|
|
"learning_rate": 7.556246563185648e-06,
|
|
"loss": 1.1717581748962402,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.1932773109243697,
|
|
"grad_norm": 14.78629106010037,
|
|
"learning_rate": 7.545733149389605e-06,
|
|
"loss": 1.8813025951385498,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.1953781512605042,
|
|
"grad_norm": 11.92994585452875,
|
|
"learning_rate": 7.535204519395538e-06,
|
|
"loss": 1.280207633972168,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.1974789915966386,
|
|
"grad_norm": 14.537731397359755,
|
|
"learning_rate": 7.5246607361345215e-06,
|
|
"loss": 1.5685778856277466,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.199579831932773,
|
|
"grad_norm": 9.978722079402786,
|
|
"learning_rate": 7.514101862628203e-06,
|
|
"loss": 2.2011172771453857,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.2016806722689075,
|
|
"grad_norm": 14.328584272935853,
|
|
"learning_rate": 7.503527961988422e-06,
|
|
"loss": 2.0038180351257324,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.203781512605042,
|
|
"grad_norm": 11.49676437218398,
|
|
"learning_rate": 7.492939097416842e-06,
|
|
"loss": 1.1275922060012817,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.2058823529411764,
|
|
"grad_norm": 13.603928637496292,
|
|
"learning_rate": 7.482335332204568e-06,
|
|
"loss": 1.208678960800171,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.2079831932773109,
|
|
"grad_norm": 10.710849924738463,
|
|
"learning_rate": 7.471716729731764e-06,
|
|
"loss": 1.7450125217437744,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.2100840336134453,
|
|
"grad_norm": 10.8408813790809,
|
|
"learning_rate": 7.461083353467283e-06,
|
|
"loss": 1.5381510257720947,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.2121848739495797,
|
|
"grad_norm": 10.502717838660322,
|
|
"learning_rate": 7.450435266968279e-06,
|
|
"loss": 1.6857651472091675,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.2142857142857142,
|
|
"grad_norm": 10.194196645130454,
|
|
"learning_rate": 7.4397725338798365e-06,
|
|
"loss": 1.9049471616744995,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.2163865546218489,
|
|
"grad_norm": 8.336901180250376,
|
|
"learning_rate": 7.429095217934578e-06,
|
|
"loss": 2.2398974895477295,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.2184873949579833,
|
|
"grad_norm": 8.289301563947674,
|
|
"learning_rate": 7.4184033829522935e-06,
|
|
"loss": 1.8767409324645996,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.2205882352941178,
|
|
"grad_norm": 7.83258681688038,
|
|
"learning_rate": 7.4076970928395565e-06,
|
|
"loss": 1.4787061214447021,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.2226890756302522,
|
|
"grad_norm": 11.288493150816146,
|
|
"learning_rate": 7.396976411589338e-06,
|
|
"loss": 1.1055876016616821,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.2247899159663866,
|
|
"grad_norm": 8.627197279612671,
|
|
"learning_rate": 7.386241403280629e-06,
|
|
"loss": 1.668757438659668,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.226890756302521,
|
|
"grad_norm": 7.9829732080808276,
|
|
"learning_rate": 7.375492132078051e-06,
|
|
"loss": 1.2818783521652222,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.2289915966386555,
|
|
"grad_norm": 9.132163063845432,
|
|
"learning_rate": 7.364728662231484e-06,
|
|
"loss": 1.578829050064087,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.23109243697479,
|
|
"grad_norm": 9.541187433357738,
|
|
"learning_rate": 7.353951058075669e-06,
|
|
"loss": 1.572939157485962,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.2331932773109244,
|
|
"grad_norm": 29.472008336805924,
|
|
"learning_rate": 7.343159384029833e-06,
|
|
"loss": 3.977992057800293,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.2352941176470589,
|
|
"grad_norm": 12.577355177733914,
|
|
"learning_rate": 7.332353704597299e-06,
|
|
"loss": 1.955003023147583,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.2373949579831933,
|
|
"grad_norm": 10.61755598072498,
|
|
"learning_rate": 7.321534084365101e-06,
|
|
"loss": 1.5401737689971924,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.2394957983193278,
|
|
"grad_norm": 17.052134953118316,
|
|
"learning_rate": 7.310700588003605e-06,
|
|
"loss": 1.895308017730713,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.2415966386554622,
|
|
"grad_norm": 13.914617942504853,
|
|
"learning_rate": 7.299853280266109e-06,
|
|
"loss": 1.6920474767684937,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.2436974789915967,
|
|
"grad_norm": 18.300626922757814,
|
|
"learning_rate": 7.28899222598847e-06,
|
|
"loss": 1.9865736961364746,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.245798319327731,
|
|
"grad_norm": 8.604822405832417,
|
|
"learning_rate": 7.278117490088703e-06,
|
|
"loss": 1.2350941896438599,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.2478991596638656,
|
|
"grad_norm": 15.714181520858954,
|
|
"learning_rate": 7.267229137566607e-06,
|
|
"loss": 1.800095558166504,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 7.625924941471246,
|
|
"learning_rate": 7.256327233503365e-06,
|
|
"loss": 1.848137617111206,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.2521008403361344,
|
|
"grad_norm": 8.480492494477819,
|
|
"learning_rate": 7.24541184306116e-06,
|
|
"loss": 1.7656617164611816,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.254201680672269,
|
|
"grad_norm": 11.960261973795399,
|
|
"learning_rate": 7.234483031482787e-06,
|
|
"loss": 1.0096323490142822,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.2563025210084033,
|
|
"grad_norm": 9.709001923888373,
|
|
"learning_rate": 7.223540864091259e-06,
|
|
"loss": 1.428197979927063,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.2584033613445378,
|
|
"grad_norm": 11.628908186348927,
|
|
"learning_rate": 7.2125854062894184e-06,
|
|
"loss": 1.0703970193862915,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.2605042016806722,
|
|
"grad_norm": 14.20204722362147,
|
|
"learning_rate": 7.201616723559548e-06,
|
|
"loss": 1.7873646020889282,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.2626050420168067,
|
|
"grad_norm": 12.326258967391198,
|
|
"learning_rate": 7.190634881462976e-06,
|
|
"loss": 1.3262135982513428,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.2647058823529411,
|
|
"grad_norm": 13.762619560991299,
|
|
"learning_rate": 7.179639945639688e-06,
|
|
"loss": 1.6294150352478027,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.2668067226890756,
|
|
"grad_norm": 12.793929462404881,
|
|
"learning_rate": 7.168631981807931e-06,
|
|
"loss": 2.6409220695495605,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.26890756302521,
|
|
"grad_norm": 12.75285051440542,
|
|
"learning_rate": 7.15761105576382e-06,
|
|
"loss": 1.3407433032989502,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.2710084033613445,
|
|
"grad_norm": 11.811026706721915,
|
|
"learning_rate": 7.1465772333809524e-06,
|
|
"loss": 1.1475789546966553,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.273109243697479,
|
|
"grad_norm": 16.182274466548407,
|
|
"learning_rate": 7.1355305806100036e-06,
|
|
"loss": 1.8270117044448853,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.2752100840336134,
|
|
"grad_norm": 9.390889705782493,
|
|
"learning_rate": 7.124471163478344e-06,
|
|
"loss": 2.168900489807129,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.2773109243697478,
|
|
"grad_norm": 14.960557905830523,
|
|
"learning_rate": 7.113399048089631e-06,
|
|
"loss": 2.0142345428466797,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.2794117647058822,
|
|
"grad_norm": 14.63642311907181,
|
|
"learning_rate": 7.102314300623425e-06,
|
|
"loss": 2.015444755554199,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.2815126050420167,
|
|
"grad_norm": 13.291155405094099,
|
|
"learning_rate": 7.091216987334792e-06,
|
|
"loss": 1.5882906913757324,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.2836134453781511,
|
|
"grad_norm": 17.727064634923273,
|
|
"learning_rate": 7.080107174553903e-06,
|
|
"loss": 1.4543545246124268,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 13.123573018342379,
|
|
"learning_rate": 7.068984928685638e-06,
|
|
"loss": 1.3196444511413574,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.28781512605042,
|
|
"grad_norm": 11.204963124082711,
|
|
"learning_rate": 7.057850316209198e-06,
|
|
"loss": 0.8601089715957642,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.2899159663865547,
|
|
"grad_norm": 11.507041064870066,
|
|
"learning_rate": 7.0467034036776945e-06,
|
|
"loss": 1.334380865097046,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.2920168067226891,
|
|
"grad_norm": 9.153184893600336,
|
|
"learning_rate": 7.035544257717761e-06,
|
|
"loss": 1.4980111122131348,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.2941176470588236,
|
|
"grad_norm": 11.208470095807519,
|
|
"learning_rate": 7.024372945029152e-06,
|
|
"loss": 1.9393174648284912,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.296218487394958,
|
|
"grad_norm": 9.33539024674701,
|
|
"learning_rate": 7.013189532384343e-06,
|
|
"loss": 1.1070374250411987,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.2983193277310925,
|
|
"grad_norm": 22.088040059228636,
|
|
"learning_rate": 7.001994086628133e-06,
|
|
"loss": 2.146557331085205,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.300420168067227,
|
|
"grad_norm": 12.461539796415895,
|
|
"learning_rate": 6.990786674677246e-06,
|
|
"loss": 1.097703456878662,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.3025210084033614,
|
|
"grad_norm": 10.337144677645794,
|
|
"learning_rate": 6.979567363519927e-06,
|
|
"loss": 1.9619685411453247,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.3046218487394958,
|
|
"grad_norm": 8.583774398203186,
|
|
"learning_rate": 6.9683362202155465e-06,
|
|
"loss": 1.2424434423446655,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.3067226890756303,
|
|
"grad_norm": 10.235846664061171,
|
|
"learning_rate": 6.957093311894199e-06,
|
|
"loss": 1.8912100791931152,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.3088235294117647,
|
|
"grad_norm": 12.7496233438477,
|
|
"learning_rate": 6.945838705756293e-06,
|
|
"loss": 1.4234580993652344,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.3109243697478992,
|
|
"grad_norm": 12.664108172155123,
|
|
"learning_rate": 6.934572469072163e-06,
|
|
"loss": 1.7631306648254395,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.3130252100840336,
|
|
"grad_norm": 9.043940926283064,
|
|
"learning_rate": 6.923294669181659e-06,
|
|
"loss": 1.275686264038086,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.315126050420168,
|
|
"grad_norm": 7.7562010562396155,
|
|
"learning_rate": 6.912005373493747e-06,
|
|
"loss": 1.8493428230285645,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.3172268907563025,
|
|
"grad_norm": 10.778946101337466,
|
|
"learning_rate": 6.900704649486103e-06,
|
|
"loss": 1.0401699542999268,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.319327731092437,
|
|
"grad_norm": 9.474741424665671,
|
|
"learning_rate": 6.889392564704712e-06,
|
|
"loss": 1.932092547416687,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.3214285714285714,
|
|
"grad_norm": 18.187204049633937,
|
|
"learning_rate": 6.878069186763466e-06,
|
|
"loss": 2.0269484519958496,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.3235294117647058,
|
|
"grad_norm": 12.371698082139902,
|
|
"learning_rate": 6.866734583343753e-06,
|
|
"loss": 1.6765419244766235,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.3256302521008403,
|
|
"grad_norm": 20.859173211033255,
|
|
"learning_rate": 6.855388822194061e-06,
|
|
"loss": 1.7931967973709106,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.3277310924369747,
|
|
"grad_norm": 10.82122547870125,
|
|
"learning_rate": 6.844031971129571e-06,
|
|
"loss": 0.9582860469818115,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.3298319327731092,
|
|
"grad_norm": 13.255068667352083,
|
|
"learning_rate": 6.8326640980317475e-06,
|
|
"loss": 1.7692348957061768,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.3319327731092436,
|
|
"grad_norm": 12.632729369596628,
|
|
"learning_rate": 6.821285270847934e-06,
|
|
"loss": 2.143463373184204,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.334033613445378,
|
|
"grad_norm": 31.76404047719635,
|
|
"learning_rate": 6.80989555759095e-06,
|
|
"loss": 2.290733814239502,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.3361344537815127,
|
|
"grad_norm": 18.9996916353526,
|
|
"learning_rate": 6.79849502633868e-06,
|
|
"loss": 1.4548063278198242,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.3382352941176472,
|
|
"grad_norm": 10.423656653462372,
|
|
"learning_rate": 6.787083745233674e-06,
|
|
"loss": 1.6137502193450928,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.3403361344537816,
|
|
"grad_norm": 9.260024233354208,
|
|
"learning_rate": 6.775661782482732e-06,
|
|
"loss": 1.277546763420105,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.342436974789916,
|
|
"grad_norm": 17.22623817552147,
|
|
"learning_rate": 6.764229206356498e-06,
|
|
"loss": 1.4183297157287598,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.3445378151260505,
|
|
"grad_norm": 13.655193467078059,
|
|
"learning_rate": 6.752786085189059e-06,
|
|
"loss": 2.352818012237549,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.346638655462185,
|
|
"grad_norm": 10.832229231352626,
|
|
"learning_rate": 6.741332487377525e-06,
|
|
"loss": 1.1966056823730469,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.3487394957983194,
|
|
"grad_norm": 16.450581846244585,
|
|
"learning_rate": 6.729868481381632e-06,
|
|
"loss": 2.1670610904693604,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.3508403361344539,
|
|
"grad_norm": 14.092922253172704,
|
|
"learning_rate": 6.718394135723321e-06,
|
|
"loss": 1.4478580951690674,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.3529411764705883,
|
|
"grad_norm": 9.381957347730207,
|
|
"learning_rate": 6.706909518986341e-06,
|
|
"loss": 1.1712067127227783,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.3550420168067228,
|
|
"grad_norm": 10.457393453015948,
|
|
"learning_rate": 6.695414699815828e-06,
|
|
"loss": 1.241437315940857,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.3571428571428572,
|
|
"grad_norm": 26.13803770478073,
|
|
"learning_rate": 6.6839097469179e-06,
|
|
"loss": 1.5295310020446777,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.3592436974789917,
|
|
"grad_norm": 7.790998799935961,
|
|
"learning_rate": 6.6723947290592505e-06,
|
|
"loss": 1.3555617332458496,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.361344537815126,
|
|
"grad_norm": 14.276342203489932,
|
|
"learning_rate": 6.660869715066725e-06,
|
|
"loss": 1.3158948421478271,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.3634453781512605,
|
|
"grad_norm": 7.962812237225353,
|
|
"learning_rate": 6.649334773826924e-06,
|
|
"loss": 1.7540979385375977,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.365546218487395,
|
|
"grad_norm": 12.139617079516373,
|
|
"learning_rate": 6.63778997428578e-06,
|
|
"loss": 1.7170000076293945,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.3676470588235294,
|
|
"grad_norm": 9.424987040512477,
|
|
"learning_rate": 6.626235385448152e-06,
|
|
"loss": 1.2551283836364746,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.3697478991596639,
|
|
"grad_norm": 11.731648378217931,
|
|
"learning_rate": 6.61467107637741e-06,
|
|
"loss": 1.468104362487793,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.3718487394957983,
|
|
"grad_norm": 22.089135904446437,
|
|
"learning_rate": 6.603097116195026e-06,
|
|
"loss": 1.3832511901855469,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.3739495798319328,
|
|
"grad_norm": 13.408397747285187,
|
|
"learning_rate": 6.591513574080152e-06,
|
|
"loss": 1.1895179748535156,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.3760504201680672,
|
|
"grad_norm": 10.506415017764112,
|
|
"learning_rate": 6.579920519269218e-06,
|
|
"loss": 1.57008957862854,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.3781512605042017,
|
|
"grad_norm": 7.3609644144158315,
|
|
"learning_rate": 6.568318021055512e-06,
|
|
"loss": 1.1686642169952393,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.3802521008403361,
|
|
"grad_norm": 19.968281534972263,
|
|
"learning_rate": 6.556706148788765e-06,
|
|
"loss": 1.831925392150879,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.3823529411764706,
|
|
"grad_norm": 14.858290457520233,
|
|
"learning_rate": 6.545084971874738e-06,
|
|
"loss": 1.1927814483642578,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.384453781512605,
|
|
"grad_norm": 13.471589913344788,
|
|
"learning_rate": 6.5334545597748075e-06,
|
|
"loss": 1.225053310394287,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.3865546218487395,
|
|
"grad_norm": 10.64391058893006,
|
|
"learning_rate": 6.521814982005552e-06,
|
|
"loss": 1.489911437034607,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.388655462184874,
|
|
"grad_norm": 14.084203318094486,
|
|
"learning_rate": 6.510166308138328e-06,
|
|
"loss": 1.3653918504714966,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.3907563025210083,
|
|
"grad_norm": 10.331380068295612,
|
|
"learning_rate": 6.498508607798872e-06,
|
|
"loss": 1.7082477807998657,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.3928571428571428,
|
|
"grad_norm": 10.828390377137284,
|
|
"learning_rate": 6.48684195066686e-06,
|
|
"loss": 1.1122634410858154,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.3949579831932772,
|
|
"grad_norm": 14.858926439296923,
|
|
"learning_rate": 6.475166406475515e-06,
|
|
"loss": 0.9572471380233765,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.3970588235294117,
|
|
"grad_norm": 17.02414371173566,
|
|
"learning_rate": 6.4634820450111715e-06,
|
|
"loss": 1.8282674551010132,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.3991596638655461,
|
|
"grad_norm": 12.803154660225488,
|
|
"learning_rate": 6.451788936112868e-06,
|
|
"loss": 1.2026221752166748,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.4012605042016806,
|
|
"grad_norm": 10.424391789653072,
|
|
"learning_rate": 6.440087149671932e-06,
|
|
"loss": 1.3183879852294922,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.403361344537815,
|
|
"grad_norm": 8.925077970843816,
|
|
"learning_rate": 6.428376755631553e-06,
|
|
"loss": 1.216771125793457,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.4054621848739495,
|
|
"grad_norm": 12.5613140309092,
|
|
"learning_rate": 6.41665782398637e-06,
|
|
"loss": 1.6759852170944214,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.407563025210084,
|
|
"grad_norm": 13.278399152081807,
|
|
"learning_rate": 6.404930424782052e-06,
|
|
"loss": 1.6593937873840332,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.4096638655462184,
|
|
"grad_norm": 11.405836988138063,
|
|
"learning_rate": 6.393194628114885e-06,
|
|
"loss": 1.672929286956787,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.4117647058823528,
|
|
"grad_norm": 13.1586681372233,
|
|
"learning_rate": 6.381450504131339e-06,
|
|
"loss": 1.2778139114379883,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.4138655462184873,
|
|
"grad_norm": 9.462564203496632,
|
|
"learning_rate": 6.369698123027664e-06,
|
|
"loss": 1.6472318172454834,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.415966386554622,
|
|
"grad_norm": 12.367075684146661,
|
|
"learning_rate": 6.357937555049465e-06,
|
|
"loss": 1.5301233530044556,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.4180672268907564,
|
|
"grad_norm": 9.502388277835697,
|
|
"learning_rate": 6.3461688704912735e-06,
|
|
"loss": 1.5423755645751953,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.4201680672268908,
|
|
"grad_norm": 13.340625777023925,
|
|
"learning_rate": 6.334392139696144e-06,
|
|
"loss": 0.8435590863227844,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.4222689075630253,
|
|
"grad_norm": 9.498703079540906,
|
|
"learning_rate": 6.322607433055217e-06,
|
|
"loss": 0.9243001937866211,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.4243697478991597,
|
|
"grad_norm": 16.692002135074148,
|
|
"learning_rate": 6.310814821007312e-06,
|
|
"loss": 1.1370623111724854,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.4264705882352942,
|
|
"grad_norm": 14.252581322539957,
|
|
"learning_rate": 6.299014374038493e-06,
|
|
"loss": 1.8121721744537354,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 12.146719665307664,
|
|
"learning_rate": 6.287206162681663e-06,
|
|
"loss": 1.5701857805252075,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.430672268907563,
|
|
"grad_norm": 11.383072024184132,
|
|
"learning_rate": 6.275390257516125e-06,
|
|
"loss": 1.7376922369003296,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.4327731092436975,
|
|
"grad_norm": 7.389859803918485,
|
|
"learning_rate": 6.263566729167177e-06,
|
|
"loss": 1.722080111503601,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.434873949579832,
|
|
"grad_norm": 11.051749495669629,
|
|
"learning_rate": 6.251735648305676e-06,
|
|
"loss": 1.8646998405456543,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.4369747899159664,
|
|
"grad_norm": 8.056416794494698,
|
|
"learning_rate": 6.239897085647624e-06,
|
|
"loss": 1.6373791694641113,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.4390756302521008,
|
|
"grad_norm": 14.269272004271027,
|
|
"learning_rate": 6.228051111953742e-06,
|
|
"loss": 1.5332825183868408,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.4411764705882353,
|
|
"grad_norm": 11.569261548147155,
|
|
"learning_rate": 6.216197798029049e-06,
|
|
"loss": 1.7713117599487305,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.4432773109243697,
|
|
"grad_norm": 8.013171505509781,
|
|
"learning_rate": 6.204337214722435e-06,
|
|
"loss": 1.3197343349456787,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.4453781512605042,
|
|
"grad_norm": 7.988425778687254,
|
|
"learning_rate": 6.192469432926241e-06,
|
|
"loss": 1.3940856456756592,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.4474789915966386,
|
|
"grad_norm": 14.591944041736712,
|
|
"learning_rate": 6.180594523575838e-06,
|
|
"loss": 2.0876762866973877,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.449579831932773,
|
|
"grad_norm": 13.421486753054541,
|
|
"learning_rate": 6.1687125576491945e-06,
|
|
"loss": 2.5141618251800537,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.4516806722689075,
|
|
"grad_norm": 11.284766831118931,
|
|
"learning_rate": 6.156823606166461e-06,
|
|
"loss": 0.9575009942054749,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.453781512605042,
|
|
"grad_norm": 10.180747973970707,
|
|
"learning_rate": 6.144927740189537e-06,
|
|
"loss": 1.2732771635055542,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.4558823529411764,
|
|
"grad_norm": 18.346962905469923,
|
|
"learning_rate": 6.133025030821656e-06,
|
|
"loss": 1.0447793006896973,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.4579831932773109,
|
|
"grad_norm": 9.72623535803224,
|
|
"learning_rate": 6.12111554920695e-06,
|
|
"loss": 2.069892406463623,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.4600840336134453,
|
|
"grad_norm": 12.298140767363686,
|
|
"learning_rate": 6.1091993665300354e-06,
|
|
"loss": 1.4193060398101807,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.46218487394958,
|
|
"grad_norm": 13.962463696814781,
|
|
"learning_rate": 6.0972765540155764e-06,
|
|
"loss": 1.8489269018173218,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.4642857142857144,
|
|
"grad_norm": 28.27872673921732,
|
|
"learning_rate": 6.08534718292787e-06,
|
|
"loss": 1.8245782852172852,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.4663865546218489,
|
|
"grad_norm": 11.704836274205533,
|
|
"learning_rate": 6.07341132457041e-06,
|
|
"loss": 1.8135966062545776,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 1.4684873949579833,
|
|
"grad_norm": 10.329932140617693,
|
|
"learning_rate": 6.061469050285469e-06,
|
|
"loss": 1.2886388301849365,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 7.515435335019253,
|
|
"learning_rate": 6.049520431453666e-06,
|
|
"loss": 1.8994669914245605,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.4726890756302522,
|
|
"grad_norm": 8.559344765158919,
|
|
"learning_rate": 6.037565539493542e-06,
|
|
"loss": 1.830640196800232,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.4747899159663866,
|
|
"grad_norm": 11.041841545301232,
|
|
"learning_rate": 6.025604445861137e-06,
|
|
"loss": 1.253919005393982,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.476890756302521,
|
|
"grad_norm": 8.205733723815058,
|
|
"learning_rate": 6.013637222049554e-06,
|
|
"loss": 1.4687739610671997,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.4789915966386555,
|
|
"grad_norm": 11.178455036225843,
|
|
"learning_rate": 6.0016639395885424e-06,
|
|
"loss": 0.8241528272628784,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.48109243697479,
|
|
"grad_norm": 9.38917529173767,
|
|
"learning_rate": 5.98968467004406e-06,
|
|
"loss": 0.9833969473838806,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.4831932773109244,
|
|
"grad_norm": 12.194704863353925,
|
|
"learning_rate": 5.977699485017855e-06,
|
|
"loss": 1.4603691101074219,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.4852941176470589,
|
|
"grad_norm": 15.962403885996371,
|
|
"learning_rate": 5.965708456147028e-06,
|
|
"loss": 1.2566254138946533,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.4873949579831933,
|
|
"grad_norm": 12.538339280369007,
|
|
"learning_rate": 5.953711655103615e-06,
|
|
"loss": 1.1779121160507202,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.4894957983193278,
|
|
"grad_norm": 13.11480387819097,
|
|
"learning_rate": 5.941709153594146e-06,
|
|
"loss": 2.1752524375915527,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.4915966386554622,
|
|
"grad_norm": 13.674164350381794,
|
|
"learning_rate": 5.92970102335923e-06,
|
|
"loss": 1.368391752243042,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.4936974789915967,
|
|
"grad_norm": 14.25334329729132,
|
|
"learning_rate": 5.917687336173116e-06,
|
|
"loss": 1.4870836734771729,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.495798319327731,
|
|
"grad_norm": 6.953756120767862,
|
|
"learning_rate": 5.905668163843269e-06,
|
|
"loss": 1.5822714567184448,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.4978991596638656,
|
|
"grad_norm": 11.644576591550592,
|
|
"learning_rate": 5.893643578209939e-06,
|
|
"loss": 1.5158865451812744,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 7.891690515863711,
|
|
"learning_rate": 5.881613651145732e-06,
|
|
"loss": 1.0833930969238281,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.5021008403361344,
|
|
"grad_norm": 10.374951659973064,
|
|
"learning_rate": 5.8695784545551815e-06,
|
|
"loss": 1.2957074642181396,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.504201680672269,
|
|
"grad_norm": 11.404797448034625,
|
|
"learning_rate": 5.8575380603743155e-06,
|
|
"loss": 1.541155457496643,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.5063025210084033,
|
|
"grad_norm": 9.289021596431404,
|
|
"learning_rate": 5.8454925405702326e-06,
|
|
"loss": 1.7509238719940186,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.5084033613445378,
|
|
"grad_norm": 15.014764167830913,
|
|
"learning_rate": 5.833441967140662e-06,
|
|
"loss": 1.8062071800231934,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.5105042016806722,
|
|
"grad_norm": 11.714801378063543,
|
|
"learning_rate": 5.821386412113546e-06,
|
|
"loss": 1.4850780963897705,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.5126050420168067,
|
|
"grad_norm": 12.362413690497235,
|
|
"learning_rate": 5.809325947546596e-06,
|
|
"loss": 1.1842257976531982,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.5147058823529411,
|
|
"grad_norm": 18.21069319599996,
|
|
"learning_rate": 5.797260645526873e-06,
|
|
"loss": 1.396120548248291,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.5168067226890756,
|
|
"grad_norm": 9.57576869626496,
|
|
"learning_rate": 5.785190578170351e-06,
|
|
"loss": 1.2990989685058594,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.51890756302521,
|
|
"grad_norm": 9.618803676236782,
|
|
"learning_rate": 5.773115817621487e-06,
|
|
"loss": 1.8467388153076172,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.5210084033613445,
|
|
"grad_norm": 11.794758813101549,
|
|
"learning_rate": 5.761036436052788e-06,
|
|
"loss": 1.6907732486724854,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.523109243697479,
|
|
"grad_norm": 10.45395101852907,
|
|
"learning_rate": 5.748952505664385e-06,
|
|
"loss": 1.265946388244629,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.5252100840336134,
|
|
"grad_norm": 39.03800117968252,
|
|
"learning_rate": 5.736864098683595e-06,
|
|
"loss": 1.2473053932189941,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.5273109243697478,
|
|
"grad_norm": 12.736788173749753,
|
|
"learning_rate": 5.724771287364492e-06,
|
|
"loss": 0.8382349014282227,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.5294117647058822,
|
|
"grad_norm": 9.695123568985625,
|
|
"learning_rate": 5.712674143987478e-06,
|
|
"loss": 0.6312862038612366,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.5315126050420167,
|
|
"grad_norm": 19.807295601128907,
|
|
"learning_rate": 5.700572740858847e-06,
|
|
"loss": 2.154848575592041,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.5336134453781511,
|
|
"grad_norm": 8.348583198098744,
|
|
"learning_rate": 5.688467150310353e-06,
|
|
"loss": 2.034533739089966,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.5357142857142856,
|
|
"grad_norm": 17.36525324904992,
|
|
"learning_rate": 5.67635744469878e-06,
|
|
"loss": 1.1331748962402344,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.53781512605042,
|
|
"grad_norm": 16.447167153207392,
|
|
"learning_rate": 5.664243696405509e-06,
|
|
"loss": 2.139069080352783,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.5399159663865545,
|
|
"grad_norm": 8.422362967066016,
|
|
"learning_rate": 5.652125977836083e-06,
|
|
"loss": 1.5174198150634766,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.542016806722689,
|
|
"grad_norm": 11.885541322376927,
|
|
"learning_rate": 5.640004361419776e-06,
|
|
"loss": 1.4445990324020386,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.5441176470588234,
|
|
"grad_norm": 13.004468722411309,
|
|
"learning_rate": 5.627878919609162e-06,
|
|
"loss": 1.3474795818328857,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.5462184873949578,
|
|
"grad_norm": 8.116038341885554,
|
|
"learning_rate": 5.615749724879677e-06,
|
|
"loss": 1.4871881008148193,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.5483193277310925,
|
|
"grad_norm": 9.971232266174457,
|
|
"learning_rate": 5.603616849729191e-06,
|
|
"loss": 1.308741569519043,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.550420168067227,
|
|
"grad_norm": 9.853281920667216,
|
|
"learning_rate": 5.591480366677571e-06,
|
|
"loss": 1.712050199508667,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.5525210084033614,
|
|
"grad_norm": 10.993711611399497,
|
|
"learning_rate": 5.579340348266251e-06,
|
|
"loss": 1.5636662244796753,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.5546218487394958,
|
|
"grad_norm": 9.673522828347147,
|
|
"learning_rate": 5.5671968670577935e-06,
|
|
"loss": 2.132948160171509,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.5567226890756303,
|
|
"grad_norm": 8.375544989800046,
|
|
"learning_rate": 5.55504999563546e-06,
|
|
"loss": 1.3193635940551758,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.5588235294117647,
|
|
"grad_norm": 17.452697703036844,
|
|
"learning_rate": 5.542899806602776e-06,
|
|
"loss": 2.288175582885742,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.5609243697478992,
|
|
"grad_norm": 9.46737262414516,
|
|
"learning_rate": 5.530746372583097e-06,
|
|
"loss": 0.9925522804260254,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.5630252100840336,
|
|
"grad_norm": 11.56785515084437,
|
|
"learning_rate": 5.518589766219173e-06,
|
|
"loss": 1.0975109338760376,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.565126050420168,
|
|
"grad_norm": 7.691252772883318,
|
|
"learning_rate": 5.506430060172714e-06,
|
|
"loss": 1.5021933317184448,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.5672268907563025,
|
|
"grad_norm": 18.209072269714934,
|
|
"learning_rate": 5.494267327123965e-06,
|
|
"loss": 1.3946982622146606,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.569327731092437,
|
|
"grad_norm": 8.665816006385743,
|
|
"learning_rate": 5.482101639771255e-06,
|
|
"loss": 1.1381559371948242,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 8.44106615949262,
|
|
"learning_rate": 5.469933070830574e-06,
|
|
"loss": 1.6340922117233276,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.5735294117647058,
|
|
"grad_norm": 15.525808686682517,
|
|
"learning_rate": 5.457761693035139e-06,
|
|
"loss": 1.3076329231262207,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.5756302521008403,
|
|
"grad_norm": 17.23937998928761,
|
|
"learning_rate": 5.44558757913495e-06,
|
|
"loss": 1.4544854164123535,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.5777310924369747,
|
|
"grad_norm": 12.32276892072033,
|
|
"learning_rate": 5.433410801896366e-06,
|
|
"loss": 1.254534363746643,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.5798319327731094,
|
|
"grad_norm": 15.375685102500293,
|
|
"learning_rate": 5.4212314341016645e-06,
|
|
"loss": 1.6915278434753418,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.5819327731092439,
|
|
"grad_norm": 11.585921032972996,
|
|
"learning_rate": 5.409049548548604e-06,
|
|
"loss": 1.9941121339797974,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.5840336134453783,
|
|
"grad_norm": 20.541716513837855,
|
|
"learning_rate": 5.396865218049995e-06,
|
|
"loss": 1.8611130714416504,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.5861344537815127,
|
|
"grad_norm": 8.615776619323707,
|
|
"learning_rate": 5.38467851543326e-06,
|
|
"loss": 1.5740795135498047,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.5882352941176472,
|
|
"grad_norm": 11.309502746225869,
|
|
"learning_rate": 5.3724895135400015e-06,
|
|
"loss": 2.713351249694824,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.5903361344537816,
|
|
"grad_norm": 10.253542895381939,
|
|
"learning_rate": 5.360298285225564e-06,
|
|
"loss": 1.3163414001464844,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.592436974789916,
|
|
"grad_norm": 9.938347580214066,
|
|
"learning_rate": 5.3481049033586e-06,
|
|
"loss": 1.2127149105072021,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.5945378151260505,
|
|
"grad_norm": 8.342953472089572,
|
|
"learning_rate": 5.335909440820635e-06,
|
|
"loss": 1.7897974252700806,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.596638655462185,
|
|
"grad_norm": 15.069858504573101,
|
|
"learning_rate": 5.323711970505627e-06,
|
|
"loss": 1.1387288570404053,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.5987394957983194,
|
|
"grad_norm": 11.085133444710992,
|
|
"learning_rate": 5.311512565319542e-06,
|
|
"loss": 1.3364837169647217,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.6008403361344539,
|
|
"grad_norm": 32.930598343392575,
|
|
"learning_rate": 5.299311298179904e-06,
|
|
"loss": 1.5517654418945312,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.6029411764705883,
|
|
"grad_norm": 8.444149682468657,
|
|
"learning_rate": 5.287108242015371e-06,
|
|
"loss": 1.3167724609375,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.6050420168067228,
|
|
"grad_norm": 11.755149351980176,
|
|
"learning_rate": 5.27490346976529e-06,
|
|
"loss": 0.9891781210899353,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.6071428571428572,
|
|
"grad_norm": 7.559294905407309,
|
|
"learning_rate": 5.2626970543792685e-06,
|
|
"loss": 1.4272327423095703,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.6092436974789917,
|
|
"grad_norm": 15.95519407006552,
|
|
"learning_rate": 5.250489068816734e-06,
|
|
"loss": 1.5374692678451538,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.611344537815126,
|
|
"grad_norm": 11.13469591726469,
|
|
"learning_rate": 5.238279586046499e-06,
|
|
"loss": 2.130378484725952,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.6134453781512605,
|
|
"grad_norm": 7.655252597687492,
|
|
"learning_rate": 5.226068679046327e-06,
|
|
"loss": 1.0989816188812256,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.615546218487395,
|
|
"grad_norm": 13.788475789937872,
|
|
"learning_rate": 5.21385642080249e-06,
|
|
"loss": 1.4945666790008545,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.6176470588235294,
|
|
"grad_norm": 10.474320077857882,
|
|
"learning_rate": 5.201642884309341e-06,
|
|
"loss": 2.0057296752929688,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.6197478991596639,
|
|
"grad_norm": 15.324254986466453,
|
|
"learning_rate": 5.189428142568872e-06,
|
|
"loss": 1.1791839599609375,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.6218487394957983,
|
|
"grad_norm": 9.947837053439045,
|
|
"learning_rate": 5.177212268590277e-06,
|
|
"loss": 1.836449384689331,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.6239495798319328,
|
|
"grad_norm": 13.279110268267903,
|
|
"learning_rate": 5.16499533538952e-06,
|
|
"loss": 1.711057424545288,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.6260504201680672,
|
|
"grad_norm": 8.331904700424623,
|
|
"learning_rate": 5.152777415988894e-06,
|
|
"loss": 1.5274529457092285,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.6281512605042017,
|
|
"grad_norm": 8.55624828912363,
|
|
"learning_rate": 5.140558583416591e-06,
|
|
"loss": 1.687756061553955,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.6302521008403361,
|
|
"grad_norm": 13.622302699665928,
|
|
"learning_rate": 5.128338910706254e-06,
|
|
"loss": 1.857285976409912,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.6323529411764706,
|
|
"grad_norm": 10.381639920961357,
|
|
"learning_rate": 5.1161184708965525e-06,
|
|
"loss": 2.2893779277801514,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.634453781512605,
|
|
"grad_norm": 13.136015822018155,
|
|
"learning_rate": 5.103897337030742e-06,
|
|
"loss": 1.299177646636963,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.6365546218487395,
|
|
"grad_norm": 12.717300364122215,
|
|
"learning_rate": 5.091675582156224e-06,
|
|
"loss": 1.156067132949829,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.638655462184874,
|
|
"grad_norm": 12.064277910399447,
|
|
"learning_rate": 5.07945327932411e-06,
|
|
"loss": 1.6888867616653442,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.6407563025210083,
|
|
"grad_norm": 18.58505757321674,
|
|
"learning_rate": 5.067230501588792e-06,
|
|
"loss": 2.480485677719116,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.6428571428571428,
|
|
"grad_norm": 10.797297449293279,
|
|
"learning_rate": 5.055007322007497e-06,
|
|
"loss": 1.1827846765518188,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.6449579831932772,
|
|
"grad_norm": 8.451797960661173,
|
|
"learning_rate": 5.0427838136398545e-06,
|
|
"loss": 0.9974920749664307,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.6470588235294117,
|
|
"grad_norm": 9.35430238275204,
|
|
"learning_rate": 5.0305600495474586e-06,
|
|
"loss": 1.3341560363769531,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.6491596638655461,
|
|
"grad_norm": 12.853381185509344,
|
|
"learning_rate": 5.018336102793433e-06,
|
|
"loss": 1.7801398038864136,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.6512605042016806,
|
|
"grad_norm": 9.421435792652487,
|
|
"learning_rate": 5.006112046441993e-06,
|
|
"loss": 1.5409959554672241,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.653361344537815,
|
|
"grad_norm": 17.569411033601973,
|
|
"learning_rate": 4.993887953558008e-06,
|
|
"loss": 1.7089118957519531,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.6554621848739495,
|
|
"grad_norm": 7.577368880962854,
|
|
"learning_rate": 4.981663897206568e-06,
|
|
"loss": 1.5989807844161987,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.657563025210084,
|
|
"grad_norm": 8.077886545894733,
|
|
"learning_rate": 4.969439950452543e-06,
|
|
"loss": 1.4693567752838135,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.6596638655462184,
|
|
"grad_norm": 9.785283960809164,
|
|
"learning_rate": 4.957216186360147e-06,
|
|
"loss": 1.7319889068603516,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.6617647058823528,
|
|
"grad_norm": 9.333867287657988,
|
|
"learning_rate": 4.944992677992505e-06,
|
|
"loss": 1.38368558883667,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.6638655462184873,
|
|
"grad_norm": 12.124257442308217,
|
|
"learning_rate": 4.932769498411209e-06,
|
|
"loss": 1.9276368618011475,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.6659663865546217,
|
|
"grad_norm": 11.917503250417354,
|
|
"learning_rate": 4.9205467206758914e-06,
|
|
"loss": 1.5189965963363647,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.6680672268907561,
|
|
"grad_norm": 10.697092148525952,
|
|
"learning_rate": 4.908324417843779e-06,
|
|
"loss": 1.8080897331237793,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.6701680672268906,
|
|
"grad_norm": 13.667659355410398,
|
|
"learning_rate": 4.896102662969259e-06,
|
|
"loss": 1.3283686637878418,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.6722689075630253,
|
|
"grad_norm": 10.250252779637346,
|
|
"learning_rate": 4.883881529103448e-06,
|
|
"loss": 1.7254778146743774,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.6743697478991597,
|
|
"grad_norm": 23.09605024251299,
|
|
"learning_rate": 4.8716610892937486e-06,
|
|
"loss": 1.882294774055481,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.6764705882352942,
|
|
"grad_norm": 9.75260822435192,
|
|
"learning_rate": 4.859441416583412e-06,
|
|
"loss": 1.955444574356079,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.6785714285714286,
|
|
"grad_norm": 9.214097989478232,
|
|
"learning_rate": 4.847222584011107e-06,
|
|
"loss": 1.3442355394363403,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 1.680672268907563,
|
|
"grad_norm": 7.0621430973608295,
|
|
"learning_rate": 4.8350046646104815e-06,
|
|
"loss": 1.3532618284225464,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 1.6827731092436975,
|
|
"grad_norm": 9.43032823639271,
|
|
"learning_rate": 4.8227877314097245e-06,
|
|
"loss": 1.8527226448059082,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.684873949579832,
|
|
"grad_norm": 42.238546118436666,
|
|
"learning_rate": 4.81057185743113e-06,
|
|
"loss": 2.1312098503112793,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.6869747899159664,
|
|
"grad_norm": 19.238236915867475,
|
|
"learning_rate": 4.798357115690661e-06,
|
|
"loss": 1.3131635189056396,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.6890756302521008,
|
|
"grad_norm": 11.839412971623531,
|
|
"learning_rate": 4.7861435791975124e-06,
|
|
"loss": 1.057523488998413,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.6911764705882353,
|
|
"grad_norm": 16.330203992434377,
|
|
"learning_rate": 4.7739313209536755e-06,
|
|
"loss": 1.510682225227356,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.6932773109243697,
|
|
"grad_norm": 9.476255130895225,
|
|
"learning_rate": 4.761720413953503e-06,
|
|
"loss": 2.0877933502197266,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.6953781512605042,
|
|
"grad_norm": 18.00296013944893,
|
|
"learning_rate": 4.7495109311832665e-06,
|
|
"loss": 0.9936963319778442,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.6974789915966386,
|
|
"grad_norm": 12.213588808955969,
|
|
"learning_rate": 4.737302945620732e-06,
|
|
"loss": 1.4148988723754883,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.699579831932773,
|
|
"grad_norm": 16.652878791868638,
|
|
"learning_rate": 4.72509653023471e-06,
|
|
"loss": 1.5457355976104736,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.7016806722689075,
|
|
"grad_norm": 12.137844142300786,
|
|
"learning_rate": 4.712891757984629e-06,
|
|
"loss": 1.1069682836532593,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.7037815126050422,
|
|
"grad_norm": 10.383790506526612,
|
|
"learning_rate": 4.700688701820096e-06,
|
|
"loss": 2.270923376083374,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.7058823529411766,
|
|
"grad_norm": 9.870143573450948,
|
|
"learning_rate": 4.688487434680459e-06,
|
|
"loss": 2.1212430000305176,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.707983193277311,
|
|
"grad_norm": 7.378583815892385,
|
|
"learning_rate": 4.6762880294943734e-06,
|
|
"loss": 1.340724229812622,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.7100840336134455,
|
|
"grad_norm": 10.60633944294548,
|
|
"learning_rate": 4.664090559179367e-06,
|
|
"loss": 1.1250860691070557,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.71218487394958,
|
|
"grad_norm": 10.395256226604776,
|
|
"learning_rate": 4.651895096641402e-06,
|
|
"loss": 1.3906278610229492,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 14.563103162972197,
|
|
"learning_rate": 4.639701714774439e-06,
|
|
"loss": 1.5373984575271606,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.7163865546218489,
|
|
"grad_norm": 11.845585553697056,
|
|
"learning_rate": 4.627510486459999e-06,
|
|
"loss": 1.1511554718017578,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.7184873949579833,
|
|
"grad_norm": 12.016172591706953,
|
|
"learning_rate": 4.615321484566741e-06,
|
|
"loss": 1.0511482954025269,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.7205882352941178,
|
|
"grad_norm": 24.328922920047308,
|
|
"learning_rate": 4.603134781950007e-06,
|
|
"loss": 1.6539651155471802,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.7226890756302522,
|
|
"grad_norm": 10.354684370723726,
|
|
"learning_rate": 4.590950451451397e-06,
|
|
"loss": 1.7340842485427856,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.7247899159663866,
|
|
"grad_norm": 9.128876197606015,
|
|
"learning_rate": 4.578768565898337e-06,
|
|
"loss": 1.9771497249603271,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.726890756302521,
|
|
"grad_norm": 13.12308018791223,
|
|
"learning_rate": 4.566589198103635e-06,
|
|
"loss": 1.6702903509140015,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.7289915966386555,
|
|
"grad_norm": 14.032448855066312,
|
|
"learning_rate": 4.554412420865052e-06,
|
|
"loss": 1.2594914436340332,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.73109243697479,
|
|
"grad_norm": 11.631886990165667,
|
|
"learning_rate": 4.542238306964863e-06,
|
|
"loss": 1.2319787740707397,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.7331932773109244,
|
|
"grad_norm": 12.012430999144566,
|
|
"learning_rate": 4.530066929169427e-06,
|
|
"loss": 1.631975769996643,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.7352941176470589,
|
|
"grad_norm": 14.157695450219515,
|
|
"learning_rate": 4.5178983602287476e-06,
|
|
"loss": 1.4831879138946533,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.7373949579831933,
|
|
"grad_norm": 12.493623072525319,
|
|
"learning_rate": 4.505732672876037e-06,
|
|
"loss": 1.225109338760376,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.7394957983193278,
|
|
"grad_norm": 30.459517703838213,
|
|
"learning_rate": 4.493569939827288e-06,
|
|
"loss": 1.6191500425338745,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.7415966386554622,
|
|
"grad_norm": 10.531191971687601,
|
|
"learning_rate": 4.48141023378083e-06,
|
|
"loss": 1.562519907951355,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.7436974789915967,
|
|
"grad_norm": 20.955204277046732,
|
|
"learning_rate": 4.4692536274169055e-06,
|
|
"loss": 1.5889461040496826,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.745798319327731,
|
|
"grad_norm": 16.275459231962,
|
|
"learning_rate": 4.457100193397226e-06,
|
|
"loss": 0.8582566976547241,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.7478991596638656,
|
|
"grad_norm": 8.59181477774897,
|
|
"learning_rate": 4.444950004364542e-06,
|
|
"loss": 1.2409437894821167,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 11.280859647806443,
|
|
"learning_rate": 4.432803132942208e-06,
|
|
"loss": 1.2449380159378052,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.7521008403361344,
|
|
"grad_norm": 13.088404793322951,
|
|
"learning_rate": 4.420659651733751e-06,
|
|
"loss": 1.2676522731781006,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.754201680672269,
|
|
"grad_norm": 10.60776961423833,
|
|
"learning_rate": 4.40851963332243e-06,
|
|
"loss": 1.5941420793533325,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.7563025210084033,
|
|
"grad_norm": 17.131655688023486,
|
|
"learning_rate": 4.396383150270811e-06,
|
|
"loss": 1.1451562643051147,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.7584033613445378,
|
|
"grad_norm": 16.683478300158875,
|
|
"learning_rate": 4.384250275120325e-06,
|
|
"loss": 1.4305951595306396,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.7605042016806722,
|
|
"grad_norm": 23.521629142849456,
|
|
"learning_rate": 4.372121080390841e-06,
|
|
"loss": 1.9824583530426025,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.7626050420168067,
|
|
"grad_norm": 12.354999677804352,
|
|
"learning_rate": 4.359995638580226e-06,
|
|
"loss": 1.2548645734786987,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 11.031648245105625,
|
|
"learning_rate": 4.34787402216392e-06,
|
|
"loss": 1.8208611011505127,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.7668067226890756,
|
|
"grad_norm": 12.87350659067892,
|
|
"learning_rate": 4.335756303594493e-06,
|
|
"loss": 1.3555166721343994,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.76890756302521,
|
|
"grad_norm": 11.399790863860508,
|
|
"learning_rate": 4.323642555301222e-06,
|
|
"loss": 0.9843342900276184,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.7710084033613445,
|
|
"grad_norm": 9.734229808167726,
|
|
"learning_rate": 4.311532849689649e-06,
|
|
"loss": 1.5203514099121094,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.773109243697479,
|
|
"grad_norm": 12.62619531995832,
|
|
"learning_rate": 4.299427259141155e-06,
|
|
"loss": 2.226682662963867,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.7752100840336134,
|
|
"grad_norm": 9.352423304681453,
|
|
"learning_rate": 4.2873258560125244e-06,
|
|
"loss": 1.4532074928283691,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.7773109243697478,
|
|
"grad_norm": 13.614475982132188,
|
|
"learning_rate": 4.275228712635511e-06,
|
|
"loss": 0.939800500869751,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.7794117647058822,
|
|
"grad_norm": 7.898241318034454,
|
|
"learning_rate": 4.263135901316406e-06,
|
|
"loss": 0.9362924098968506,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.7815126050420167,
|
|
"grad_norm": 12.878302238594825,
|
|
"learning_rate": 4.251047494335616e-06,
|
|
"loss": 1.4257563352584839,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.7836134453781511,
|
|
"grad_norm": 11.607398907584903,
|
|
"learning_rate": 4.238963563947212e-06,
|
|
"loss": 1.393942952156067,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 9.414377919796664,
|
|
"learning_rate": 4.226884182378513e-06,
|
|
"loss": 1.588603138923645,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.78781512605042,
|
|
"grad_norm": 9.24807113557597,
|
|
"learning_rate": 4.2148094218296485e-06,
|
|
"loss": 1.198427677154541,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.7899159663865545,
|
|
"grad_norm": 9.93269092312877,
|
|
"learning_rate": 4.202739354473127e-06,
|
|
"loss": 1.0912418365478516,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.792016806722689,
|
|
"grad_norm": 9.934910320433355,
|
|
"learning_rate": 4.190674052453405e-06,
|
|
"loss": 1.8104877471923828,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.7941176470588234,
|
|
"grad_norm": 9.124793784485341,
|
|
"learning_rate": 4.178613587886455e-06,
|
|
"loss": 1.337807297706604,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.7962184873949578,
|
|
"grad_norm": 12.887138564407095,
|
|
"learning_rate": 4.166558032859339e-06,
|
|
"loss": 1.0441133975982666,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.7983193277310925,
|
|
"grad_norm": 13.183784075535518,
|
|
"learning_rate": 4.154507459429769e-06,
|
|
"loss": 1.8002381324768066,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.800420168067227,
|
|
"grad_norm": 17.636030537350624,
|
|
"learning_rate": 4.142461939625685e-06,
|
|
"loss": 1.7534747123718262,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.8025210084033614,
|
|
"grad_norm": 13.439089280649503,
|
|
"learning_rate": 4.13042154544482e-06,
|
|
"loss": 2.5967888832092285,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.8046218487394958,
|
|
"grad_norm": 11.531721271683752,
|
|
"learning_rate": 4.1183863488542686e-06,
|
|
"loss": 0.9714012145996094,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.8067226890756303,
|
|
"grad_norm": 39.93239963926744,
|
|
"learning_rate": 4.106356421790062e-06,
|
|
"loss": 2.0358502864837646,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.8088235294117647,
|
|
"grad_norm": 13.143824988546921,
|
|
"learning_rate": 4.094331836156732e-06,
|
|
"loss": 1.2078362703323364,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.8109243697478992,
|
|
"grad_norm": 17.216726387192157,
|
|
"learning_rate": 4.082312663826886e-06,
|
|
"loss": 1.3551952838897705,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.8130252100840336,
|
|
"grad_norm": 9.960021372460178,
|
|
"learning_rate": 4.070298976640772e-06,
|
|
"loss": 1.7473708391189575,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.815126050420168,
|
|
"grad_norm": 8.856051876807816,
|
|
"learning_rate": 4.058290846405856e-06,
|
|
"loss": 1.1888244152069092,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.8172268907563025,
|
|
"grad_norm": 16.353016649836196,
|
|
"learning_rate": 4.046288344896388e-06,
|
|
"loss": 1.7867594957351685,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.819327731092437,
|
|
"grad_norm": 12.062167246659023,
|
|
"learning_rate": 4.034291543852973e-06,
|
|
"loss": 1.2903845310211182,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.8214285714285714,
|
|
"grad_norm": 12.560697459985716,
|
|
"learning_rate": 4.022300514982146e-06,
|
|
"loss": 1.4051203727722168,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.8235294117647058,
|
|
"grad_norm": 7.497031379547495,
|
|
"learning_rate": 4.010315329955941e-06,
|
|
"loss": 1.7378381490707397,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.8256302521008403,
|
|
"grad_norm": 13.858534174862163,
|
|
"learning_rate": 3.998336060411459e-06,
|
|
"loss": 1.1623207330703735,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.8277310924369747,
|
|
"grad_norm": 16.260048868681064,
|
|
"learning_rate": 3.986362777950448e-06,
|
|
"loss": 1.9922326803207397,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.8298319327731094,
|
|
"grad_norm": 10.526911105706054,
|
|
"learning_rate": 3.9743955541388645e-06,
|
|
"loss": 1.8183355331420898,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.8319327731092439,
|
|
"grad_norm": 30.127353686918507,
|
|
"learning_rate": 3.962434460506459e-06,
|
|
"loss": 1.20865797996521,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.8340336134453783,
|
|
"grad_norm": 9.693811930511602,
|
|
"learning_rate": 3.950479568546336e-06,
|
|
"loss": 1.2787063121795654,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.8361344537815127,
|
|
"grad_norm": 18.05960154864148,
|
|
"learning_rate": 3.938530949714533e-06,
|
|
"loss": 2.1469886302948,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.8382352941176472,
|
|
"grad_norm": 18.30460348258425,
|
|
"learning_rate": 3.926588675429591e-06,
|
|
"loss": 2.5014071464538574,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.8403361344537816,
|
|
"grad_norm": 9.407437407393076,
|
|
"learning_rate": 3.914652817072132e-06,
|
|
"loss": 1.2857444286346436,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.842436974789916,
|
|
"grad_norm": 16.37890925961833,
|
|
"learning_rate": 3.902723445984425e-06,
|
|
"loss": 0.8846265077590942,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.8445378151260505,
|
|
"grad_norm": 23.642202819656244,
|
|
"learning_rate": 3.890800633469968e-06,
|
|
"loss": 3.6164169311523438,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.846638655462185,
|
|
"grad_norm": 13.713411145435602,
|
|
"learning_rate": 3.878884450793053e-06,
|
|
"loss": 1.778512954711914,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.8487394957983194,
|
|
"grad_norm": 14.191930149580415,
|
|
"learning_rate": 3.866974969178348e-06,
|
|
"loss": 1.2984943389892578,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.8508403361344539,
|
|
"grad_norm": 11.358190736464952,
|
|
"learning_rate": 3.855072259810465e-06,
|
|
"loss": 1.233088493347168,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.8529411764705883,
|
|
"grad_norm": 12.19590848916144,
|
|
"learning_rate": 3.8431763938335415e-06,
|
|
"loss": 1.0973716974258423,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.8550420168067228,
|
|
"grad_norm": 8.299847504153597,
|
|
"learning_rate": 3.831287442350806e-06,
|
|
"loss": 1.9479036331176758,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 25.32246065502413,
|
|
"learning_rate": 3.819405476424164e-06,
|
|
"loss": 2.3243212699890137,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.8592436974789917,
|
|
"grad_norm": 16.82297657925073,
|
|
"learning_rate": 3.8075305670737605e-06,
|
|
"loss": 2.167454242706299,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.861344537815126,
|
|
"grad_norm": 21.753642919267325,
|
|
"learning_rate": 3.795662785277568e-06,
|
|
"loss": 1.3896931409835815,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.8634453781512605,
|
|
"grad_norm": 11.74483761622084,
|
|
"learning_rate": 3.783802201970953e-06,
|
|
"loss": 1.7062684297561646,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.865546218487395,
|
|
"grad_norm": 10.437798829559927,
|
|
"learning_rate": 3.7719488880462596e-06,
|
|
"loss": 1.996096134185791,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.8676470588235294,
|
|
"grad_norm": 12.757708819402238,
|
|
"learning_rate": 3.7601029143523767e-06,
|
|
"loss": 0.9396399259567261,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.8697478991596639,
|
|
"grad_norm": 9.851656503119592,
|
|
"learning_rate": 3.748264351694324e-06,
|
|
"loss": 1.384545922279358,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.8718487394957983,
|
|
"grad_norm": 9.806551408884758,
|
|
"learning_rate": 3.7364332708328232e-06,
|
|
"loss": 1.298504114151001,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.8739495798319328,
|
|
"grad_norm": 10.623737292924032,
|
|
"learning_rate": 3.7246097424838746e-06,
|
|
"loss": 1.395151138305664,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.8760504201680672,
|
|
"grad_norm": 10.061693679195699,
|
|
"learning_rate": 3.712793837318338e-06,
|
|
"loss": 0.9280238747596741,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.8781512605042017,
|
|
"grad_norm": 15.132177107654861,
|
|
"learning_rate": 3.7009856259615074e-06,
|
|
"loss": 1.3795464038848877,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.8802521008403361,
|
|
"grad_norm": 6.163863926370169,
|
|
"learning_rate": 3.689185178992689e-06,
|
|
"loss": 1.3113572597503662,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.8823529411764706,
|
|
"grad_norm": 9.130793358710777,
|
|
"learning_rate": 3.677392566944783e-06,
|
|
"loss": 1.580859661102295,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.884453781512605,
|
|
"grad_norm": 9.259911678328434,
|
|
"learning_rate": 3.665607860303857e-06,
|
|
"loss": 1.5074641704559326,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.8865546218487395,
|
|
"grad_norm": 11.62240320198324,
|
|
"learning_rate": 3.653831129508727e-06,
|
|
"loss": 1.438436508178711,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.888655462184874,
|
|
"grad_norm": 21.40632893627489,
|
|
"learning_rate": 3.642062444950537e-06,
|
|
"loss": 2.4116339683532715,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.8907563025210083,
|
|
"grad_norm": 11.75786970161833,
|
|
"learning_rate": 3.630301876972337e-06,
|
|
"loss": 1.5296099185943604,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.8928571428571428,
|
|
"grad_norm": 13.176259265738059,
|
|
"learning_rate": 3.618549495868662e-06,
|
|
"loss": 1.7645788192749023,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.8949579831932772,
|
|
"grad_norm": 8.946035833702966,
|
|
"learning_rate": 3.606805371885117e-06,
|
|
"loss": 1.528565526008606,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.8970588235294117,
|
|
"grad_norm": 9.910748332846175,
|
|
"learning_rate": 3.5950695752179487e-06,
|
|
"loss": 1.5352060794830322,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.8991596638655461,
|
|
"grad_norm": 8.872120526454859,
|
|
"learning_rate": 3.5833421760136323e-06,
|
|
"loss": 1.2181806564331055,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.9012605042016806,
|
|
"grad_norm": 8.52566041071061,
|
|
"learning_rate": 3.5716232443684486e-06,
|
|
"loss": 1.2715753316879272,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.903361344537815,
|
|
"grad_norm": 21.774545687178733,
|
|
"learning_rate": 3.559912850328069e-06,
|
|
"loss": 1.4792617559432983,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.9054621848739495,
|
|
"grad_norm": 19.592792336690916,
|
|
"learning_rate": 3.5482110638871325e-06,
|
|
"loss": 1.072256326675415,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.907563025210084,
|
|
"grad_norm": 13.065810555435853,
|
|
"learning_rate": 3.5365179549888306e-06,
|
|
"loss": 1.5988600254058838,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.9096638655462184,
|
|
"grad_norm": 21.891373834171244,
|
|
"learning_rate": 3.524833593524487e-06,
|
|
"loss": 2.473078489303589,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.9117647058823528,
|
|
"grad_norm": 10.756654205561983,
|
|
"learning_rate": 3.513158049333141e-06,
|
|
"loss": 1.8987966775894165,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.9138655462184873,
|
|
"grad_norm": 11.56568429501611,
|
|
"learning_rate": 3.50149139220113e-06,
|
|
"loss": 1.6164718866348267,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.9159663865546217,
|
|
"grad_norm": 11.650144499924947,
|
|
"learning_rate": 3.4898336918616726e-06,
|
|
"loss": 1.3376764059066772,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.9180672268907561,
|
|
"grad_norm": 10.167435576576402,
|
|
"learning_rate": 3.47818501799445e-06,
|
|
"loss": 1.7546143531799316,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.9201680672268906,
|
|
"grad_norm": 16.044159587879186,
|
|
"learning_rate": 3.4665454402251937e-06,
|
|
"loss": 1.4916424751281738,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.9222689075630253,
|
|
"grad_norm": 9.485222637010883,
|
|
"learning_rate": 3.4549150281252635e-06,
|
|
"loss": 1.6852712631225586,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.9243697478991597,
|
|
"grad_norm": 17.238783509738973,
|
|
"learning_rate": 3.443293851211237e-06,
|
|
"loss": 1.7150108814239502,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.9264705882352942,
|
|
"grad_norm": 13.404050851317407,
|
|
"learning_rate": 3.4316819789444893e-06,
|
|
"loss": 1.7211201190948486,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.9285714285714286,
|
|
"grad_norm": 12.556393984422412,
|
|
"learning_rate": 3.4200794807307834e-06,
|
|
"loss": 0.7268713712692261,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.930672268907563,
|
|
"grad_norm": 18.049874392140797,
|
|
"learning_rate": 3.40848642591985e-06,
|
|
"loss": 1.4612197875976562,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.9327731092436975,
|
|
"grad_norm": 15.899805779516837,
|
|
"learning_rate": 3.3969028838049765e-06,
|
|
"loss": 2.3325533866882324,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.934873949579832,
|
|
"grad_norm": 12.244325200092801,
|
|
"learning_rate": 3.3853289236225917e-06,
|
|
"loss": 1.3644397258758545,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.9369747899159664,
|
|
"grad_norm": 12.06507650622523,
|
|
"learning_rate": 3.37376461455185e-06,
|
|
"loss": 1.405503273010254,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.9390756302521008,
|
|
"grad_norm": 16.537370916012062,
|
|
"learning_rate": 3.362210025714222e-06,
|
|
"loss": 1.1844987869262695,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.9411764705882353,
|
|
"grad_norm": 10.411642432611085,
|
|
"learning_rate": 3.350665226173078e-06,
|
|
"loss": 2.294912815093994,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.9432773109243697,
|
|
"grad_norm": 7.385187019711569,
|
|
"learning_rate": 3.339130284933276e-06,
|
|
"loss": 1.3060452938079834,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.9453781512605042,
|
|
"grad_norm": 11.928015027901997,
|
|
"learning_rate": 3.327605270940751e-06,
|
|
"loss": 1.5017865896224976,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.9474789915966386,
|
|
"grad_norm": 23.754968850349012,
|
|
"learning_rate": 3.316090253082101e-06,
|
|
"loss": 1.9816479682922363,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.949579831932773,
|
|
"grad_norm": 15.3357371335637,
|
|
"learning_rate": 3.304585300184173e-06,
|
|
"loss": 1.4715440273284912,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.9516806722689075,
|
|
"grad_norm": 12.706896195884193,
|
|
"learning_rate": 3.293090481013661e-06,
|
|
"loss": 1.321998953819275,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.9537815126050422,
|
|
"grad_norm": 8.254920710769508,
|
|
"learning_rate": 3.28160586427668e-06,
|
|
"loss": 1.1600078344345093,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.9558823529411766,
|
|
"grad_norm": 12.971411985932859,
|
|
"learning_rate": 3.2701315186183692e-06,
|
|
"loss": 1.5981496572494507,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.957983193277311,
|
|
"grad_norm": 9.707338087138305,
|
|
"learning_rate": 3.258667512622475e-06,
|
|
"loss": 1.9018357992172241,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.9600840336134455,
|
|
"grad_norm": 19.34885988489593,
|
|
"learning_rate": 3.2472139148109416e-06,
|
|
"loss": 1.175397515296936,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.96218487394958,
|
|
"grad_norm": 14.470424387669999,
|
|
"learning_rate": 3.2357707936435013e-06,
|
|
"loss": 0.5444597005844116,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.9642857142857144,
|
|
"grad_norm": 5.567855687640852,
|
|
"learning_rate": 3.224338217517269e-06,
|
|
"loss": 1.3773345947265625,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.9663865546218489,
|
|
"grad_norm": 14.622402074305704,
|
|
"learning_rate": 3.212916254766326e-06,
|
|
"loss": 2.028517007827759,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.9684873949579833,
|
|
"grad_norm": 9.79269410710096,
|
|
"learning_rate": 3.20150497366132e-06,
|
|
"loss": 0.9947667121887207,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.9705882352941178,
|
|
"grad_norm": 27.334085978635635,
|
|
"learning_rate": 3.190104442409052e-06,
|
|
"loss": 1.7532271146774292,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.9726890756302522,
|
|
"grad_norm": 12.841320289359894,
|
|
"learning_rate": 3.1787147291520675e-06,
|
|
"loss": 2.114809036254883,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.9747899159663866,
|
|
"grad_norm": 11.483734843753465,
|
|
"learning_rate": 3.1673359019682538e-06,
|
|
"loss": 2.2796754837036133,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.976890756302521,
|
|
"grad_norm": 11.440585157823008,
|
|
"learning_rate": 3.1559680288704297e-06,
|
|
"loss": 1.172208547592163,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.9789915966386555,
|
|
"grad_norm": 17.223673892561905,
|
|
"learning_rate": 3.1446111778059405e-06,
|
|
"loss": 1.1454124450683594,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.98109243697479,
|
|
"grad_norm": 9.23862037561014,
|
|
"learning_rate": 3.1332654166562494e-06,
|
|
"loss": 1.6078896522521973,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.9831932773109244,
|
|
"grad_norm": 8.824526781008897,
|
|
"learning_rate": 3.1219308132365365e-06,
|
|
"loss": 1.2369673252105713,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.9852941176470589,
|
|
"grad_norm": 10.697868664174909,
|
|
"learning_rate": 3.110607435295289e-06,
|
|
"loss": 1.075582504272461,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.9873949579831933,
|
|
"grad_norm": 10.407621014847015,
|
|
"learning_rate": 3.099295350513898e-06,
|
|
"loss": 0.9495413899421692,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.9894957983193278,
|
|
"grad_norm": 12.92768742395414,
|
|
"learning_rate": 3.087994626506254e-06,
|
|
"loss": 1.3577098846435547,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.9915966386554622,
|
|
"grad_norm": 15.824553960297969,
|
|
"learning_rate": 3.0767053308183416e-06,
|
|
"loss": 1.229673981666565,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.9936974789915967,
|
|
"grad_norm": 10.163559398603317,
|
|
"learning_rate": 3.0654275309278382e-06,
|
|
"loss": 1.3727761507034302,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.995798319327731,
|
|
"grad_norm": 24.18617513536731,
|
|
"learning_rate": 3.0541612942437095e-06,
|
|
"loss": 1.2849650382995605,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.9978991596638656,
|
|
"grad_norm": 11.306358671227525,
|
|
"learning_rate": 3.0429066881058036e-06,
|
|
"loss": 1.644538164138794,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 8.518728725517601,
|
|
"learning_rate": 3.031663779784454e-06,
|
|
"loss": 1.627841591835022,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 2.0021008403361344,
|
|
"grad_norm": 7.931423247551538,
|
|
"learning_rate": 3.020432636480074e-06,
|
|
"loss": 0.2781870365142822,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 2.004201680672269,
|
|
"grad_norm": 10.005905501706337,
|
|
"learning_rate": 3.0092133253227563e-06,
|
|
"loss": 0.4595562815666199,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 2.0063025210084033,
|
|
"grad_norm": 11.609856477894333,
|
|
"learning_rate": 2.9980059133718687e-06,
|
|
"loss": 0.5985803604125977,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 2.008403361344538,
|
|
"grad_norm": 9.586217141888886,
|
|
"learning_rate": 2.986810467615659e-06,
|
|
"loss": 0.41152679920196533,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 2.0105042016806722,
|
|
"grad_norm": 7.487148279080336,
|
|
"learning_rate": 2.9756270549708497e-06,
|
|
"loss": 0.2737478017807007,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 2.0126050420168067,
|
|
"grad_norm": 7.91452475392683,
|
|
"learning_rate": 2.9644557422822406e-06,
|
|
"loss": 0.3578256368637085,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 2.014705882352941,
|
|
"grad_norm": 8.457279529419324,
|
|
"learning_rate": 2.9532965963223076e-06,
|
|
"loss": 0.3519413471221924,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 2.0168067226890756,
|
|
"grad_norm": 12.98619129259147,
|
|
"learning_rate": 2.9421496837908036e-06,
|
|
"loss": 0.5450835227966309,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 2.01890756302521,
|
|
"grad_norm": 10.10649833997759,
|
|
"learning_rate": 2.9310150713143637e-06,
|
|
"loss": 0.7118933796882629,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 2.0210084033613445,
|
|
"grad_norm": 14.582945225729798,
|
|
"learning_rate": 2.9198928254461e-06,
|
|
"loss": 0.6735545992851257,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 2.023109243697479,
|
|
"grad_norm": 13.612285721248329,
|
|
"learning_rate": 2.908783012665209e-06,
|
|
"loss": 0.6521182060241699,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 2.0252100840336134,
|
|
"grad_norm": 9.78372244316324,
|
|
"learning_rate": 2.8976856993765766e-06,
|
|
"loss": 0.476604163646698,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 2.027310924369748,
|
|
"grad_norm": 12.44882078007661,
|
|
"learning_rate": 2.8866009519103705e-06,
|
|
"loss": 0.46952176094055176,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 2.0294117647058822,
|
|
"grad_norm": 9.57528793268096,
|
|
"learning_rate": 2.875528836521658e-06,
|
|
"loss": 0.4453829526901245,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 2.0315126050420167,
|
|
"grad_norm": 10.439598731466846,
|
|
"learning_rate": 2.864469419389997e-06,
|
|
"loss": 0.2944750189781189,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 2.033613445378151,
|
|
"grad_norm": 11.844927445149192,
|
|
"learning_rate": 2.8534227666190484e-06,
|
|
"loss": 0.8550271391868591,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 2.0357142857142856,
|
|
"grad_norm": 13.15280870383674,
|
|
"learning_rate": 2.8423889442361797e-06,
|
|
"loss": 0.38192200660705566,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 2.03781512605042,
|
|
"grad_norm": 8.57118977839964,
|
|
"learning_rate": 2.831368018192071e-06,
|
|
"loss": 0.7316254377365112,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 2.0399159663865545,
|
|
"grad_norm": 11.697813667138346,
|
|
"learning_rate": 2.8203600543603116e-06,
|
|
"loss": 0.7615312337875366,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 2.042016806722689,
|
|
"grad_norm": 15.849948895683397,
|
|
"learning_rate": 2.809365118537024e-06,
|
|
"loss": 0.8274880647659302,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 2.0441176470588234,
|
|
"grad_norm": 10.693659421700279,
|
|
"learning_rate": 2.7983832764404517e-06,
|
|
"loss": 0.31469643115997314,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 2.046218487394958,
|
|
"grad_norm": 11.44650988759124,
|
|
"learning_rate": 2.787414593710583e-06,
|
|
"loss": 0.20855772495269775,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 2.0483193277310923,
|
|
"grad_norm": 15.81504400596588,
|
|
"learning_rate": 2.7764591359087415e-06,
|
|
"loss": 0.6759412884712219,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 2.0504201680672267,
|
|
"grad_norm": 10.204964393759596,
|
|
"learning_rate": 2.7655169685172146e-06,
|
|
"loss": 0.4555593430995941,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 2.052521008403361,
|
|
"grad_norm": 11.261244778956014,
|
|
"learning_rate": 2.7545881569388404e-06,
|
|
"loss": 0.22477459907531738,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 2.0546218487394956,
|
|
"grad_norm": 12.246362257874592,
|
|
"learning_rate": 2.7436727664966368e-06,
|
|
"loss": 0.47387319803237915,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 2.05672268907563,
|
|
"grad_norm": 8.866095217298547,
|
|
"learning_rate": 2.7327708624333936e-06,
|
|
"loss": 0.46857523918151855,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 13.695082205363835,
|
|
"learning_rate": 2.7218825099112966e-06,
|
|
"loss": 0.4427967071533203,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 2.060924369747899,
|
|
"grad_norm": 14.011986042608353,
|
|
"learning_rate": 2.7110077740115315e-06,
|
|
"loss": 1.3617768287658691,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 2.0630252100840334,
|
|
"grad_norm": 11.10530101134504,
|
|
"learning_rate": 2.7001467197338905e-06,
|
|
"loss": 0.8060270547866821,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 2.0651260504201683,
|
|
"grad_norm": 11.494347595831918,
|
|
"learning_rate": 2.6892994119963965e-06,
|
|
"loss": 0.29366880655288696,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 2.0672268907563027,
|
|
"grad_norm": 11.085826364505666,
|
|
"learning_rate": 2.678465915634899e-06,
|
|
"loss": 0.40074852108955383,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 2.069327731092437,
|
|
"grad_norm": 12.536178143522665,
|
|
"learning_rate": 2.667646295402704e-06,
|
|
"loss": 0.4710817337036133,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 2.0714285714285716,
|
|
"grad_norm": 10.305745046176337,
|
|
"learning_rate": 2.656840615970169e-06,
|
|
"loss": 0.37437137961387634,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 2.073529411764706,
|
|
"grad_norm": 19.03565667772653,
|
|
"learning_rate": 2.646048941924333e-06,
|
|
"loss": 0.9739346504211426,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 2.0756302521008405,
|
|
"grad_norm": 14.96080509908609,
|
|
"learning_rate": 2.635271337768517e-06,
|
|
"loss": 0.6326197981834412,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 2.077731092436975,
|
|
"grad_norm": 7.840013094660732,
|
|
"learning_rate": 2.6245078679219503e-06,
|
|
"loss": 0.15397483110427856,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 2.0798319327731094,
|
|
"grad_norm": 14.418579873307118,
|
|
"learning_rate": 2.613758596719373e-06,
|
|
"loss": 0.5905511379241943,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 2.081932773109244,
|
|
"grad_norm": 11.365880088991135,
|
|
"learning_rate": 2.603023588410662e-06,
|
|
"loss": 0.588984489440918,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 2.0840336134453783,
|
|
"grad_norm": 8.467121557746795,
|
|
"learning_rate": 2.5923029071604443e-06,
|
|
"loss": 0.33690521121025085,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 2.0861344537815127,
|
|
"grad_norm": 12.160131630042047,
|
|
"learning_rate": 2.5815966170477065e-06,
|
|
"loss": 0.23294681310653687,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 2.088235294117647,
|
|
"grad_norm": 10.365330226343618,
|
|
"learning_rate": 2.5709047820654236e-06,
|
|
"loss": 0.4404110908508301,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 2.0903361344537816,
|
|
"grad_norm": 11.818988939924239,
|
|
"learning_rate": 2.5602274661201643e-06,
|
|
"loss": 0.37340593338012695,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 2.092436974789916,
|
|
"grad_norm": 8.208877260345274,
|
|
"learning_rate": 2.549564733031722e-06,
|
|
"loss": 0.3671455979347229,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 2.0945378151260505,
|
|
"grad_norm": 7.093067310603152,
|
|
"learning_rate": 2.538916646532718e-06,
|
|
"loss": 0.6218878030776978,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 2.096638655462185,
|
|
"grad_norm": 18.77124675473162,
|
|
"learning_rate": 2.528283270268238e-06,
|
|
"loss": 1.2778301239013672,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 2.0987394957983194,
|
|
"grad_norm": 11.063294944827689,
|
|
"learning_rate": 2.517664667795434e-06,
|
|
"loss": 0.6543454527854919,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 2.100840336134454,
|
|
"grad_norm": 14.644880561077354,
|
|
"learning_rate": 2.5070609025831605e-06,
|
|
"loss": 0.42762574553489685,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 2.1029411764705883,
|
|
"grad_norm": 9.207415458189004,
|
|
"learning_rate": 2.49647203801158e-06,
|
|
"loss": 0.40861833095550537,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 2.1050420168067228,
|
|
"grad_norm": 13.2306061568643,
|
|
"learning_rate": 2.4858981373718006e-06,
|
|
"loss": 0.6941218376159668,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 2.107142857142857,
|
|
"grad_norm": 7.498871262137285,
|
|
"learning_rate": 2.47533926386548e-06,
|
|
"loss": 0.1615523397922516,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 2.1092436974789917,
|
|
"grad_norm": 10.133323758514624,
|
|
"learning_rate": 2.4647954806044633e-06,
|
|
"loss": 0.30699750781059265,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 2.111344537815126,
|
|
"grad_norm": 7.757159399213717,
|
|
"learning_rate": 2.454266850610398e-06,
|
|
"loss": 0.27435654401779175,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 2.1134453781512605,
|
|
"grad_norm": 12.86368889886839,
|
|
"learning_rate": 2.443753436814354e-06,
|
|
"loss": 0.6352673172950745,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 2.115546218487395,
|
|
"grad_norm": 10.035026243076201,
|
|
"learning_rate": 2.433255302056458e-06,
|
|
"loss": 0.3478729724884033,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 2.1176470588235294,
|
|
"grad_norm": 24.70500681984219,
|
|
"learning_rate": 2.4227725090855063e-06,
|
|
"loss": 0.3971726894378662,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 2.119747899159664,
|
|
"grad_norm": 10.231694956778009,
|
|
"learning_rate": 2.412305120558599e-06,
|
|
"loss": 0.9241357445716858,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 2.1218487394957983,
|
|
"grad_norm": 10.754074840458836,
|
|
"learning_rate": 2.40185319904076e-06,
|
|
"loss": 0.2883678674697876,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 2.1239495798319328,
|
|
"grad_norm": 11.61442466478921,
|
|
"learning_rate": 2.391416807004568e-06,
|
|
"loss": 0.39812758564949036,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 2.1260504201680672,
|
|
"grad_norm": 12.063207487307261,
|
|
"learning_rate": 2.3809960068297732e-06,
|
|
"loss": 0.6487483978271484,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 2.1281512605042017,
|
|
"grad_norm": 10.36458033229305,
|
|
"learning_rate": 2.370590860802938e-06,
|
|
"loss": 0.44781216979026794,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 2.130252100840336,
|
|
"grad_norm": 12.86609548868944,
|
|
"learning_rate": 2.3602014311170524e-06,
|
|
"loss": 0.3241298496723175,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 2.1323529411764706,
|
|
"grad_norm": 15.003708633549396,
|
|
"learning_rate": 2.3498277798711725e-06,
|
|
"loss": 0.4608106315135956,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 2.134453781512605,
|
|
"grad_norm": 12.622777494736392,
|
|
"learning_rate": 2.3394699690700395e-06,
|
|
"loss": 0.5967488884925842,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 2.1365546218487395,
|
|
"grad_norm": 12.188684362144896,
|
|
"learning_rate": 2.3291280606237186e-06,
|
|
"loss": 0.4074782729148865,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 2.138655462184874,
|
|
"grad_norm": 8.220918786617895,
|
|
"learning_rate": 2.3188021163472206e-06,
|
|
"loss": 0.354820191860199,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 2.1407563025210083,
|
|
"grad_norm": 11.553371341140592,
|
|
"learning_rate": 2.308492197960141e-06,
|
|
"loss": 0.23287059366703033,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 14.317771847578689,
|
|
"learning_rate": 2.2981983670862796e-06,
|
|
"loss": 1.0973201990127563,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 2.1449579831932772,
|
|
"grad_norm": 15.607772284187238,
|
|
"learning_rate": 2.2879206852532854e-06,
|
|
"loss": 0.4452645778656006,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 2.1470588235294117,
|
|
"grad_norm": 7.00439488336282,
|
|
"learning_rate": 2.2776592138922806e-06,
|
|
"loss": 0.273881733417511,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 2.149159663865546,
|
|
"grad_norm": 7.187420218160357,
|
|
"learning_rate": 2.2674140143374904e-06,
|
|
"loss": 0.20633578300476074,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 2.1512605042016806,
|
|
"grad_norm": 13.420920542298727,
|
|
"learning_rate": 2.2571851478258903e-06,
|
|
"loss": 0.38969674706459045,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 2.153361344537815,
|
|
"grad_norm": 10.472493973829605,
|
|
"learning_rate": 2.2469726754968207e-06,
|
|
"loss": 0.26989856362342834,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 2.1554621848739495,
|
|
"grad_norm": 7.949292792396312,
|
|
"learning_rate": 2.236776658391641e-06,
|
|
"loss": 0.5260115265846252,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 2.157563025210084,
|
|
"grad_norm": 10.053195823647455,
|
|
"learning_rate": 2.2265971574533474e-06,
|
|
"loss": 0.2469472587108612,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 2.1596638655462184,
|
|
"grad_norm": 10.2298360545254,
|
|
"learning_rate": 2.2164342335262244e-06,
|
|
"loss": 0.201723113656044,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 2.161764705882353,
|
|
"grad_norm": 8.12616695246335,
|
|
"learning_rate": 2.2062879473554654e-06,
|
|
"loss": 0.6355183124542236,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 2.1638655462184873,
|
|
"grad_norm": 8.264174037140188,
|
|
"learning_rate": 2.1961583595868253e-06,
|
|
"loss": 0.32272863388061523,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 2.1659663865546217,
|
|
"grad_norm": 7.57735519623744,
|
|
"learning_rate": 2.186045530766244e-06,
|
|
"loss": 0.24386917054653168,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 2.168067226890756,
|
|
"grad_norm": 14.627933253501514,
|
|
"learning_rate": 2.1759495213394965e-06,
|
|
"loss": 0.482686847448349,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 2.1701680672268906,
|
|
"grad_norm": 11.019581985915595,
|
|
"learning_rate": 2.165870391651819e-06,
|
|
"loss": 0.5142661333084106,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 2.172268907563025,
|
|
"grad_norm": 5.2143700025854605,
|
|
"learning_rate": 2.155808201947563e-06,
|
|
"loss": 0.21703539788722992,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 2.1743697478991595,
|
|
"grad_norm": 8.891172137251035,
|
|
"learning_rate": 2.145763012369824e-06,
|
|
"loss": 0.4068147540092468,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 2.176470588235294,
|
|
"grad_norm": 8.535335024751161,
|
|
"learning_rate": 2.1357348829600816e-06,
|
|
"loss": 0.5949288606643677,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 2.1785714285714284,
|
|
"grad_norm": 9.215827064680754,
|
|
"learning_rate": 2.125723873657852e-06,
|
|
"loss": 0.44353166222572327,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 2.180672268907563,
|
|
"grad_norm": 10.942670044267897,
|
|
"learning_rate": 2.115730044300313e-06,
|
|
"loss": 0.4212431013584137,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 2.1827731092436973,
|
|
"grad_norm": 9.670977047855832,
|
|
"learning_rate": 2.105753454621966e-06,
|
|
"loss": 0.37279778718948364,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 2.184873949579832,
|
|
"grad_norm": 15.451697195363522,
|
|
"learning_rate": 2.095794164254259e-06,
|
|
"loss": 0.6137001514434814,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 2.1869747899159666,
|
|
"grad_norm": 9.221135375907481,
|
|
"learning_rate": 2.0858522327252467e-06,
|
|
"loss": 0.20706136524677277,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 2.189075630252101,
|
|
"grad_norm": 7.176753088694497,
|
|
"learning_rate": 2.0759277194592208e-06,
|
|
"loss": 0.3732944130897522,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 2.1911764705882355,
|
|
"grad_norm": 6.567221722351546,
|
|
"learning_rate": 2.06602068377637e-06,
|
|
"loss": 0.11849310249090195,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 2.19327731092437,
|
|
"grad_norm": 8.709699534320997,
|
|
"learning_rate": 2.0561311848924082e-06,
|
|
"loss": 0.35089147090911865,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 2.1953781512605044,
|
|
"grad_norm": 6.892712538403868,
|
|
"learning_rate": 2.0462592819182377e-06,
|
|
"loss": 0.3482816219329834,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 2.197478991596639,
|
|
"grad_norm": 11.886398314518281,
|
|
"learning_rate": 2.0364050338595792e-06,
|
|
"loss": 0.9048193097114563,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 2.1995798319327733,
|
|
"grad_norm": 9.38373013746351,
|
|
"learning_rate": 2.0265684996166345e-06,
|
|
"loss": 0.34331268072128296,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 2.2016806722689077,
|
|
"grad_norm": 13.372941805785942,
|
|
"learning_rate": 2.0167497379837254e-06,
|
|
"loss": 0.35536718368530273,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 2.203781512605042,
|
|
"grad_norm": 7.556671458015662,
|
|
"learning_rate": 2.0069488076489445e-06,
|
|
"loss": 0.20954403281211853,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 2.2058823529411766,
|
|
"grad_norm": 8.35211924521852,
|
|
"learning_rate": 1.997165767193801e-06,
|
|
"loss": 0.5290908813476562,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 2.207983193277311,
|
|
"grad_norm": 10.477600594311985,
|
|
"learning_rate": 1.9874006750928783e-06,
|
|
"loss": 0.44289880990982056,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 2.2100840336134455,
|
|
"grad_norm": 8.191084415042441,
|
|
"learning_rate": 1.97765358971348e-06,
|
|
"loss": 0.48035284876823425,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 2.21218487394958,
|
|
"grad_norm": 14.892166225942573,
|
|
"learning_rate": 1.967924569315275e-06,
|
|
"loss": 0.2514810562133789,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 2.2142857142857144,
|
|
"grad_norm": 9.73259092640212,
|
|
"learning_rate": 1.958213672049964e-06,
|
|
"loss": 0.9599279165267944,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 2.216386554621849,
|
|
"grad_norm": 10.01655023470503,
|
|
"learning_rate": 1.9485209559609148e-06,
|
|
"loss": 0.30860060453414917,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 2.2184873949579833,
|
|
"grad_norm": 21.410996670654146,
|
|
"learning_rate": 1.9388464789828316e-06,
|
|
"loss": 0.7747633457183838,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 2.2205882352941178,
|
|
"grad_norm": 14.431755517939498,
|
|
"learning_rate": 1.9291902989413935e-06,
|
|
"loss": 0.3529064655303955,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 2.222689075630252,
|
|
"grad_norm": 19.684041196466477,
|
|
"learning_rate": 1.9195524735529237e-06,
|
|
"loss": 1.0967960357666016,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 2.2247899159663866,
|
|
"grad_norm": 9.812143417300405,
|
|
"learning_rate": 1.909933060424029e-06,
|
|
"loss": 0.700248122215271,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 2.226890756302521,
|
|
"grad_norm": 10.765309787627796,
|
|
"learning_rate": 1.9003321170512728e-06,
|
|
"loss": 0.9177491068840027,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 2.2289915966386555,
|
|
"grad_norm": 12.811209076397098,
|
|
"learning_rate": 1.890749700820813e-06,
|
|
"loss": 0.543596625328064,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 2.23109243697479,
|
|
"grad_norm": 17.31969106411562,
|
|
"learning_rate": 1.8811858690080764e-06,
|
|
"loss": 0.7324357032775879,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 2.2331932773109244,
|
|
"grad_norm": 9.418477503451474,
|
|
"learning_rate": 1.8716406787774e-06,
|
|
"loss": 0.4075426459312439,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 2.235294117647059,
|
|
"grad_norm": 7.9458980097838605,
|
|
"learning_rate": 1.862114187181705e-06,
|
|
"loss": 0.39563894271850586,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 2.2373949579831933,
|
|
"grad_norm": 8.299260491259234,
|
|
"learning_rate": 1.8526064511621455e-06,
|
|
"loss": 0.37604600191116333,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 2.2394957983193278,
|
|
"grad_norm": 10.435458479716717,
|
|
"learning_rate": 1.843117527547768e-06,
|
|
"loss": 0.6682062745094299,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 2.241596638655462,
|
|
"grad_norm": 8.776734857977067,
|
|
"learning_rate": 1.8336474730551807e-06,
|
|
"loss": 0.19220635294914246,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 2.2436974789915967,
|
|
"grad_norm": 14.521651377727974,
|
|
"learning_rate": 1.8241963442882005e-06,
|
|
"loss": 0.27735865116119385,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 2.245798319327731,
|
|
"grad_norm": 8.651493755796526,
|
|
"learning_rate": 1.8147641977375313e-06,
|
|
"loss": 0.41572022438049316,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 2.2478991596638656,
|
|
"grad_norm": 7.20135853576087,
|
|
"learning_rate": 1.8053510897804105e-06,
|
|
"loss": 0.25049227476119995,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 8.485627286621954,
|
|
"learning_rate": 1.7959570766802847e-06,
|
|
"loss": 0.17869159579277039,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 2.2521008403361344,
|
|
"grad_norm": 8.831664553556859,
|
|
"learning_rate": 1.786582214586462e-06,
|
|
"loss": 0.2621746361255646,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 2.254201680672269,
|
|
"grad_norm": 13.640791806331189,
|
|
"learning_rate": 1.77722655953379e-06,
|
|
"loss": 0.33446362614631653,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 2.2563025210084033,
|
|
"grad_norm": 5.657158630793571,
|
|
"learning_rate": 1.7678901674423044e-06,
|
|
"loss": 0.17267954349517822,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 2.258403361344538,
|
|
"grad_norm": 11.516922535812704,
|
|
"learning_rate": 1.7585730941169105e-06,
|
|
"loss": 0.5281901955604553,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 2.2605042016806722,
|
|
"grad_norm": 11.298403957574713,
|
|
"learning_rate": 1.7492753952470415e-06,
|
|
"loss": 0.2754780352115631,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 2.2626050420168067,
|
|
"grad_norm": 10.413722402153681,
|
|
"learning_rate": 1.739997126406322e-06,
|
|
"loss": 0.3246016502380371,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 2.264705882352941,
|
|
"grad_norm": 14.097971965363062,
|
|
"learning_rate": 1.7307383430522474e-06,
|
|
"loss": 0.6660511493682861,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 2.2668067226890756,
|
|
"grad_norm": 22.503701517732946,
|
|
"learning_rate": 1.7214991005258386e-06,
|
|
"loss": 1.2165361642837524,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 2.26890756302521,
|
|
"grad_norm": 8.328219817576464,
|
|
"learning_rate": 1.7122794540513265e-06,
|
|
"loss": 0.18396508693695068,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 2.2710084033613445,
|
|
"grad_norm": 7.768308930354123,
|
|
"learning_rate": 1.703079458735805e-06,
|
|
"loss": 0.42018991708755493,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 2.273109243697479,
|
|
"grad_norm": 9.610477928803583,
|
|
"learning_rate": 1.6938991695689184e-06,
|
|
"loss": 0.38192903995513916,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 2.2752100840336134,
|
|
"grad_norm": 9.57071965935329,
|
|
"learning_rate": 1.684738641422517e-06,
|
|
"loss": 0.4953494966030121,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 2.277310924369748,
|
|
"grad_norm": 11.62580762547179,
|
|
"learning_rate": 1.6755979290503437e-06,
|
|
"loss": 0.5324037075042725,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 2.2794117647058822,
|
|
"grad_norm": 9.119930665905265,
|
|
"learning_rate": 1.666477087087694e-06,
|
|
"loss": 0.6618460416793823,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 2.2815126050420167,
|
|
"grad_norm": 12.668770516893803,
|
|
"learning_rate": 1.6573761700511004e-06,
|
|
"loss": 0.29154300689697266,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 2.283613445378151,
|
|
"grad_norm": 10.126878534173718,
|
|
"learning_rate": 1.6482952323379958e-06,
|
|
"loss": 0.39994263648986816,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 8.084921146733947,
|
|
"learning_rate": 1.639234328226399e-06,
|
|
"loss": 0.2049681693315506,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 2.28781512605042,
|
|
"grad_norm": 9.167757841002748,
|
|
"learning_rate": 1.6301935118745826e-06,
|
|
"loss": 0.35848674178123474,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 2.2899159663865545,
|
|
"grad_norm": 12.543365522318467,
|
|
"learning_rate": 1.621172837320754e-06,
|
|
"loss": 0.4794918894767761,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 2.292016806722689,
|
|
"grad_norm": 6.873717233986044,
|
|
"learning_rate": 1.6121723584827259e-06,
|
|
"loss": 0.3671627342700958,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 2.2941176470588234,
|
|
"grad_norm": 9.315544619619539,
|
|
"learning_rate": 1.6031921291576048e-06,
|
|
"loss": 0.25063830614089966,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 2.296218487394958,
|
|
"grad_norm": 11.618408926786485,
|
|
"learning_rate": 1.5942322030214547e-06,
|
|
"loss": 0.7581193447113037,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 2.2983193277310923,
|
|
"grad_norm": 7.9613247000723595,
|
|
"learning_rate": 1.5852926336289926e-06,
|
|
"loss": 0.4217086434364319,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 2.3004201680672267,
|
|
"grad_norm": 10.341036096752598,
|
|
"learning_rate": 1.5763734744132587e-06,
|
|
"loss": 0.5018645524978638,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 2.302521008403361,
|
|
"grad_norm": 14.166467122386207,
|
|
"learning_rate": 1.5674747786852935e-06,
|
|
"loss": 0.5745636224746704,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 2.3046218487394956,
|
|
"grad_norm": 9.77165887856765,
|
|
"learning_rate": 1.5585965996338314e-06,
|
|
"loss": 0.9145222902297974,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 2.30672268907563,
|
|
"grad_norm": 15.937224453039251,
|
|
"learning_rate": 1.5497389903249705e-06,
|
|
"loss": 0.4312666058540344,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 2.3088235294117645,
|
|
"grad_norm": 8.945920679970577,
|
|
"learning_rate": 1.5409020037018652e-06,
|
|
"loss": 0.4121660590171814,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 2.310924369747899,
|
|
"grad_norm": 10.839281933281265,
|
|
"learning_rate": 1.5320856925843997e-06,
|
|
"loss": 0.8646482825279236,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 2.3130252100840334,
|
|
"grad_norm": 12.806561724880765,
|
|
"learning_rate": 1.5232901096688847e-06,
|
|
"loss": 0.784586489200592,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 2.315126050420168,
|
|
"grad_norm": 10.817682905964707,
|
|
"learning_rate": 1.5145153075277286e-06,
|
|
"loss": 0.9424635171890259,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 2.3172268907563023,
|
|
"grad_norm": 8.922023653272449,
|
|
"learning_rate": 1.505761338609137e-06,
|
|
"loss": 0.28385645151138306,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 2.3193277310924367,
|
|
"grad_norm": 15.30593506620364,
|
|
"learning_rate": 1.4970282552367854e-06,
|
|
"loss": 0.6689031720161438,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 2.3214285714285716,
|
|
"grad_norm": 10.05546946420467,
|
|
"learning_rate": 1.4883161096095189e-06,
|
|
"loss": 0.691364586353302,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 2.323529411764706,
|
|
"grad_norm": 13.976863852979069,
|
|
"learning_rate": 1.4796249538010354e-06,
|
|
"loss": 0.23520073294639587,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 2.3256302521008405,
|
|
"grad_norm": 9.578643377397341,
|
|
"learning_rate": 1.4709548397595674e-06,
|
|
"loss": 0.4271107316017151,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 2.327731092436975,
|
|
"grad_norm": 16.17388877757899,
|
|
"learning_rate": 1.4623058193075852e-06,
|
|
"loss": 0.9280604720115662,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 2.3298319327731094,
|
|
"grad_norm": 13.041308775276805,
|
|
"learning_rate": 1.453677944141474e-06,
|
|
"loss": 0.33376407623291016,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 2.331932773109244,
|
|
"grad_norm": 13.186142451412863,
|
|
"learning_rate": 1.4450712658312356e-06,
|
|
"loss": 0.7442219853401184,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 2.3340336134453783,
|
|
"grad_norm": 10.969810510823187,
|
|
"learning_rate": 1.43648583582017e-06,
|
|
"loss": 1.27920663356781,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 2.3361344537815127,
|
|
"grad_norm": 22.653518753891586,
|
|
"learning_rate": 1.4279217054245793e-06,
|
|
"loss": 0.6456579566001892,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 2.338235294117647,
|
|
"grad_norm": 13.638307761366974,
|
|
"learning_rate": 1.4193789258334485e-06,
|
|
"loss": 1.1350394487380981,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 2.3403361344537816,
|
|
"grad_norm": 10.59397199917471,
|
|
"learning_rate": 1.4108575481081522e-06,
|
|
"loss": 0.5290108919143677,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 2.342436974789916,
|
|
"grad_norm": 9.100247445169298,
|
|
"learning_rate": 1.4023576231821362e-06,
|
|
"loss": 0.2833002209663391,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 2.3445378151260505,
|
|
"grad_norm": 18.49442431345445,
|
|
"learning_rate": 1.3938792018606278e-06,
|
|
"loss": 0.37826409935951233,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 2.346638655462185,
|
|
"grad_norm": 12.477810112402349,
|
|
"learning_rate": 1.3854223348203171e-06,
|
|
"loss": 0.3945717215538025,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 2.3487394957983194,
|
|
"grad_norm": 8.789544191123422,
|
|
"learning_rate": 1.376987072609065e-06,
|
|
"loss": 0.31352269649505615,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 2.350840336134454,
|
|
"grad_norm": 14.219313270123468,
|
|
"learning_rate": 1.368573465645599e-06,
|
|
"loss": 0.8024647235870361,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 10.007349065084831,
|
|
"learning_rate": 1.360181564219204e-06,
|
|
"loss": 0.7791054248809814,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 2.3550420168067228,
|
|
"grad_norm": 12.213364584526106,
|
|
"learning_rate": 1.351811418489436e-06,
|
|
"loss": 0.37381619215011597,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 2.357142857142857,
|
|
"grad_norm": 10.713765677783302,
|
|
"learning_rate": 1.3434630784858067e-06,
|
|
"loss": 0.3184419870376587,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 2.3592436974789917,
|
|
"grad_norm": 11.470256693930569,
|
|
"learning_rate": 1.335136594107498e-06,
|
|
"loss": 0.3431350886821747,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 2.361344537815126,
|
|
"grad_norm": 9.119684880351647,
|
|
"learning_rate": 1.3268320151230518e-06,
|
|
"loss": 0.4296434819698334,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 2.3634453781512605,
|
|
"grad_norm": 10.866853294417046,
|
|
"learning_rate": 1.3185493911700854e-06,
|
|
"loss": 0.48791950941085815,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 2.365546218487395,
|
|
"grad_norm": 9.540925370722046,
|
|
"learning_rate": 1.3102887717549812e-06,
|
|
"loss": 0.29711413383483887,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 2.3676470588235294,
|
|
"grad_norm": 11.627989144711366,
|
|
"learning_rate": 1.302050206252602e-06,
|
|
"loss": 0.39902636408805847,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 2.369747899159664,
|
|
"grad_norm": 6.365770038684127,
|
|
"learning_rate": 1.2938337439059868e-06,
|
|
"loss": 0.2864948511123657,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 2.3718487394957983,
|
|
"grad_norm": 12.606248234313094,
|
|
"learning_rate": 1.2856394338260691e-06,
|
|
"loss": 0.42151930928230286,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 2.3739495798319328,
|
|
"grad_norm": 8.544922775672411,
|
|
"learning_rate": 1.2774673249913656e-06,
|
|
"loss": 0.330949604511261,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 2.3760504201680672,
|
|
"grad_norm": 12.681787759512487,
|
|
"learning_rate": 1.2693174662477003e-06,
|
|
"loss": 0.832221508026123,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 2.3781512605042017,
|
|
"grad_norm": 16.15687539830067,
|
|
"learning_rate": 1.2611899063079002e-06,
|
|
"loss": 0.3243201971054077,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 2.380252100840336,
|
|
"grad_norm": 11.42137338593432,
|
|
"learning_rate": 1.253084693751514e-06,
|
|
"loss": 0.4209938049316406,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 2.3823529411764706,
|
|
"grad_norm": 10.49566833203582,
|
|
"learning_rate": 1.245001877024512e-06,
|
|
"loss": 0.1905173659324646,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 2.384453781512605,
|
|
"grad_norm": 9.325292405896798,
|
|
"learning_rate": 1.2369415044390055e-06,
|
|
"loss": 0.31655293703079224,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 2.3865546218487395,
|
|
"grad_norm": 12.150405014710023,
|
|
"learning_rate": 1.228903624172954e-06,
|
|
"loss": 0.2780379354953766,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 2.388655462184874,
|
|
"grad_norm": 7.132176058282011,
|
|
"learning_rate": 1.220888284269874e-06,
|
|
"loss": 0.5738459825515747,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 2.3907563025210083,
|
|
"grad_norm": 9.199984669814489,
|
|
"learning_rate": 1.2128955326385595e-06,
|
|
"loss": 0.4594503343105316,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 2.392857142857143,
|
|
"grad_norm": 246.0490199481034,
|
|
"learning_rate": 1.2049254170527857e-06,
|
|
"loss": 1.6502771377563477,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 2.3949579831932772,
|
|
"grad_norm": 10.645480745934366,
|
|
"learning_rate": 1.196977985151036e-06,
|
|
"loss": 0.7063793540000916,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 2.3970588235294117,
|
|
"grad_norm": 12.466682957005606,
|
|
"learning_rate": 1.1890532844362035e-06,
|
|
"loss": 0.4885460138320923,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 2.399159663865546,
|
|
"grad_norm": 8.222098406246245,
|
|
"learning_rate": 1.1811513622753196e-06,
|
|
"loss": 0.29537534713745117,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 2.4012605042016806,
|
|
"grad_norm": 10.08444013945275,
|
|
"learning_rate": 1.1732722658992597e-06,
|
|
"loss": 0.6734664440155029,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 2.403361344537815,
|
|
"grad_norm": 12.85839873964936,
|
|
"learning_rate": 1.1654160424024718e-06,
|
|
"loss": 0.39790263772010803,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 2.4054621848739495,
|
|
"grad_norm": 19.32478545248631,
|
|
"learning_rate": 1.1575827387426846e-06,
|
|
"loss": 0.2750331163406372,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 2.407563025210084,
|
|
"grad_norm": 9.972826363554564,
|
|
"learning_rate": 1.149772401740637e-06,
|
|
"loss": 0.44170406460762024,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 2.4096638655462184,
|
|
"grad_norm": 7.314481031395291,
|
|
"learning_rate": 1.1419850780797864e-06,
|
|
"loss": 0.19013899564743042,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 2.411764705882353,
|
|
"grad_norm": 7.432016042351664,
|
|
"learning_rate": 1.1342208143060423e-06,
|
|
"loss": 0.4140137732028961,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 2.4138655462184873,
|
|
"grad_norm": 11.414471382112064,
|
|
"learning_rate": 1.1264796568274811e-06,
|
|
"loss": 0.4861386716365814,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 2.4159663865546217,
|
|
"grad_norm": 13.684580354320987,
|
|
"learning_rate": 1.118761651914065e-06,
|
|
"loss": 0.3487178683280945,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 2.418067226890756,
|
|
"grad_norm": 8.714659720362214,
|
|
"learning_rate": 1.1110668456973761e-06,
|
|
"loss": 0.6119335889816284,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 2.4201680672268906,
|
|
"grad_norm": 10.585809423186294,
|
|
"learning_rate": 1.10339528417033e-06,
|
|
"loss": 0.24830467998981476,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 2.422268907563025,
|
|
"grad_norm": 6.6530689382799375,
|
|
"learning_rate": 1.0957470131869102e-06,
|
|
"loss": 0.20413950085639954,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 2.4243697478991595,
|
|
"grad_norm": 11.585229233250407,
|
|
"learning_rate": 1.088122078461884e-06,
|
|
"loss": 0.7759865522384644,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 2.426470588235294,
|
|
"grad_norm": 14.183804098321202,
|
|
"learning_rate": 1.0805205255705403e-06,
|
|
"loss": 0.9713194370269775,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 11.392423008755229,
|
|
"learning_rate": 1.0729423999484062e-06,
|
|
"loss": 0.3234805464744568,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 2.4306722689075633,
|
|
"grad_norm": 7.504753087219636,
|
|
"learning_rate": 1.0653877468909857e-06,
|
|
"loss": 0.2364063262939453,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 2.4327731092436977,
|
|
"grad_norm": 9.823099282463206,
|
|
"learning_rate": 1.0578566115534794e-06,
|
|
"loss": 0.4705219268798828,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 2.434873949579832,
|
|
"grad_norm": 9.451122750213175,
|
|
"learning_rate": 1.0503490389505244e-06,
|
|
"loss": 0.26277682185173035,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 2.4369747899159666,
|
|
"grad_norm": 9.336273451144258,
|
|
"learning_rate": 1.0428650739559138e-06,
|
|
"loss": 0.13882672786712646,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 2.439075630252101,
|
|
"grad_norm": 7.52407325404656,
|
|
"learning_rate": 1.0354047613023404e-06,
|
|
"loss": 0.5188834071159363,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 2.4411764705882355,
|
|
"grad_norm": 7.853041816369125,
|
|
"learning_rate": 1.0279681455811219e-06,
|
|
"loss": 0.24887529015541077,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 2.44327731092437,
|
|
"grad_norm": 10.755975449643415,
|
|
"learning_rate": 1.0205552712419343e-06,
|
|
"loss": 0.28220975399017334,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 2.4453781512605044,
|
|
"grad_norm": 10.623188982430918,
|
|
"learning_rate": 1.013166182592551e-06,
|
|
"loss": 0.24789491295814514,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 2.447478991596639,
|
|
"grad_norm": 10.265380274150749,
|
|
"learning_rate": 1.0058009237985721e-06,
|
|
"loss": 0.7892224788665771,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 2.4495798319327733,
|
|
"grad_norm": 11.896169947706998,
|
|
"learning_rate": 9.98459538883167e-07,
|
|
"loss": 0.26245754957199097,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 2.4516806722689077,
|
|
"grad_norm": 11.049089295820975,
|
|
"learning_rate": 9.911420717268023e-07,
|
|
"loss": 0.27979156374931335,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 2.453781512605042,
|
|
"grad_norm": 13.265137697114756,
|
|
"learning_rate": 9.838485660669906e-07,
|
|
"loss": 0.7934341430664062,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 2.4558823529411766,
|
|
"grad_norm": 10.018047542365926,
|
|
"learning_rate": 9.765790654980195e-07,
|
|
"loss": 0.45289355516433716,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 2.457983193277311,
|
|
"grad_norm": 13.567130161558774,
|
|
"learning_rate": 9.693336134706988e-07,
|
|
"loss": 0.992337703704834,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 2.4600840336134455,
|
|
"grad_norm": 9.387778498410693,
|
|
"learning_rate": 9.621122532920908e-07,
|
|
"loss": 0.29417842626571655,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 2.46218487394958,
|
|
"grad_norm": 9.114348977338564,
|
|
"learning_rate": 9.549150281252633e-07,
|
|
"loss": 0.5845852494239807,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 2.4642857142857144,
|
|
"grad_norm": 9.288921226395173,
|
|
"learning_rate": 9.477419809890215e-07,
|
|
"loss": 0.22582799196243286,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 2.466386554621849,
|
|
"grad_norm": 13.268912659944744,
|
|
"learning_rate": 9.405931547576591e-07,
|
|
"loss": 0.26232588291168213,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 2.4684873949579833,
|
|
"grad_norm": 9.072509999987034,
|
|
"learning_rate": 9.334685921606946e-07,
|
|
"loss": 0.9084593057632446,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 2.4705882352941178,
|
|
"grad_norm": 11.285010838093699,
|
|
"learning_rate": 9.26368335782622e-07,
|
|
"loss": 0.8386296629905701,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 2.472689075630252,
|
|
"grad_norm": 11.02721308505799,
|
|
"learning_rate": 9.192924280626514e-07,
|
|
"loss": 1.0152020454406738,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 2.4747899159663866,
|
|
"grad_norm": 12.924069581096365,
|
|
"learning_rate": 9.122409112944591e-07,
|
|
"loss": 0.42396751046180725,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 2.476890756302521,
|
|
"grad_norm": 13.79023717049261,
|
|
"learning_rate": 9.052138276259348e-07,
|
|
"loss": 0.3439130485057831,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 2.4789915966386555,
|
|
"grad_norm": 8.00992187627695,
|
|
"learning_rate": 8.982112190589237e-07,
|
|
"loss": 0.21849340200424194,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 2.48109243697479,
|
|
"grad_norm": 17.238112347826142,
|
|
"learning_rate": 8.912331274489855e-07,
|
|
"loss": 1.047693133354187,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 2.4831932773109244,
|
|
"grad_norm": 10.977700782429032,
|
|
"learning_rate": 8.842795945051335e-07,
|
|
"loss": 0.4458342492580414,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 2.485294117647059,
|
|
"grad_norm": 6.2173703827542735,
|
|
"learning_rate": 8.773506617895944e-07,
|
|
"loss": 0.26556795835494995,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 2.4873949579831933,
|
|
"grad_norm": 14.598955541616366,
|
|
"learning_rate": 8.704463707175526e-07,
|
|
"loss": 0.8663069605827332,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 2.4894957983193278,
|
|
"grad_norm": 9.599096123627477,
|
|
"learning_rate": 8.6356676255691e-07,
|
|
"loss": 0.7863715291023254,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 2.491596638655462,
|
|
"grad_norm": 7.756695128139413,
|
|
"learning_rate": 8.567118784280309e-07,
|
|
"loss": 0.2747763395309448,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 2.4936974789915967,
|
|
"grad_norm": 15.296434795066423,
|
|
"learning_rate": 8.498817593035053e-07,
|
|
"loss": 0.22008158266544342,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 2.495798319327731,
|
|
"grad_norm": 42.195018093662426,
|
|
"learning_rate": 8.430764460078938e-07,
|
|
"loss": 0.7790160179138184,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 2.4978991596638656,
|
|
"grad_norm": 14.930505610933327,
|
|
"learning_rate": 8.362959792174941e-07,
|
|
"loss": 0.3692745864391327,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 15.547844843931736,
|
|
"learning_rate": 8.295403994600921e-07,
|
|
"loss": 0.5012900829315186,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 2.5021008403361344,
|
|
"grad_norm": 13.218952734739705,
|
|
"learning_rate": 8.228097471147167e-07,
|
|
"loss": 0.4049416780471802,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 2.504201680672269,
|
|
"grad_norm": 11.42318009744243,
|
|
"learning_rate": 8.161040624114075e-07,
|
|
"loss": 0.14171475172042847,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 2.5063025210084033,
|
|
"grad_norm": 8.26466575159723,
|
|
"learning_rate": 8.094233854309647e-07,
|
|
"loss": 0.32759952545166016,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 2.508403361344538,
|
|
"grad_norm": 13.359323997562882,
|
|
"learning_rate": 8.027677561047176e-07,
|
|
"loss": 0.5382500886917114,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 2.5105042016806722,
|
|
"grad_norm": 9.050399443504134,
|
|
"learning_rate": 7.961372142142776e-07,
|
|
"loss": 0.4815264940261841,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 2.5126050420168067,
|
|
"grad_norm": 8.2857361498368,
|
|
"learning_rate": 7.89531799391311e-07,
|
|
"loss": 0.28123000264167786,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 2.514705882352941,
|
|
"grad_norm": 14.813927596451204,
|
|
"learning_rate": 7.829515511172897e-07,
|
|
"loss": 0.5116557478904724,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 2.5168067226890756,
|
|
"grad_norm": 11.672590724543431,
|
|
"learning_rate": 7.763965087232678e-07,
|
|
"loss": 0.4502016603946686,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 2.51890756302521,
|
|
"grad_norm": 8.420294235923025,
|
|
"learning_rate": 7.698667113896346e-07,
|
|
"loss": 0.34997278451919556,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 2.5210084033613445,
|
|
"grad_norm": 4.856432012218632,
|
|
"learning_rate": 7.633621981458916e-07,
|
|
"loss": 0.15743517875671387,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 2.523109243697479,
|
|
"grad_norm": 10.116924629282346,
|
|
"learning_rate": 7.568830078704092e-07,
|
|
"loss": 0.4513791799545288,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 2.5252100840336134,
|
|
"grad_norm": 10.448219454906289,
|
|
"learning_rate": 7.504291792902024e-07,
|
|
"loss": 0.5203551054000854,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 2.527310924369748,
|
|
"grad_norm": 7.4896542285298,
|
|
"learning_rate": 7.440007509806946e-07,
|
|
"loss": 0.5805743932723999,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 2.5294117647058822,
|
|
"grad_norm": 6.637043733478462,
|
|
"learning_rate": 7.375977613654861e-07,
|
|
"loss": 0.21151217818260193,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 2.5315126050420167,
|
|
"grad_norm": 11.756396934264371,
|
|
"learning_rate": 7.312202487161318e-07,
|
|
"loss": 0.4486454725265503,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 2.533613445378151,
|
|
"grad_norm": 13.216341095384697,
|
|
"learning_rate": 7.248682511519006e-07,
|
|
"loss": 0.8350504040718079,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 2.5357142857142856,
|
|
"grad_norm": 14.368316188442714,
|
|
"learning_rate": 7.18541806639561e-07,
|
|
"loss": 0.37657079100608826,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 2.53781512605042,
|
|
"grad_norm": 10.572863577964558,
|
|
"learning_rate": 7.122409529931412e-07,
|
|
"loss": 0.5544061660766602,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 2.5399159663865545,
|
|
"grad_norm": 13.009489309703797,
|
|
"learning_rate": 7.059657278737136e-07,
|
|
"loss": 0.8755850791931152,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 2.542016806722689,
|
|
"grad_norm": 10.419835233671352,
|
|
"learning_rate": 6.997161687891635e-07,
|
|
"loss": 0.6084367036819458,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 2.5441176470588234,
|
|
"grad_norm": 7.424045502482636,
|
|
"learning_rate": 6.934923130939692e-07,
|
|
"loss": 0.3528558015823364,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 2.546218487394958,
|
|
"grad_norm": 22.05326914016899,
|
|
"learning_rate": 6.872941979889708e-07,
|
|
"loss": 0.3760122060775757,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 2.5483193277310923,
|
|
"grad_norm": 8.437103819513496,
|
|
"learning_rate": 6.811218605211606e-07,
|
|
"loss": 0.3798169195652008,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 2.5504201680672267,
|
|
"grad_norm": 15.105682353848836,
|
|
"learning_rate": 6.749753375834467e-07,
|
|
"loss": 0.20516347885131836,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 2.552521008403361,
|
|
"grad_norm": 15.328640967464176,
|
|
"learning_rate": 6.688546659144479e-07,
|
|
"loss": 0.39129936695098877,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 2.5546218487394956,
|
|
"grad_norm": 18.19539322746799,
|
|
"learning_rate": 6.627598820982595e-07,
|
|
"loss": 0.5815962553024292,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 2.55672268907563,
|
|
"grad_norm": 10.358040499956887,
|
|
"learning_rate": 6.566910225642475e-07,
|
|
"loss": 0.2462518960237503,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 2.5588235294117645,
|
|
"grad_norm": 23.882665351929745,
|
|
"learning_rate": 6.50648123586819e-07,
|
|
"loss": 0.7295534610748291,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 2.560924369747899,
|
|
"grad_norm": 11.419325337575849,
|
|
"learning_rate": 6.446312212852162e-07,
|
|
"loss": 0.4088057577610016,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 2.5630252100840334,
|
|
"grad_norm": 18.506668669014132,
|
|
"learning_rate": 6.386403516232948e-07,
|
|
"loss": 0.6498621106147766,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 2.565126050420168,
|
|
"grad_norm": 12.707186533109224,
|
|
"learning_rate": 6.326755504093063e-07,
|
|
"loss": 0.3554389476776123,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 2.5672268907563023,
|
|
"grad_norm": 12.462287833643975,
|
|
"learning_rate": 6.267368532956919e-07,
|
|
"loss": 1.3259708881378174,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 2.5693277310924367,
|
|
"grad_norm": 8.69258882253335,
|
|
"learning_rate": 6.208242957788613e-07,
|
|
"loss": 0.4336357116699219,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 10.998188149878677,
|
|
"learning_rate": 6.14937913198988e-07,
|
|
"loss": 0.6199144124984741,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 2.5735294117647056,
|
|
"grad_norm": 20.134120954604086,
|
|
"learning_rate": 6.090777407397902e-07,
|
|
"loss": 1.075969934463501,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 2.57563025210084,
|
|
"grad_norm": 9.646318558023589,
|
|
"learning_rate": 6.032438134283286e-07,
|
|
"loss": 0.5996450185775757,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 2.5777310924369745,
|
|
"grad_norm": 10.633484513814087,
|
|
"learning_rate": 5.974361661347889e-07,
|
|
"loss": 0.37859058380126953,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 2.5798319327731094,
|
|
"grad_norm": 18.160538186398977,
|
|
"learning_rate": 5.916548335722822e-07,
|
|
"loss": 0.3595309853553772,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 2.581932773109244,
|
|
"grad_norm": 12.156397479975382,
|
|
"learning_rate": 5.858998502966273e-07,
|
|
"loss": 0.31986016035079956,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 2.5840336134453783,
|
|
"grad_norm": 12.635254524437713,
|
|
"learning_rate": 5.801712507061563e-07,
|
|
"loss": 0.3975721597671509,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 2.5861344537815127,
|
|
"grad_norm": 10.900463191925608,
|
|
"learning_rate": 5.74469069041495e-07,
|
|
"loss": 0.6717185974121094,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 10.60292331277609,
|
|
"learning_rate": 5.687933393853718e-07,
|
|
"loss": 0.6171470880508423,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 2.5903361344537816,
|
|
"grad_norm": 11.817453815932138,
|
|
"learning_rate": 5.631440956624057e-07,
|
|
"loss": 0.47931092977523804,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 2.592436974789916,
|
|
"grad_norm": 14.65524897977516,
|
|
"learning_rate": 5.575213716389039e-07,
|
|
"loss": 0.44013679027557373,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 2.5945378151260505,
|
|
"grad_norm": 14.640686063418055,
|
|
"learning_rate": 5.519252009226639e-07,
|
|
"loss": 0.515785276889801,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 2.596638655462185,
|
|
"grad_norm": 9.51325634200356,
|
|
"learning_rate": 5.463556169627687e-07,
|
|
"loss": 0.3664918541908264,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 2.5987394957983194,
|
|
"grad_norm": 9.591516923545466,
|
|
"learning_rate": 5.408126530493918e-07,
|
|
"loss": 0.3711666762828827,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 2.600840336134454,
|
|
"grad_norm": 7.071071092917334,
|
|
"learning_rate": 5.352963423135893e-07,
|
|
"loss": 0.12698325514793396,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 2.6029411764705883,
|
|
"grad_norm": 8.042424735857201,
|
|
"learning_rate": 5.298067177271144e-07,
|
|
"loss": 0.3730424642562866,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 2.6050420168067228,
|
|
"grad_norm": 9.069780325522164,
|
|
"learning_rate": 5.243438121022077e-07,
|
|
"loss": 0.6243601441383362,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 2.607142857142857,
|
|
"grad_norm": 7.246887997362519,
|
|
"learning_rate": 5.18907658091411e-07,
|
|
"loss": 0.18001016974449158,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 2.6092436974789917,
|
|
"grad_norm": 15.652638965395807,
|
|
"learning_rate": 5.134982881873646e-07,
|
|
"loss": 0.6635949611663818,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 2.611344537815126,
|
|
"grad_norm": 9.642543803196963,
|
|
"learning_rate": 5.081157347226201e-07,
|
|
"loss": 0.4666215777397156,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 2.6134453781512605,
|
|
"grad_norm": 9.416633968819704,
|
|
"learning_rate": 5.027600298694397e-07,
|
|
"loss": 0.1682681143283844,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 2.615546218487395,
|
|
"grad_norm": 14.036854769880513,
|
|
"learning_rate": 4.974312056396113e-07,
|
|
"loss": 0.5077744722366333,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 2.6176470588235294,
|
|
"grad_norm": 11.722652840072532,
|
|
"learning_rate": 4.92129293884252e-07,
|
|
"loss": 0.44359397888183594,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 2.619747899159664,
|
|
"grad_norm": 15.585836072486865,
|
|
"learning_rate": 4.868543262936176e-07,
|
|
"loss": 1.2246967554092407,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 2.6218487394957983,
|
|
"grad_norm": 10.770044484279795,
|
|
"learning_rate": 4.816063343969196e-07,
|
|
"loss": 0.32194000482559204,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 2.6239495798319328,
|
|
"grad_norm": 11.639608924375384,
|
|
"learning_rate": 4.763853495621251e-07,
|
|
"loss": 0.5496278405189514,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 2.6260504201680672,
|
|
"grad_norm": 6.671350027648182,
|
|
"learning_rate": 4.7119140299578424e-07,
|
|
"loss": 0.21257492899894714,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 2.6281512605042017,
|
|
"grad_norm": 12.713785072488509,
|
|
"learning_rate": 4.660245257428297e-07,
|
|
"loss": 0.3104386329650879,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 2.630252100840336,
|
|
"grad_norm": 14.031766333020213,
|
|
"learning_rate": 4.6088474868640045e-07,
|
|
"loss": 0.8334522843360901,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 2.6323529411764706,
|
|
"grad_norm": 9.251230979164895,
|
|
"learning_rate": 4.557721025476508e-07,
|
|
"loss": 0.29882583022117615,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 2.634453781512605,
|
|
"grad_norm": 10.2580288266136,
|
|
"learning_rate": 4.5068661788557345e-07,
|
|
"loss": 0.3209346830844879,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 2.6365546218487395,
|
|
"grad_norm": 5.604118390936418,
|
|
"learning_rate": 4.4562832509680963e-07,
|
|
"loss": 0.15333116054534912,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 2.638655462184874,
|
|
"grad_norm": 19.802213443985696,
|
|
"learning_rate": 4.4059725441547464e-07,
|
|
"loss": 0.48582714796066284,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 2.6407563025210083,
|
|
"grad_norm": 11.514218359185726,
|
|
"learning_rate": 4.355934359129699e-07,
|
|
"loss": 0.4873425364494324,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 11.373550533887446,
|
|
"learning_rate": 4.3061689949780995e-07,
|
|
"loss": 0.2611161768436432,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 2.6449579831932772,
|
|
"grad_norm": 13.616066692598451,
|
|
"learning_rate": 4.2566767491543706e-07,
|
|
"loss": 0.27621158957481384,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 13.034515066864026,
|
|
"learning_rate": 4.2074579174805173e-07,
|
|
"loss": 0.849486231803894,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 2.649159663865546,
|
|
"grad_norm": 11.86057949603211,
|
|
"learning_rate": 4.1585127941442536e-07,
|
|
"loss": 0.7652707099914551,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 2.6512605042016806,
|
|
"grad_norm": 9.803056978877574,
|
|
"learning_rate": 4.1098416716973457e-07,
|
|
"loss": 0.27856025099754333,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 2.653361344537815,
|
|
"grad_norm": 10.956379977903175,
|
|
"learning_rate": 4.0614448410538077e-07,
|
|
"loss": 0.3749684691429138,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 2.6554621848739495,
|
|
"grad_norm": 12.001506859449199,
|
|
"learning_rate": 4.01332259148815e-07,
|
|
"loss": 0.6064971685409546,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 2.657563025210084,
|
|
"grad_norm": 8.750382381092477,
|
|
"learning_rate": 3.965475210633718e-07,
|
|
"loss": 0.31089282035827637,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 2.6596638655462184,
|
|
"grad_norm": 11.062101410973414,
|
|
"learning_rate": 3.917902984480881e-07,
|
|
"loss": 0.3686492443084717,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 2.661764705882353,
|
|
"grad_norm": 9.181597675394137,
|
|
"learning_rate": 3.870606197375415e-07,
|
|
"loss": 0.5900052785873413,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 2.6638655462184873,
|
|
"grad_norm": 11.229435985209061,
|
|
"learning_rate": 3.823585132016711e-07,
|
|
"loss": 0.23156413435935974,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 2.6659663865546217,
|
|
"grad_norm": 14.580552525176778,
|
|
"learning_rate": 3.776840069456189e-07,
|
|
"loss": 1.1965575218200684,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 2.668067226890756,
|
|
"grad_norm": 11.440843191964541,
|
|
"learning_rate": 3.730371289095508e-07,
|
|
"loss": 0.5137308835983276,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 2.6701680672268906,
|
|
"grad_norm": 10.19938284065176,
|
|
"learning_rate": 3.6841790686849897e-07,
|
|
"loss": 0.2563337981700897,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 2.6722689075630255,
|
|
"grad_norm": 12.902940912955524,
|
|
"learning_rate": 3.6382636843218967e-07,
|
|
"loss": 0.5659809708595276,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 2.67436974789916,
|
|
"grad_norm": 10.012067877403453,
|
|
"learning_rate": 3.592625410448813e-07,
|
|
"loss": 0.4689119756221771,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 2.6764705882352944,
|
|
"grad_norm": 7.152049482781003,
|
|
"learning_rate": 3.5472645198520064e-07,
|
|
"loss": 0.623033881187439,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 2.678571428571429,
|
|
"grad_norm": 12.87568935637631,
|
|
"learning_rate": 3.502181283659756e-07,
|
|
"loss": 0.5805165767669678,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 2.6806722689075633,
|
|
"grad_norm": 9.41187997958309,
|
|
"learning_rate": 3.4573759713407927e-07,
|
|
"loss": 0.5375624299049377,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 2.6827731092436977,
|
|
"grad_norm": 9.91288200334237,
|
|
"learning_rate": 3.4128488507026327e-07,
|
|
"loss": 0.3185434341430664,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 2.684873949579832,
|
|
"grad_norm": 15.97191633077991,
|
|
"learning_rate": 3.3686001878900365e-07,
|
|
"loss": 2.561387538909912,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 2.6869747899159666,
|
|
"grad_norm": 15.172775416815085,
|
|
"learning_rate": 3.324630247383337e-07,
|
|
"loss": 0.5536858439445496,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 2.689075630252101,
|
|
"grad_norm": 8.67689739732767,
|
|
"learning_rate": 3.2809392919969483e-07,
|
|
"loss": 0.18657177686691284,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 2.6911764705882355,
|
|
"grad_norm": 12.844508734340609,
|
|
"learning_rate": 3.2375275828777253e-07,
|
|
"loss": 0.9441865086555481,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 2.69327731092437,
|
|
"grad_norm": 9.066706121878353,
|
|
"learning_rate": 3.194395379503451e-07,
|
|
"loss": 0.5320143103599548,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 2.6953781512605044,
|
|
"grad_norm": 8.980483143209002,
|
|
"learning_rate": 3.151542939681235e-07,
|
|
"loss": 0.5943700075149536,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 2.697478991596639,
|
|
"grad_norm": 54.073743939162,
|
|
"learning_rate": 3.108970519546034e-07,
|
|
"loss": 1.0508530139923096,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 2.6995798319327733,
|
|
"grad_norm": 10.409633106680213,
|
|
"learning_rate": 3.066678373559062e-07,
|
|
"loss": 0.3096291124820709,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 2.7016806722689077,
|
|
"grad_norm": 17.871716634928575,
|
|
"learning_rate": 3.0246667545063057e-07,
|
|
"loss": 1.133009910583496,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 2.703781512605042,
|
|
"grad_norm": 10.727929813588565,
|
|
"learning_rate": 2.9829359134970206e-07,
|
|
"loss": 0.3362637162208557,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 2.7058823529411766,
|
|
"grad_norm": 10.027811395629024,
|
|
"learning_rate": 2.9414860999621764e-07,
|
|
"loss": 0.9418044090270996,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 2.707983193277311,
|
|
"grad_norm": 8.072200583551933,
|
|
"learning_rate": 2.9003175616530264e-07,
|
|
"loss": 0.2674849033355713,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 2.7100840336134455,
|
|
"grad_norm": 7.662563052553184,
|
|
"learning_rate": 2.8594305446396245e-07,
|
|
"loss": 0.39476725459098816,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 2.71218487394958,
|
|
"grad_norm": 7.931717377933664,
|
|
"learning_rate": 2.818825293309274e-07,
|
|
"loss": 0.5461002588272095,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 9.069033415947747,
|
|
"learning_rate": 2.7785020503651783e-07,
|
|
"loss": 0.36206185817718506,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 2.716386554621849,
|
|
"grad_norm": 11.269854164923549,
|
|
"learning_rate": 2.7384610568249313e-07,
|
|
"loss": 0.33151179552078247,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 2.7184873949579833,
|
|
"grad_norm": 10.182183999098427,
|
|
"learning_rate": 2.698702552019045e-07,
|
|
"loss": 0.3465487062931061,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 2.7205882352941178,
|
|
"grad_norm": 12.712025830447253,
|
|
"learning_rate": 2.659226773589607e-07,
|
|
"loss": 0.22317005693912506,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 2.722689075630252,
|
|
"grad_norm": 11.83879887595397,
|
|
"learning_rate": 2.620033957488777e-07,
|
|
"loss": 0.34791454672813416,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 2.7247899159663866,
|
|
"grad_norm": 12.416917803129223,
|
|
"learning_rate": 2.581124337977425e-07,
|
|
"loss": 0.4211697578430176,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 2.726890756302521,
|
|
"grad_norm": 15.495534828622619,
|
|
"learning_rate": 2.542498147623701e-07,
|
|
"loss": 0.4095291495323181,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 2.7289915966386555,
|
|
"grad_norm": 6.6678037455089925,
|
|
"learning_rate": 2.50415561730169e-07,
|
|
"loss": 0.2518484592437744,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 2.73109243697479,
|
|
"grad_norm": 8.694983560441388,
|
|
"learning_rate": 2.4660969761899576e-07,
|
|
"loss": 0.21484610438346863,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 2.7331932773109244,
|
|
"grad_norm": 11.820763178851392,
|
|
"learning_rate": 2.428322451770276e-07,
|
|
"loss": 0.39412614703178406,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 2.735294117647059,
|
|
"grad_norm": 11.877133639126868,
|
|
"learning_rate": 2.3908322698261597e-07,
|
|
"loss": 0.34464430809020996,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 2.7373949579831933,
|
|
"grad_norm": 10.16702078484984,
|
|
"learning_rate": 2.3536266544416043e-07,
|
|
"loss": 0.5757449865341187,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 2.7394957983193278,
|
|
"grad_norm": 12.93026525257059,
|
|
"learning_rate": 2.3167058279997156e-07,
|
|
"loss": 0.7968210577964783,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 2.741596638655462,
|
|
"grad_norm": 10.635434378996248,
|
|
"learning_rate": 2.2800700111813456e-07,
|
|
"loss": 0.40927547216415405,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 2.7436974789915967,
|
|
"grad_norm": 9.037444336220418,
|
|
"learning_rate": 2.2437194229638415e-07,
|
|
"loss": 0.23368996381759644,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 2.745798319327731,
|
|
"grad_norm": 128.54979302169804,
|
|
"learning_rate": 2.2076542806196588e-07,
|
|
"loss": 0.7368482351303101,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 2.7478991596638656,
|
|
"grad_norm": 8.202648198989193,
|
|
"learning_rate": 2.17187479971514e-07,
|
|
"loss": 0.29558128118515015,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 6.885187685182277,
|
|
"learning_rate": 2.136381194109166e-07,
|
|
"loss": 0.2764503061771393,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 2.7521008403361344,
|
|
"grad_norm": 8.730199073100707,
|
|
"learning_rate": 2.1011736759519286e-07,
|
|
"loss": 0.3793492615222931,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 2.754201680672269,
|
|
"grad_norm": 8.631276466119623,
|
|
"learning_rate": 2.0662524556835982e-07,
|
|
"loss": 0.5927262902259827,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 2.7563025210084033,
|
|
"grad_norm": 12.625981527108426,
|
|
"learning_rate": 2.0316177420331375e-07,
|
|
"loss": 0.4284164607524872,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 2.758403361344538,
|
|
"grad_norm": 9.329175719292097,
|
|
"learning_rate": 1.997269742016994e-07,
|
|
"loss": 0.4722291827201843,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 2.7605042016806722,
|
|
"grad_norm": 17.532238777546283,
|
|
"learning_rate": 1.9632086609379041e-07,
|
|
"loss": 0.6627257466316223,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 2.7626050420168067,
|
|
"grad_norm": 9.92918276948977,
|
|
"learning_rate": 1.929434702383648e-07,
|
|
"loss": 0.42083340883255005,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 2.764705882352941,
|
|
"grad_norm": 9.589305807880846,
|
|
"learning_rate": 1.895948068225828e-07,
|
|
"loss": 0.39910781383514404,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 2.7668067226890756,
|
|
"grad_norm": 11.560419759358716,
|
|
"learning_rate": 1.862748958618682e-07,
|
|
"loss": 0.2765321731567383,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 2.76890756302521,
|
|
"grad_norm": 6.28366130869059,
|
|
"learning_rate": 1.8298375719978501e-07,
|
|
"loss": 0.08827929198741913,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 2.7710084033613445,
|
|
"grad_norm": 10.943149316905583,
|
|
"learning_rate": 1.797214105079248e-07,
|
|
"loss": 0.5753570795059204,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 2.773109243697479,
|
|
"grad_norm": 13.46621194548743,
|
|
"learning_rate": 1.7648787528578127e-07,
|
|
"loss": 0.7518602013587952,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 2.7752100840336134,
|
|
"grad_norm": 9.047171390898557,
|
|
"learning_rate": 1.732831708606425e-07,
|
|
"loss": 0.6446128487586975,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 2.777310924369748,
|
|
"grad_norm": 13.738460244304907,
|
|
"learning_rate": 1.7010731638746668e-07,
|
|
"loss": 0.4714201092720032,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 2.7794117647058822,
|
|
"grad_norm": 9.086358928536246,
|
|
"learning_rate": 1.669603308487755e-07,
|
|
"loss": 0.23203890025615692,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 2.7815126050420167,
|
|
"grad_norm": 25.798347784352785,
|
|
"learning_rate": 1.6384223305453417e-07,
|
|
"loss": 0.5102007389068604,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 2.783613445378151,
|
|
"grad_norm": 11.210651055014003,
|
|
"learning_rate": 1.6075304164204385e-07,
|
|
"loss": 0.45608770847320557,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 2.7857142857142856,
|
|
"grad_norm": 7.6934549954682465,
|
|
"learning_rate": 1.5769277507582725e-07,
|
|
"loss": 0.5190253257751465,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 2.78781512605042,
|
|
"grad_norm": 13.485529024983622,
|
|
"learning_rate": 1.5466145164751977e-07,
|
|
"loss": 0.5670579075813293,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 2.7899159663865545,
|
|
"grad_norm": 8.323766066498216,
|
|
"learning_rate": 1.5165908947575914e-07,
|
|
"loss": 0.4676046073436737,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 2.792016806722689,
|
|
"grad_norm": 11.23683105022603,
|
|
"learning_rate": 1.4868570650607816e-07,
|
|
"loss": 0.2914016544818878,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 2.7941176470588234,
|
|
"grad_norm": 17.258281880666775,
|
|
"learning_rate": 1.4574132051079658e-07,
|
|
"loss": 1.312021017074585,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 2.796218487394958,
|
|
"grad_norm": 9.249285303088671,
|
|
"learning_rate": 1.4282594908891666e-07,
|
|
"loss": 0.3117330074310303,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 2.7983193277310923,
|
|
"grad_norm": 9.999090012754882,
|
|
"learning_rate": 1.3993960966601328e-07,
|
|
"loss": 0.2705899775028229,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 2.8004201680672267,
|
|
"grad_norm": 9.383011281190877,
|
|
"learning_rate": 1.3708231949413676e-07,
|
|
"loss": 0.2621600031852722,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 2.802521008403361,
|
|
"grad_norm": 11.854334740139995,
|
|
"learning_rate": 1.342540956517041e-07,
|
|
"loss": 0.40849626064300537,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 2.8046218487394956,
|
|
"grad_norm": 10.05169136975745,
|
|
"learning_rate": 1.3145495504339856e-07,
|
|
"loss": 0.2958400845527649,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 2.80672268907563,
|
|
"grad_norm": 13.23608329431821,
|
|
"learning_rate": 1.2868491440007015e-07,
|
|
"loss": 0.7148715257644653,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 2.8088235294117645,
|
|
"grad_norm": 11.059610717001991,
|
|
"learning_rate": 1.2594399027863302e-07,
|
|
"loss": 0.5344212055206299,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 2.810924369747899,
|
|
"grad_norm": 9.850144807315097,
|
|
"learning_rate": 1.232321990619695e-07,
|
|
"loss": 0.3390062749385834,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 2.8130252100840334,
|
|
"grad_norm": 12.782218028007712,
|
|
"learning_rate": 1.205495569588283e-07,
|
|
"loss": 0.6602462530136108,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 2.815126050420168,
|
|
"grad_norm": 9.16202056000073,
|
|
"learning_rate": 1.1789608000373209e-07,
|
|
"loss": 0.2165951430797577,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 2.8172268907563023,
|
|
"grad_norm": 9.639282432785762,
|
|
"learning_rate": 1.1527178405687845e-07,
|
|
"loss": 0.33785703778266907,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 2.8193277310924367,
|
|
"grad_norm": 16.018225078825093,
|
|
"learning_rate": 1.1267668480404559e-07,
|
|
"loss": 0.49403730034828186,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 2.821428571428571,
|
|
"grad_norm": 13.934169182843426,
|
|
"learning_rate": 1.1011079775649969e-07,
|
|
"loss": 0.5875406265258789,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 2.8235294117647056,
|
|
"grad_norm": 10.314949862812936,
|
|
"learning_rate": 1.0757413825090212e-07,
|
|
"loss": 0.4375740885734558,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 2.82563025210084,
|
|
"grad_norm": 11.258449104141572,
|
|
"learning_rate": 1.0506672144921515e-07,
|
|
"loss": 0.6797425746917725,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 2.8277310924369745,
|
|
"grad_norm": 10.94330965699598,
|
|
"learning_rate": 1.0258856233861524e-07,
|
|
"loss": 0.36085596680641174,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 2.8298319327731094,
|
|
"grad_norm": 9.174555872255727,
|
|
"learning_rate": 1.0013967573140216e-07,
|
|
"loss": 0.43387356400489807,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 2.831932773109244,
|
|
"grad_norm": 11.299994940843328,
|
|
"learning_rate": 9.77200762649072e-08,
|
|
"loss": 0.44897180795669556,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 2.8340336134453783,
|
|
"grad_norm": 10.295685178570979,
|
|
"learning_rate": 9.532977840141123e-08,
|
|
"loss": 0.22422294318675995,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 2.8361344537815127,
|
|
"grad_norm": 8.127889054633478,
|
|
"learning_rate": 9.29687964280529e-08,
|
|
"loss": 0.642038106918335,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 2.838235294117647,
|
|
"grad_norm": 11.9364629190832,
|
|
"learning_rate": 9.063714445674776e-08,
|
|
"loss": 0.8069763779640198,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 2.8403361344537816,
|
|
"grad_norm": 11.63460546823257,
|
|
"learning_rate": 8.833483642410101e-08,
|
|
"loss": 0.36828362941741943,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 2.842436974789916,
|
|
"grad_norm": 10.042654306225293,
|
|
"learning_rate": 8.606188609132593e-08,
|
|
"loss": 0.3019287586212158,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 2.8445378151260505,
|
|
"grad_norm": 10.265644800483537,
|
|
"learning_rate": 8.381830704415839e-08,
|
|
"loss": 0.8440870046615601,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 2.846638655462185,
|
|
"grad_norm": 11.67889502498505,
|
|
"learning_rate": 8.160411269278079e-08,
|
|
"loss": 2.0406436920166016,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 2.8487394957983194,
|
|
"grad_norm": 11.659752416837614,
|
|
"learning_rate": 7.941931627173827e-08,
|
|
"loss": 0.23328936100006104,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 2.850840336134454,
|
|
"grad_norm": 14.52369655098527,
|
|
"learning_rate": 7.726393083985929e-08,
|
|
"loss": 0.552147626876831,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 2.8529411764705883,
|
|
"grad_norm": 8.467901286703713,
|
|
"learning_rate": 7.513796928018069e-08,
|
|
"loss": 0.38458627462387085,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 2.8550420168067228,
|
|
"grad_norm": 7.6124021321848,
|
|
"learning_rate": 7.30414442998667e-08,
|
|
"loss": 0.3594217300415039,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 12.272607778978339,
|
|
"learning_rate": 7.097436843013783e-08,
|
|
"loss": 0.5628789067268372,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 2.8592436974789917,
|
|
"grad_norm": 9.345724512814346,
|
|
"learning_rate": 6.893675402618982e-08,
|
|
"loss": 0.7206631898880005,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 2.861344537815126,
|
|
"grad_norm": 13.88429967852116,
|
|
"learning_rate": 6.692861326712652e-08,
|
|
"loss": 0.8038681745529175,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 2.8634453781512605,
|
|
"grad_norm": 15.55581945591023,
|
|
"learning_rate": 6.494995815588101e-08,
|
|
"loss": 0.7214268445968628,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 2.865546218487395,
|
|
"grad_norm": 10.920690128080313,
|
|
"learning_rate": 6.300080051914792e-08,
|
|
"loss": 0.3757812976837158,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 2.8676470588235294,
|
|
"grad_norm": 9.02181459032139,
|
|
"learning_rate": 6.108115200731069e-08,
|
|
"loss": 0.7154731154441833,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 2.869747899159664,
|
|
"grad_norm": 12.825891467379778,
|
|
"learning_rate": 5.9191024094374384e-08,
|
|
"loss": 0.6805951595306396,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 2.8718487394957983,
|
|
"grad_norm": 10.058469083040828,
|
|
"learning_rate": 5.7330428077893575e-08,
|
|
"loss": 0.41078895330429077,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 2.8739495798319328,
|
|
"grad_norm": 8.36551151872813,
|
|
"learning_rate": 5.5499375078906793e-08,
|
|
"loss": 0.35648801922798157,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 2.8760504201680672,
|
|
"grad_norm": 18.110033778975207,
|
|
"learning_rate": 5.369787604186993e-08,
|
|
"loss": 0.3897348642349243,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 2.8781512605042017,
|
|
"grad_norm": 8.750407331993259,
|
|
"learning_rate": 5.192594173459242e-08,
|
|
"loss": 0.613540530204773,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 2.880252100840336,
|
|
"grad_norm": 8.528139704622195,
|
|
"learning_rate": 5.018358274816892e-08,
|
|
"loss": 0.4445531964302063,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 2.8823529411764706,
|
|
"grad_norm": 11.859807560110708,
|
|
"learning_rate": 4.847080949691996e-08,
|
|
"loss": 0.5488522052764893,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 2.884453781512605,
|
|
"grad_norm": 10.23407853457865,
|
|
"learning_rate": 4.6787632218326385e-08,
|
|
"loss": 0.5596367716789246,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 2.8865546218487395,
|
|
"grad_norm": 11.296256406092558,
|
|
"learning_rate": 4.513406097297224e-08,
|
|
"loss": 0.38018864393234253,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 2.888655462184874,
|
|
"grad_norm": 12.156451974202069,
|
|
"learning_rate": 4.351010564447977e-08,
|
|
"loss": 0.661139726638794,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 2.8907563025210083,
|
|
"grad_norm": 7.3170273092091485,
|
|
"learning_rate": 4.1915775939454506e-08,
|
|
"loss": 0.2944487929344177,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 2.892857142857143,
|
|
"grad_norm": 9.331145503425732,
|
|
"learning_rate": 4.035108138742416e-08,
|
|
"loss": 0.23486556112766266,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 2.8949579831932772,
|
|
"grad_norm": 11.863919468865829,
|
|
"learning_rate": 3.881603134078482e-08,
|
|
"loss": 0.3030620813369751,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 2.8970588235294117,
|
|
"grad_norm": 12.573022210864796,
|
|
"learning_rate": 3.731063497474152e-08,
|
|
"loss": 0.3213701546192169,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 2.899159663865546,
|
|
"grad_norm": 6.674326596763006,
|
|
"learning_rate": 3.583490128725553e-08,
|
|
"loss": 0.22970488667488098,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 2.9012605042016806,
|
|
"grad_norm": 13.577742623896349,
|
|
"learning_rate": 3.4388839098992154e-08,
|
|
"loss": 0.32231050729751587,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 2.903361344537815,
|
|
"grad_norm": 16.538926889757885,
|
|
"learning_rate": 3.2972457053262466e-08,
|
|
"loss": 0.9544304609298706,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 2.9054621848739495,
|
|
"grad_norm": 20.22528777991979,
|
|
"learning_rate": 3.158576361597887e-08,
|
|
"loss": 0.7788558006286621,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 2.907563025210084,
|
|
"grad_norm": 16.130951952781945,
|
|
"learning_rate": 3.022876707559796e-08,
|
|
"loss": 0.2601931393146515,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 2.9096638655462184,
|
|
"grad_norm": 13.778242873434662,
|
|
"learning_rate": 2.890147554307665e-08,
|
|
"loss": 0.5957424640655518,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 2.911764705882353,
|
|
"grad_norm": 10.295105075509534,
|
|
"learning_rate": 2.7603896951817755e-08,
|
|
"loss": 0.2927376925945282,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 2.9138655462184873,
|
|
"grad_norm": 18.743122797679717,
|
|
"learning_rate": 2.633603905762838e-08,
|
|
"loss": 0.5990405082702637,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 2.9159663865546217,
|
|
"grad_norm": 14.887502358752755,
|
|
"learning_rate": 2.5097909438669964e-08,
|
|
"loss": 0.4513130784034729,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 2.918067226890756,
|
|
"grad_norm": 9.564277783357335,
|
|
"learning_rate": 2.3889515495413297e-08,
|
|
"loss": 0.6215352416038513,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 2.9201680672268906,
|
|
"grad_norm": 5.003008688132311,
|
|
"learning_rate": 2.2710864450596336e-08,
|
|
"loss": 0.33804643154144287,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 2.9222689075630255,
|
|
"grad_norm": 9.060022958520825,
|
|
"learning_rate": 2.1561963349178704e-08,
|
|
"loss": 0.4266011416912079,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 2.92436974789916,
|
|
"grad_norm": 9.49932040181115,
|
|
"learning_rate": 2.0442819058300588e-08,
|
|
"loss": 0.3738781809806824,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 2.9264705882352944,
|
|
"grad_norm": 11.474699381578137,
|
|
"learning_rate": 1.935343826724112e-08,
|
|
"loss": 0.26019287109375,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 2.928571428571429,
|
|
"grad_norm": 9.426307307224148,
|
|
"learning_rate": 1.8293827487380623e-08,
|
|
"loss": 0.3799281120300293,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 2.9306722689075633,
|
|
"grad_norm": 12.711822585165105,
|
|
"learning_rate": 1.726399305215787e-08,
|
|
"loss": 0.25459083914756775,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 2.9327731092436977,
|
|
"grad_norm": 12.88439286989085,
|
|
"learning_rate": 1.626394111703622e-08,
|
|
"loss": 0.4746205806732178,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 2.934873949579832,
|
|
"grad_norm": 15.570716719123634,
|
|
"learning_rate": 1.5293677659463104e-08,
|
|
"loss": 0.4622001647949219,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 2.9369747899159666,
|
|
"grad_norm": 6.94964938645385,
|
|
"learning_rate": 1.4353208478837256e-08,
|
|
"loss": 0.18047931790351868,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 2.939075630252101,
|
|
"grad_norm": 10.000416990177895,
|
|
"learning_rate": 1.3442539196472647e-08,
|
|
"loss": 0.37007540464401245,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 8.931832554567432,
|
|
"learning_rate": 1.2561675255564621e-08,
|
|
"loss": 0.7158060073852539,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 2.94327731092437,
|
|
"grad_norm": 11.017696540531707,
|
|
"learning_rate": 1.1710621921159904e-08,
|
|
"loss": 0.9123420119285583,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 2.9453781512605044,
|
|
"grad_norm": 13.459702577958145,
|
|
"learning_rate": 1.0889384280119985e-08,
|
|
"loss": 1.1057486534118652,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 2.947478991596639,
|
|
"grad_norm": 11.44996683672279,
|
|
"learning_rate": 1.009796724109613e-08,
|
|
"loss": 0.36926376819610596,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 2.9495798319327733,
|
|
"grad_norm": 8.32543080681241,
|
|
"learning_rate": 9.336375534497732e-09,
|
|
"loss": 0.5240511298179626,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 2.9516806722689077,
|
|
"grad_norm": 16.41897161685657,
|
|
"learning_rate": 8.60461371246235e-09,
|
|
"loss": 1.0361064672470093,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 2.953781512605042,
|
|
"grad_norm": 15.488903163881536,
|
|
"learning_rate": 7.902686148831273e-09,
|
|
"loss": 0.7314852476119995,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 2.9558823529411766,
|
|
"grad_norm": 10.609488526695282,
|
|
"learning_rate": 7.230597039123433e-09,
|
|
"loss": 0.5929103493690491,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 2.957983193277311,
|
|
"grad_norm": 9.545516911394982,
|
|
"learning_rate": 6.588350400507093e-09,
|
|
"loss": 0.24979953467845917,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 2.9600840336134455,
|
|
"grad_norm": 16.112825448357878,
|
|
"learning_rate": 5.975950071779313e-09,
|
|
"loss": 0.810958206653595,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 2.96218487394958,
|
|
"grad_norm": 8.6763799711489,
|
|
"learning_rate": 5.393399713341518e-09,
|
|
"loss": 0.4567590355873108,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 2.9642857142857144,
|
|
"grad_norm": 10.068385761335634,
|
|
"learning_rate": 4.8407028071773e-09,
|
|
"loss": 0.34989726543426514,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 2.966386554621849,
|
|
"grad_norm": 11.30284408928835,
|
|
"learning_rate": 4.317862656831873e-09,
|
|
"loss": 0.3826170563697815,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 2.9684873949579833,
|
|
"grad_norm": 13.404023548287954,
|
|
"learning_rate": 3.8248823873932026e-09,
|
|
"loss": 0.25103145837783813,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 2.9705882352941178,
|
|
"grad_norm": 10.423177049027613,
|
|
"learning_rate": 3.361764945473134e-09,
|
|
"loss": 0.33963871002197266,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 2.972689075630252,
|
|
"grad_norm": 9.701343246515489,
|
|
"learning_rate": 2.928513099187402e-09,
|
|
"loss": 0.5596168637275696,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 2.9747899159663866,
|
|
"grad_norm": 8.852327800983687,
|
|
"learning_rate": 2.52512943814176e-09,
|
|
"loss": 0.3114224374294281,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 2.976890756302521,
|
|
"grad_norm": 8.400624424787871,
|
|
"learning_rate": 2.151616373417542e-09,
|
|
"loss": 0.5350728631019592,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 2.9789915966386555,
|
|
"grad_norm": 10.794481012917993,
|
|
"learning_rate": 1.8079761375522365e-09,
|
|
"loss": 0.6644730567932129,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 2.98109243697479,
|
|
"grad_norm": 9.282496929164791,
|
|
"learning_rate": 1.4942107845317132e-09,
|
|
"loss": 0.2426847219467163,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 2.9831932773109244,
|
|
"grad_norm": 9.113139352861424,
|
|
"learning_rate": 1.210322189774682e-09,
|
|
"loss": 0.2127893567085266,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 2.985294117647059,
|
|
"grad_norm": 10.249299245135052,
|
|
"learning_rate": 9.563120501221434e-10,
|
|
"loss": 0.31507742404937744,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 2.9873949579831933,
|
|
"grad_norm": 10.643798027898825,
|
|
"learning_rate": 7.321818838279537e-10,
|
|
"loss": 0.49292629957199097,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 2.9894957983193278,
|
|
"grad_norm": 7.762285292055822,
|
|
"learning_rate": 5.379330305488317e-10,
|
|
"loss": 0.25357064604759216,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 2.991596638655462,
|
|
"grad_norm": 7.423869479037056,
|
|
"learning_rate": 3.735666513371428e-10,
|
|
"loss": 0.4229947328567505,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 2.9936974789915967,
|
|
"grad_norm": 10.794639890750766,
|
|
"learning_rate": 2.3908372863368223e-10,
|
|
"loss": 0.5679960250854492,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 2.995798319327731,
|
|
"grad_norm": 9.814210260546373,
|
|
"learning_rate": 1.344850662604591e-10,
|
|
"loss": 0.3406621515750885,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 2.9978991596638656,
|
|
"grad_norm": 7.52747077028302,
|
|
"learning_rate": 5.977128941903055e-11,
|
|
"loss": 0.3986052870750427,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 8.379236977666347,
|
|
"learning_rate": 1.494284468384066e-11,
|
|
"loss": 0.49183082580566406,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 1428,
|
|
"total_flos": 3902317486080.0,
|
|
"train_loss": 1.6620939874066776,
|
|
"train_runtime": 1853.0016,
|
|
"train_samples_per_second": 3.081,
|
|
"train_steps_per_second": 0.771
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1428,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3902317486080.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|