17606 lines
428 KiB
JSON
17606 lines
428 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9999003686360466,
|
|
"eval_steps": 500,
|
|
"global_step": 2509,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0003985254558134901,
|
|
"grad_norm": 1.1682030229557891,
|
|
"learning_rate": 1.5384615384615387e-06,
|
|
"loss": 1.298,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0007970509116269802,
|
|
"grad_norm": 1.1450089305489202,
|
|
"learning_rate": 3.0769230769230774e-06,
|
|
"loss": 1.2606,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0011955763674404703,
|
|
"grad_norm": 1.1741081208500113,
|
|
"learning_rate": 4.615384615384616e-06,
|
|
"loss": 1.317,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0015941018232539603,
|
|
"grad_norm": 0.9416899998464173,
|
|
"learning_rate": 6.153846153846155e-06,
|
|
"loss": 1.3244,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0019926272790674504,
|
|
"grad_norm": 1.17272634152426,
|
|
"learning_rate": 7.692307692307694e-06,
|
|
"loss": 1.2691,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0023911527348809405,
|
|
"grad_norm": 0.8938096517547656,
|
|
"learning_rate": 9.230769230769232e-06,
|
|
"loss": 1.3528,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.0027896781906944306,
|
|
"grad_norm": 1.4781168610568196,
|
|
"learning_rate": 1.076923076923077e-05,
|
|
"loss": 1.3365,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.0031882036465079207,
|
|
"grad_norm": 1.323556020239157,
|
|
"learning_rate": 1.230769230769231e-05,
|
|
"loss": 1.3375,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.0035867291023214108,
|
|
"grad_norm": 1.9185267488446602,
|
|
"learning_rate": 1.3846153846153847e-05,
|
|
"loss": 1.2539,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.003985254558134901,
|
|
"grad_norm": 1.674769393300418,
|
|
"learning_rate": 1.5384615384615387e-05,
|
|
"loss": 1.2846,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.004383780013948391,
|
|
"grad_norm": 1.509254464009656,
|
|
"learning_rate": 1.6923076923076924e-05,
|
|
"loss": 1.2827,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.004782305469761881,
|
|
"grad_norm": 1.3888982819984244,
|
|
"learning_rate": 1.8461538461538465e-05,
|
|
"loss": 1.323,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.0051808309255753715,
|
|
"grad_norm": 1.5318573252337477,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.2514,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.005579356381388861,
|
|
"grad_norm": 1.7716074190442104,
|
|
"learning_rate": 2.153846153846154e-05,
|
|
"loss": 1.224,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.005977881837202352,
|
|
"grad_norm": 1.5774334632902784,
|
|
"learning_rate": 2.3076923076923076e-05,
|
|
"loss": 1.2513,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.006376407293015841,
|
|
"grad_norm": 2.079535848411662,
|
|
"learning_rate": 2.461538461538462e-05,
|
|
"loss": 1.2324,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.006774932748829332,
|
|
"grad_norm": 1.6224827783116045,
|
|
"learning_rate": 2.6153846153846157e-05,
|
|
"loss": 1.2437,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.0071734582046428215,
|
|
"grad_norm": 1.731628237386042,
|
|
"learning_rate": 2.7692307692307694e-05,
|
|
"loss": 1.1941,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.007571983660456312,
|
|
"grad_norm": 1.6486789158728468,
|
|
"learning_rate": 2.923076923076923e-05,
|
|
"loss": 1.2157,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.007970509116269802,
|
|
"grad_norm": 1.5878767170549857,
|
|
"learning_rate": 3.0769230769230774e-05,
|
|
"loss": 1.2282,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.008369034572083291,
|
|
"grad_norm": 1.769036375459327,
|
|
"learning_rate": 3.230769230769231e-05,
|
|
"loss": 1.2276,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.008767560027896783,
|
|
"grad_norm": 1.6262212305434318,
|
|
"learning_rate": 3.384615384615385e-05,
|
|
"loss": 1.1966,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.009166085483710272,
|
|
"grad_norm": 1.6178408127403725,
|
|
"learning_rate": 3.538461538461539e-05,
|
|
"loss": 1.1681,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.009564610939523762,
|
|
"grad_norm": 1.8576583132326376,
|
|
"learning_rate": 3.692307692307693e-05,
|
|
"loss": 1.1733,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.009963136395337252,
|
|
"grad_norm": 2.10735796807257,
|
|
"learning_rate": 3.846153846153846e-05,
|
|
"loss": 1.1781,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.010361661851150743,
|
|
"grad_norm": 2.061441058129094,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1766,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.010760187306964233,
|
|
"grad_norm": 1.689955130193812,
|
|
"learning_rate": 3.9999983991661895e-05,
|
|
"loss": 1.2193,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.011158712762777722,
|
|
"grad_norm": 2.089801173287961,
|
|
"learning_rate": 3.99999359666732e-05,
|
|
"loss": 1.1864,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.011557238218591212,
|
|
"grad_norm": 2.1810170181408584,
|
|
"learning_rate": 3.999985592511079e-05,
|
|
"loss": 1.1981,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.011955763674404703,
|
|
"grad_norm": 1.76919939388419,
|
|
"learning_rate": 3.999974386710281e-05,
|
|
"loss": 1.0961,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.012354289130218193,
|
|
"grad_norm": 1.7129497533016933,
|
|
"learning_rate": 3.999959979282864e-05,
|
|
"loss": 1.1348,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.012752814586031683,
|
|
"grad_norm": 1.23643910459474,
|
|
"learning_rate": 3.999942370251891e-05,
|
|
"loss": 1.1678,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.013151340041845172,
|
|
"grad_norm": 1.9739746593085052,
|
|
"learning_rate": 3.999921559645554e-05,
|
|
"loss": 1.1677,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.013549865497658664,
|
|
"grad_norm": 1.2119052688289602,
|
|
"learning_rate": 3.9998975474971644e-05,
|
|
"loss": 1.1073,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.013948390953472153,
|
|
"grad_norm": 2.2576004723914758,
|
|
"learning_rate": 3.999870333845162e-05,
|
|
"loss": 1.1745,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.014346916409285643,
|
|
"grad_norm": 1.0352975506141129,
|
|
"learning_rate": 3.9998399187331125e-05,
|
|
"loss": 1.1283,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.014745441865099133,
|
|
"grad_norm": 2.0996501651362243,
|
|
"learning_rate": 3.999806302209705e-05,
|
|
"loss": 1.1212,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.015143967320912624,
|
|
"grad_norm": 1.225013558748945,
|
|
"learning_rate": 3.9997694843287546e-05,
|
|
"loss": 1.1209,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.015542492776726114,
|
|
"grad_norm": 1.764970024494282,
|
|
"learning_rate": 3.999729465149199e-05,
|
|
"loss": 1.1445,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.015941018232539603,
|
|
"grad_norm": 1.4826459735644733,
|
|
"learning_rate": 3.999686244735103e-05,
|
|
"loss": 1.1341,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.016339543688353095,
|
|
"grad_norm": 1.5561072965892055,
|
|
"learning_rate": 3.9996398231556565e-05,
|
|
"loss": 1.1582,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.016738069144166583,
|
|
"grad_norm": 1.659587994848137,
|
|
"learning_rate": 3.99959020048517e-05,
|
|
"loss": 1.0567,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.017136594599980074,
|
|
"grad_norm": 1.4377552352395278,
|
|
"learning_rate": 3.999537376803085e-05,
|
|
"loss": 1.1493,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.017535120055793565,
|
|
"grad_norm": 1.4553863448092164,
|
|
"learning_rate": 3.99948135219396e-05,
|
|
"loss": 1.135,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.017933645511607053,
|
|
"grad_norm": 1.2714080589572554,
|
|
"learning_rate": 3.9994221267474826e-05,
|
|
"loss": 1.1033,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.018332170967420545,
|
|
"grad_norm": 1.4352549542350836,
|
|
"learning_rate": 3.9993597005584625e-05,
|
|
"loss": 1.1441,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.018730696423234033,
|
|
"grad_norm": 1.355132046439583,
|
|
"learning_rate": 3.9992940737268344e-05,
|
|
"loss": 1.1654,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.019129221879047524,
|
|
"grad_norm": 1.2739353234997868,
|
|
"learning_rate": 3.9992252463576547e-05,
|
|
"loss": 1.0932,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.019527747334861015,
|
|
"grad_norm": 1.5599129273160852,
|
|
"learning_rate": 3.9991532185611054e-05,
|
|
"loss": 1.1289,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.019926272790674503,
|
|
"grad_norm": 1.2761680959247894,
|
|
"learning_rate": 3.9990779904524915e-05,
|
|
"loss": 1.1008,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.020324798246487995,
|
|
"grad_norm": 1.527368858852383,
|
|
"learning_rate": 3.998999562152239e-05,
|
|
"loss": 1.0787,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.020723323702301486,
|
|
"grad_norm": 1.3999003605132498,
|
|
"learning_rate": 3.9989179337859e-05,
|
|
"loss": 1.0898,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.021121849158114974,
|
|
"grad_norm": 1.3815814063131917,
|
|
"learning_rate": 3.998833105484148e-05,
|
|
"loss": 1.1101,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.021520374613928465,
|
|
"grad_norm": 1.2557770729577848,
|
|
"learning_rate": 3.998745077382779e-05,
|
|
"loss": 1.069,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.021918900069741953,
|
|
"grad_norm": 1.3393579953921733,
|
|
"learning_rate": 3.99865384962271e-05,
|
|
"loss": 1.0726,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.022317425525555445,
|
|
"grad_norm": 1.8263793843329788,
|
|
"learning_rate": 3.998559422349983e-05,
|
|
"loss": 1.0557,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.022715950981368936,
|
|
"grad_norm": 1.0325515310273663,
|
|
"learning_rate": 3.99846179571576e-05,
|
|
"loss": 1.0813,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.023114476437182424,
|
|
"grad_norm": 1.660896527304962,
|
|
"learning_rate": 3.998360969876325e-05,
|
|
"loss": 1.0583,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.023513001892995915,
|
|
"grad_norm": 0.9235555725660893,
|
|
"learning_rate": 3.998256944993083e-05,
|
|
"loss": 1.0914,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.023911527348809407,
|
|
"grad_norm": 1.5716895827106996,
|
|
"learning_rate": 3.99814972123256e-05,
|
|
"loss": 1.0919,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.024310052804622895,
|
|
"grad_norm": 1.1583937200957837,
|
|
"learning_rate": 3.998039298766405e-05,
|
|
"loss": 1.0255,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.024708578260436386,
|
|
"grad_norm": 1.7286427351895097,
|
|
"learning_rate": 3.9979256777713856e-05,
|
|
"loss": 1.0395,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.025107103716249874,
|
|
"grad_norm": 1.1208870057484686,
|
|
"learning_rate": 3.9978088584293894e-05,
|
|
"loss": 1.0619,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.025505629172063365,
|
|
"grad_norm": 1.302369859146436,
|
|
"learning_rate": 3.997688840927425e-05,
|
|
"loss": 1.0526,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.025904154627876857,
|
|
"grad_norm": 1.4174940189185974,
|
|
"learning_rate": 3.997565625457621e-05,
|
|
"loss": 1.0629,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.026302680083690345,
|
|
"grad_norm": 1.232886411420502,
|
|
"learning_rate": 3.9974392122172244e-05,
|
|
"loss": 1.0289,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.026701205539503836,
|
|
"grad_norm": 1.3590067350773096,
|
|
"learning_rate": 3.9973096014086017e-05,
|
|
"loss": 1.0471,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.027099730995317328,
|
|
"grad_norm": 1.1328281166100345,
|
|
"learning_rate": 3.9971767932392386e-05,
|
|
"loss": 1.0373,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.027498256451130815,
|
|
"grad_norm": 1.398126297704576,
|
|
"learning_rate": 3.997040787921739e-05,
|
|
"loss": 1.01,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.027896781906944307,
|
|
"grad_norm": 1.121173880074476,
|
|
"learning_rate": 3.996901585673824e-05,
|
|
"loss": 1.0509,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.028295307362757795,
|
|
"grad_norm": 1.1562605633658927,
|
|
"learning_rate": 3.996759186718334e-05,
|
|
"loss": 1.0394,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.028693832818571286,
|
|
"grad_norm": 1.563700864160097,
|
|
"learning_rate": 3.996613591283226e-05,
|
|
"loss": 1.0338,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.029092358274384778,
|
|
"grad_norm": 1.258313908870013,
|
|
"learning_rate": 3.9964647996015745e-05,
|
|
"loss": 1.0402,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.029490883730198265,
|
|
"grad_norm": 1.191082490937846,
|
|
"learning_rate": 3.996312811911569e-05,
|
|
"loss": 1.0405,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.029889409186011757,
|
|
"grad_norm": 1.1466323991622203,
|
|
"learning_rate": 3.996157628456518e-05,
|
|
"loss": 1.0211,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.030287934641825248,
|
|
"grad_norm": 1.528035955341438,
|
|
"learning_rate": 3.9959992494848433e-05,
|
|
"loss": 1.0462,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.030686460097638736,
|
|
"grad_norm": 1.324417703714102,
|
|
"learning_rate": 3.995837675250084e-05,
|
|
"loss": 1.0842,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.031084985553452227,
|
|
"grad_norm": 0.893828852913908,
|
|
"learning_rate": 3.995672906010893e-05,
|
|
"loss": 1.0135,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.03148351100926572,
|
|
"grad_norm": 1.5886985675595782,
|
|
"learning_rate": 3.9955049420310386e-05,
|
|
"loss": 0.985,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.03188203646507921,
|
|
"grad_norm": 0.8842933361031705,
|
|
"learning_rate": 3.995333783579404e-05,
|
|
"loss": 0.9826,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.032280561920892695,
|
|
"grad_norm": 1.2312713203427161,
|
|
"learning_rate": 3.995159430929984e-05,
|
|
"loss": 0.9933,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.03267908737670619,
|
|
"grad_norm": 0.968587201770918,
|
|
"learning_rate": 3.99498188436189e-05,
|
|
"loss": 1.0305,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.03307761283251968,
|
|
"grad_norm": 1.354446902187372,
|
|
"learning_rate": 3.994801144159343e-05,
|
|
"loss": 1.0015,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.033476138288333165,
|
|
"grad_norm": 0.9815177511320659,
|
|
"learning_rate": 3.9946172106116786e-05,
|
|
"loss": 1.0419,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.03387466374414666,
|
|
"grad_norm": 1.4163104282934211,
|
|
"learning_rate": 3.994430084013345e-05,
|
|
"loss": 1.0693,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.03427318919996015,
|
|
"grad_norm": 0.9575099047174793,
|
|
"learning_rate": 3.994239764663898e-05,
|
|
"loss": 1.0352,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.034671714655773636,
|
|
"grad_norm": 1.3071690946757393,
|
|
"learning_rate": 3.99404625286801e-05,
|
|
"loss": 0.9971,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.03507024011158713,
|
|
"grad_norm": 1.0094650013129123,
|
|
"learning_rate": 3.993849548935459e-05,
|
|
"loss": 1.0347,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.03546876556740062,
|
|
"grad_norm": 1.2105057500431875,
|
|
"learning_rate": 3.993649653181138e-05,
|
|
"loss": 1.0249,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.03586729102321411,
|
|
"grad_norm": 1.1494038215569387,
|
|
"learning_rate": 3.9934465659250445e-05,
|
|
"loss": 1.07,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.036265816479027595,
|
|
"grad_norm": 1.3619795030427553,
|
|
"learning_rate": 3.993240287492288e-05,
|
|
"loss": 0.9727,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.03666434193484109,
|
|
"grad_norm": 1.0095282991348078,
|
|
"learning_rate": 3.993030818213087e-05,
|
|
"loss": 1.0542,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.03706286739065458,
|
|
"grad_norm": 1.233700566815371,
|
|
"learning_rate": 3.992818158422766e-05,
|
|
"loss": 1.0034,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.037461392846468065,
|
|
"grad_norm": 1.0447313763347152,
|
|
"learning_rate": 3.992602308461758e-05,
|
|
"loss": 1.0058,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.03785991830228156,
|
|
"grad_norm": 1.0696169481085038,
|
|
"learning_rate": 3.992383268675603e-05,
|
|
"loss": 1.0478,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.03825844375809505,
|
|
"grad_norm": 1.3030274633669099,
|
|
"learning_rate": 3.9921610394149484e-05,
|
|
"loss": 0.9885,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.038656969213908536,
|
|
"grad_norm": 0.9547168721038842,
|
|
"learning_rate": 3.991935621035545e-05,
|
|
"loss": 1.0126,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.03905549466972203,
|
|
"grad_norm": 1.0282165364592126,
|
|
"learning_rate": 3.9917070138982496e-05,
|
|
"loss": 1.0352,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.03945402012553552,
|
|
"grad_norm": 1.4052288957523145,
|
|
"learning_rate": 3.991475218369026e-05,
|
|
"loss": 0.9908,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.03985254558134901,
|
|
"grad_norm": 0.883707027247818,
|
|
"learning_rate": 3.99124023481894e-05,
|
|
"loss": 1.0155,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.0402510710371625,
|
|
"grad_norm": 1.0103744787259499,
|
|
"learning_rate": 3.991002063624159e-05,
|
|
"loss": 1.0398,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.04064959649297599,
|
|
"grad_norm": 1.3196267795391554,
|
|
"learning_rate": 3.9907607051659594e-05,
|
|
"loss": 0.9986,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.04104812194878948,
|
|
"grad_norm": 0.9068591396167901,
|
|
"learning_rate": 3.990516159830712e-05,
|
|
"loss": 0.988,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.04144664740460297,
|
|
"grad_norm": 1.3332646337147993,
|
|
"learning_rate": 3.9902684280098965e-05,
|
|
"loss": 1.0022,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.04184517286041646,
|
|
"grad_norm": 1.0383165114992166,
|
|
"learning_rate": 3.990017510100088e-05,
|
|
"loss": 0.9767,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.04224369831622995,
|
|
"grad_norm": 1.0850955219468192,
|
|
"learning_rate": 3.9897634065029656e-05,
|
|
"loss": 1.0166,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.042642223772043436,
|
|
"grad_norm": 1.0137112717519785,
|
|
"learning_rate": 3.989506117625306e-05,
|
|
"loss": 1.0039,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.04304074922785693,
|
|
"grad_norm": 1.3161286100477132,
|
|
"learning_rate": 3.989245643878987e-05,
|
|
"loss": 1.031,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.04343927468367042,
|
|
"grad_norm": 0.9789302387291591,
|
|
"learning_rate": 3.988981985680983e-05,
|
|
"loss": 1.0007,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.04383780013948391,
|
|
"grad_norm": 1.367535024910473,
|
|
"learning_rate": 3.9887151434533674e-05,
|
|
"loss": 1.018,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.0442363255952974,
|
|
"grad_norm": 0.7004934620329838,
|
|
"learning_rate": 3.988445117623311e-05,
|
|
"loss": 0.9821,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.04463485105111089,
|
|
"grad_norm": 1.158874430209204,
|
|
"learning_rate": 3.9881719086230786e-05,
|
|
"loss": 0.9865,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.04503337650692438,
|
|
"grad_norm": 1.152431912909897,
|
|
"learning_rate": 3.9878955168900334e-05,
|
|
"loss": 0.9645,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.04543190196273787,
|
|
"grad_norm": 1.1079205102947556,
|
|
"learning_rate": 3.987615942866632e-05,
|
|
"loss": 0.9582,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.04583042741855136,
|
|
"grad_norm": 1.1791654374723093,
|
|
"learning_rate": 3.987333187000427e-05,
|
|
"loss": 1.0214,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.04622895287436485,
|
|
"grad_norm": 0.936906534851351,
|
|
"learning_rate": 3.9870472497440624e-05,
|
|
"loss": 1.0127,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.04662747833017834,
|
|
"grad_norm": 1.092836008794883,
|
|
"learning_rate": 3.986758131555278e-05,
|
|
"loss": 0.9664,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.04702600378599183,
|
|
"grad_norm": 1.094413912535255,
|
|
"learning_rate": 3.986465832896902e-05,
|
|
"loss": 0.9757,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.04742452924180532,
|
|
"grad_norm": 1.0623495271819532,
|
|
"learning_rate": 3.986170354236856e-05,
|
|
"loss": 0.9984,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.047823054697618814,
|
|
"grad_norm": 0.854179583596702,
|
|
"learning_rate": 3.985871696048154e-05,
|
|
"loss": 0.9864,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.0482215801534323,
|
|
"grad_norm": 1.0432520232855218,
|
|
"learning_rate": 3.9855698588088965e-05,
|
|
"loss": 0.9548,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.04862010560924579,
|
|
"grad_norm": 1.0755622132654334,
|
|
"learning_rate": 3.9852648430022754e-05,
|
|
"loss": 0.9485,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.04901863106505928,
|
|
"grad_norm": 1.2217694552157112,
|
|
"learning_rate": 3.984956649116571e-05,
|
|
"loss": 0.9855,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.04941715652087277,
|
|
"grad_norm": 1.0275276231271884,
|
|
"learning_rate": 3.984645277645149e-05,
|
|
"loss": 0.9964,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.04981568197668626,
|
|
"grad_norm": 1.1178940979524548,
|
|
"learning_rate": 3.984330729086464e-05,
|
|
"loss": 0.9497,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.05021420743249975,
|
|
"grad_norm": 0.741923762221831,
|
|
"learning_rate": 3.984013003944056e-05,
|
|
"loss": 1.0072,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.05061273288831324,
|
|
"grad_norm": 0.8682737579433879,
|
|
"learning_rate": 3.983692102726551e-05,
|
|
"loss": 1.0082,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.05101125834412673,
|
|
"grad_norm": 1.0434473812056535,
|
|
"learning_rate": 3.983368025947657e-05,
|
|
"loss": 0.9831,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.05140978379994022,
|
|
"grad_norm": 1.022692118220617,
|
|
"learning_rate": 3.983040774126169e-05,
|
|
"loss": 0.9566,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.051808309255753714,
|
|
"grad_norm": 1.2484490098325738,
|
|
"learning_rate": 3.9827103477859605e-05,
|
|
"loss": 1.0005,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.0522068347115672,
|
|
"grad_norm": 0.8271462851970588,
|
|
"learning_rate": 3.9823767474559905e-05,
|
|
"loss": 0.968,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.05260536016738069,
|
|
"grad_norm": 0.8519476486723382,
|
|
"learning_rate": 3.982039973670298e-05,
|
|
"loss": 0.9617,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.053003885623194184,
|
|
"grad_norm": 0.8333279737618872,
|
|
"learning_rate": 3.9817000269680005e-05,
|
|
"loss": 0.9757,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.05340241107900767,
|
|
"grad_norm": 0.8703944410797784,
|
|
"learning_rate": 3.981356907893298e-05,
|
|
"loss": 0.9917,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.05380093653482116,
|
|
"grad_norm": 0.9994780910035236,
|
|
"learning_rate": 3.981010616995465e-05,
|
|
"loss": 0.9603,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.054199461990634655,
|
|
"grad_norm": 1.1123731475641294,
|
|
"learning_rate": 3.980661154828857e-05,
|
|
"loss": 0.9695,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.05459798744644814,
|
|
"grad_norm": 0.9337508858933264,
|
|
"learning_rate": 3.980308521952905e-05,
|
|
"loss": 0.9786,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.05499651290226163,
|
|
"grad_norm": 0.8773514301553659,
|
|
"learning_rate": 3.979952718932116e-05,
|
|
"loss": 0.9829,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.05539503835807512,
|
|
"grad_norm": 0.8259379275752252,
|
|
"learning_rate": 3.97959374633607e-05,
|
|
"loss": 0.9731,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.055793563813888614,
|
|
"grad_norm": 0.9481177250720214,
|
|
"learning_rate": 3.979231604739423e-05,
|
|
"loss": 1.0004,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0561920892697021,
|
|
"grad_norm": 1.0333391418969482,
|
|
"learning_rate": 3.978866294721904e-05,
|
|
"loss": 0.9685,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.05659061472551559,
|
|
"grad_norm": 0.9955889948584824,
|
|
"learning_rate": 3.9784978168683134e-05,
|
|
"loss": 0.9716,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.056989140181329084,
|
|
"grad_norm": 1.0603086583420307,
|
|
"learning_rate": 3.978126171768523e-05,
|
|
"loss": 0.9801,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.05738766563714257,
|
|
"grad_norm": 0.812587571522746,
|
|
"learning_rate": 3.977751360017474e-05,
|
|
"loss": 0.9595,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.05778619109295606,
|
|
"grad_norm": 0.7781386777987177,
|
|
"learning_rate": 3.97737338221518e-05,
|
|
"loss": 1.0095,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.058184716548769555,
|
|
"grad_norm": 0.9828802357688441,
|
|
"learning_rate": 3.976992238966719e-05,
|
|
"loss": 0.992,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.05858324200458304,
|
|
"grad_norm": 0.9416827586556631,
|
|
"learning_rate": 3.976607930882238e-05,
|
|
"loss": 0.9628,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.05898176746039653,
|
|
"grad_norm": 0.7650913970674944,
|
|
"learning_rate": 3.97622045857695e-05,
|
|
"loss": 0.9995,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.059380292916210026,
|
|
"grad_norm": 0.6668203189771907,
|
|
"learning_rate": 3.9758298226711346e-05,
|
|
"loss": 0.9709,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.059778818372023514,
|
|
"grad_norm": 0.9120833321517047,
|
|
"learning_rate": 3.975436023790135e-05,
|
|
"loss": 0.9644,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.060177343827837,
|
|
"grad_norm": 1.0907868368195024,
|
|
"learning_rate": 3.975039062564357e-05,
|
|
"loss": 0.9628,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.060575869283650496,
|
|
"grad_norm": 0.9368612099613929,
|
|
"learning_rate": 3.9746389396292705e-05,
|
|
"loss": 0.9937,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.060974394739463984,
|
|
"grad_norm": 0.9737465093992717,
|
|
"learning_rate": 3.974235655625405e-05,
|
|
"loss": 0.961,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.06137292019527747,
|
|
"grad_norm": 0.8996382068900802,
|
|
"learning_rate": 3.973829211198352e-05,
|
|
"loss": 0.9339,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.06177144565109096,
|
|
"grad_norm": 0.9165314697100433,
|
|
"learning_rate": 3.973419606998761e-05,
|
|
"loss": 0.9568,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.062169971106904455,
|
|
"grad_norm": 0.9274654639084001,
|
|
"learning_rate": 3.9730068436823395e-05,
|
|
"loss": 0.9389,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.06256849656271794,
|
|
"grad_norm": 0.8441046935557636,
|
|
"learning_rate": 3.9725909219098546e-05,
|
|
"loss": 0.9388,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.06296702201853144,
|
|
"grad_norm": 0.9902084616052694,
|
|
"learning_rate": 3.972171842347127e-05,
|
|
"loss": 0.9596,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.06336554747434492,
|
|
"grad_norm": 1.1115069818338272,
|
|
"learning_rate": 3.9717496056650325e-05,
|
|
"loss": 0.9421,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.06376407293015841,
|
|
"grad_norm": 0.9808461355374265,
|
|
"learning_rate": 3.9713242125395035e-05,
|
|
"loss": 0.9549,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.06416259838597191,
|
|
"grad_norm": 0.6838984370781541,
|
|
"learning_rate": 3.970895663651523e-05,
|
|
"loss": 0.9577,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.06456112384178539,
|
|
"grad_norm": 0.5849603441312805,
|
|
"learning_rate": 3.970463959687127e-05,
|
|
"loss": 0.9391,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.06495964929759888,
|
|
"grad_norm": 0.8012305866704266,
|
|
"learning_rate": 3.9700291013374005e-05,
|
|
"loss": 0.9749,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.06535817475341238,
|
|
"grad_norm": 0.9116141961043895,
|
|
"learning_rate": 3.969591089298481e-05,
|
|
"loss": 0.9734,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.06575670020922586,
|
|
"grad_norm": 0.7666536547751186,
|
|
"learning_rate": 3.9691499242715524e-05,
|
|
"loss": 0.9679,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.06615522566503935,
|
|
"grad_norm": 0.5587510714841003,
|
|
"learning_rate": 3.968705606962847e-05,
|
|
"loss": 0.9581,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.06655375112085285,
|
|
"grad_norm": 0.5276592494284221,
|
|
"learning_rate": 3.9682581380836415e-05,
|
|
"loss": 0.9171,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.06695227657666633,
|
|
"grad_norm": 0.7394645356756339,
|
|
"learning_rate": 3.967807518350261e-05,
|
|
"loss": 0.9612,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.06735080203247983,
|
|
"grad_norm": 1.1007193079182445,
|
|
"learning_rate": 3.967353748484071e-05,
|
|
"loss": 0.9118,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.06774932748829332,
|
|
"grad_norm": 1.0581797805010837,
|
|
"learning_rate": 3.966896829211483e-05,
|
|
"loss": 0.9641,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0681478529441068,
|
|
"grad_norm": 0.8757602622657974,
|
|
"learning_rate": 3.966436761263949e-05,
|
|
"loss": 0.9566,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.0685463783999203,
|
|
"grad_norm": 0.8687270000650961,
|
|
"learning_rate": 3.96597354537796e-05,
|
|
"loss": 0.9701,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.06894490385573379,
|
|
"grad_norm": 1.0166656418615307,
|
|
"learning_rate": 3.965507182295049e-05,
|
|
"loss": 0.9564,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.06934342931154727,
|
|
"grad_norm": 0.8215033487256318,
|
|
"learning_rate": 3.965037672761785e-05,
|
|
"loss": 1.0189,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.06974195476736077,
|
|
"grad_norm": 0.7260355443552792,
|
|
"learning_rate": 3.964565017529775e-05,
|
|
"loss": 0.9431,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.07014048022317426,
|
|
"grad_norm": 0.7653437077317252,
|
|
"learning_rate": 3.9640892173556624e-05,
|
|
"loss": 0.947,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.07053900567898774,
|
|
"grad_norm": 0.9116401355112523,
|
|
"learning_rate": 3.963610273001122e-05,
|
|
"loss": 0.9472,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.07093753113480124,
|
|
"grad_norm": 0.9609189669126867,
|
|
"learning_rate": 3.963128185232866e-05,
|
|
"loss": 0.9427,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.07133605659061472,
|
|
"grad_norm": 0.8565841157727021,
|
|
"learning_rate": 3.9626429548226364e-05,
|
|
"loss": 0.9477,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.07173458204642821,
|
|
"grad_norm": 0.7814839364600451,
|
|
"learning_rate": 3.962154582547205e-05,
|
|
"loss": 0.9094,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.07213310750224171,
|
|
"grad_norm": 0.7824911161278741,
|
|
"learning_rate": 3.961663069188377e-05,
|
|
"loss": 0.9647,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.07253163295805519,
|
|
"grad_norm": 0.8488502117489565,
|
|
"learning_rate": 3.9611684155329825e-05,
|
|
"loss": 0.9634,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.07293015841386868,
|
|
"grad_norm": 0.8663407155900105,
|
|
"learning_rate": 3.9606706223728796e-05,
|
|
"loss": 0.9522,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.07332868386968218,
|
|
"grad_norm": 0.8427930838971712,
|
|
"learning_rate": 3.960169690504952e-05,
|
|
"loss": 0.957,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.07372720932549566,
|
|
"grad_norm": 0.8728940813219989,
|
|
"learning_rate": 3.9596656207311096e-05,
|
|
"loss": 0.9103,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.07412573478130915,
|
|
"grad_norm": 0.8964681349142457,
|
|
"learning_rate": 3.9591584138582835e-05,
|
|
"loss": 0.9783,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.07452426023712265,
|
|
"grad_norm": 0.747475640936641,
|
|
"learning_rate": 3.958648070698428e-05,
|
|
"loss": 0.9343,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.07492278569293613,
|
|
"grad_norm": 0.6081767246649388,
|
|
"learning_rate": 3.9581345920685176e-05,
|
|
"loss": 0.9426,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.07532131114874963,
|
|
"grad_norm": 0.646327313636509,
|
|
"learning_rate": 3.957617978790546e-05,
|
|
"loss": 0.936,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.07571983660456312,
|
|
"grad_norm": 0.5762067425821266,
|
|
"learning_rate": 3.9570982316915245e-05,
|
|
"loss": 0.9869,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.0761183620603766,
|
|
"grad_norm": 0.5277633100224635,
|
|
"learning_rate": 3.956575351603484e-05,
|
|
"loss": 0.9247,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.0765168875161901,
|
|
"grad_norm": 0.6079283681455546,
|
|
"learning_rate": 3.9560493393634665e-05,
|
|
"loss": 0.9003,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.07691541297200359,
|
|
"grad_norm": 0.6485268072649816,
|
|
"learning_rate": 3.955520195813531e-05,
|
|
"loss": 0.9428,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.07731393842781707,
|
|
"grad_norm": 0.6753541169437033,
|
|
"learning_rate": 3.954987921800749e-05,
|
|
"loss": 0.9546,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.07771246388363057,
|
|
"grad_norm": 0.6320121035158947,
|
|
"learning_rate": 3.954452518177201e-05,
|
|
"loss": 0.9425,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.07811098933944406,
|
|
"grad_norm": 0.8024382967580528,
|
|
"learning_rate": 3.953913985799982e-05,
|
|
"loss": 0.9575,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.07850951479525754,
|
|
"grad_norm": 0.6451828329766384,
|
|
"learning_rate": 3.95337232553119e-05,
|
|
"loss": 0.9618,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.07890804025107104,
|
|
"grad_norm": 0.5637480570882453,
|
|
"learning_rate": 3.952827538237934e-05,
|
|
"loss": 0.9436,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.07930656570688453,
|
|
"grad_norm": 0.6287403860445728,
|
|
"learning_rate": 3.952279624792329e-05,
|
|
"loss": 0.9585,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.07970509116269801,
|
|
"grad_norm": 0.6133071011985074,
|
|
"learning_rate": 3.9517285860714915e-05,
|
|
"loss": 0.9447,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.08010361661851151,
|
|
"grad_norm": 0.5782665343325509,
|
|
"learning_rate": 3.951174422957545e-05,
|
|
"loss": 0.9381,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.080502142074325,
|
|
"grad_norm": 0.5255985375741193,
|
|
"learning_rate": 3.950617136337611e-05,
|
|
"loss": 0.893,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.08090066753013848,
|
|
"grad_norm": 0.5926087052436324,
|
|
"learning_rate": 3.950056727103813e-05,
|
|
"loss": 0.9226,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.08129919298595198,
|
|
"grad_norm": 0.6283429524618049,
|
|
"learning_rate": 3.949493196153274e-05,
|
|
"loss": 0.9381,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.08169771844176547,
|
|
"grad_norm": 0.6457268317630597,
|
|
"learning_rate": 3.948926544388112e-05,
|
|
"loss": 0.9097,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.08209624389757895,
|
|
"grad_norm": 0.8396169584539872,
|
|
"learning_rate": 3.948356772715443e-05,
|
|
"loss": 0.9303,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.08249476935339245,
|
|
"grad_norm": 0.9970461466822023,
|
|
"learning_rate": 3.9477838820473776e-05,
|
|
"loss": 0.9218,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.08289329480920594,
|
|
"grad_norm": 1.1370242066432408,
|
|
"learning_rate": 3.9472078733010174e-05,
|
|
"loss": 0.9393,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.08329182026501943,
|
|
"grad_norm": 0.8481740560416752,
|
|
"learning_rate": 3.946628747398457e-05,
|
|
"loss": 0.9539,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.08369034572083292,
|
|
"grad_norm": 0.7749044455116462,
|
|
"learning_rate": 3.94604650526678e-05,
|
|
"loss": 0.9064,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.0840888711766464,
|
|
"grad_norm": 0.8242769108366514,
|
|
"learning_rate": 3.9454611478380604e-05,
|
|
"loss": 0.9578,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.0844873966324599,
|
|
"grad_norm": 0.7060014980899263,
|
|
"learning_rate": 3.944872676049358e-05,
|
|
"loss": 0.9586,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.08488592208827339,
|
|
"grad_norm": 0.8645214673367116,
|
|
"learning_rate": 3.944281090842718e-05,
|
|
"loss": 0.919,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.08528444754408687,
|
|
"grad_norm": 1.0934973623844684,
|
|
"learning_rate": 3.943686393165171e-05,
|
|
"loss": 0.955,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.08568297299990037,
|
|
"grad_norm": 0.8673963340448777,
|
|
"learning_rate": 3.943088583968726e-05,
|
|
"loss": 0.9304,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.08608149845571386,
|
|
"grad_norm": 0.835352668198479,
|
|
"learning_rate": 3.9424876642103805e-05,
|
|
"loss": 0.9615,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.08648002391152734,
|
|
"grad_norm": 0.8611507271565368,
|
|
"learning_rate": 3.9418836348521045e-05,
|
|
"loss": 0.929,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.08687854936734084,
|
|
"grad_norm": 0.8251142229076397,
|
|
"learning_rate": 3.941276496860849e-05,
|
|
"loss": 0.9642,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.08727707482315433,
|
|
"grad_norm": 0.7930096914994095,
|
|
"learning_rate": 3.9406662512085416e-05,
|
|
"loss": 0.9622,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.08767560027896781,
|
|
"grad_norm": 0.6629634789706741,
|
|
"learning_rate": 3.940052898872084e-05,
|
|
"loss": 0.9083,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.08807412573478131,
|
|
"grad_norm": 0.6439473882747895,
|
|
"learning_rate": 3.93943644083335e-05,
|
|
"loss": 0.9155,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.0884726511905948,
|
|
"grad_norm": 0.7838839076395734,
|
|
"learning_rate": 3.9388168780791883e-05,
|
|
"loss": 0.9127,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.08887117664640828,
|
|
"grad_norm": 0.7675321153839495,
|
|
"learning_rate": 3.938194211601416e-05,
|
|
"loss": 0.9313,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.08926970210222178,
|
|
"grad_norm": 0.670858178864275,
|
|
"learning_rate": 3.937568442396817e-05,
|
|
"loss": 0.9215,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.08966822755803527,
|
|
"grad_norm": 0.5430100456071535,
|
|
"learning_rate": 3.936939571467145e-05,
|
|
"loss": 0.9215,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.09006675301384875,
|
|
"grad_norm": 0.645122412385762,
|
|
"learning_rate": 3.9363075998191175e-05,
|
|
"loss": 0.9518,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.09046527846966225,
|
|
"grad_norm": 0.7124302784985599,
|
|
"learning_rate": 3.935672528464416e-05,
|
|
"loss": 0.9472,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.09086380392547574,
|
|
"grad_norm": 0.6944932728108557,
|
|
"learning_rate": 3.935034358419684e-05,
|
|
"loss": 0.9043,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.09126232938128923,
|
|
"grad_norm": 0.7428731739366404,
|
|
"learning_rate": 3.934393090706527e-05,
|
|
"loss": 0.9276,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.09166085483710272,
|
|
"grad_norm": 0.7237371542570604,
|
|
"learning_rate": 3.9337487263515065e-05,
|
|
"loss": 0.966,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.09205938029291622,
|
|
"grad_norm": 0.7584658608788947,
|
|
"learning_rate": 3.9331012663861435e-05,
|
|
"loss": 0.9195,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.0924579057487297,
|
|
"grad_norm": 0.8151922759638645,
|
|
"learning_rate": 3.932450711846914e-05,
|
|
"loss": 0.9352,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.09285643120454319,
|
|
"grad_norm": 0.7799720068156271,
|
|
"learning_rate": 3.931797063775246e-05,
|
|
"loss": 0.867,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.09325495666035669,
|
|
"grad_norm": 0.7195572843892059,
|
|
"learning_rate": 3.931140323217524e-05,
|
|
"loss": 0.9485,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.09365348211617017,
|
|
"grad_norm": 0.5676394070871306,
|
|
"learning_rate": 3.9304804912250785e-05,
|
|
"loss": 0.9479,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.09405200757198366,
|
|
"grad_norm": 0.603554245394414,
|
|
"learning_rate": 3.9298175688541916e-05,
|
|
"loss": 0.8831,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.09445053302779716,
|
|
"grad_norm": 0.7416220601956737,
|
|
"learning_rate": 3.9291515571660926e-05,
|
|
"loss": 0.9537,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.09484905848361064,
|
|
"grad_norm": 0.7400965861280613,
|
|
"learning_rate": 3.928482457226954e-05,
|
|
"loss": 0.9087,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.09524758393942413,
|
|
"grad_norm": 0.8210302591504622,
|
|
"learning_rate": 3.927810270107894e-05,
|
|
"loss": 0.8909,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.09564610939523763,
|
|
"grad_norm": 0.7137333890568919,
|
|
"learning_rate": 3.9271349968849735e-05,
|
|
"loss": 0.9301,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.09604463485105111,
|
|
"grad_norm": 0.5314296904513427,
|
|
"learning_rate": 3.9264566386391925e-05,
|
|
"loss": 0.9233,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.0964431603068646,
|
|
"grad_norm": 0.6166230859092278,
|
|
"learning_rate": 3.925775196456488e-05,
|
|
"loss": 0.8958,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.09684168576267808,
|
|
"grad_norm": 0.6958069670048053,
|
|
"learning_rate": 3.925090671427739e-05,
|
|
"loss": 0.9278,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.09724021121849158,
|
|
"grad_norm": 0.6889489906309647,
|
|
"learning_rate": 3.9244030646487524e-05,
|
|
"loss": 0.9453,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.09763873667430507,
|
|
"grad_norm": 0.6113796976521826,
|
|
"learning_rate": 3.923712377220275e-05,
|
|
"loss": 0.9042,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.09803726213011855,
|
|
"grad_norm": 0.5576650794524141,
|
|
"learning_rate": 3.9230186102479824e-05,
|
|
"loss": 0.9457,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.09843578758593205,
|
|
"grad_norm": 0.5050600559673174,
|
|
"learning_rate": 3.922321764842479e-05,
|
|
"loss": 0.9128,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.09883431304174554,
|
|
"grad_norm": 0.5792206556379802,
|
|
"learning_rate": 3.9216218421193e-05,
|
|
"loss": 0.9346,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.09923283849755903,
|
|
"grad_norm": 0.7117260079905121,
|
|
"learning_rate": 3.9209188431989044e-05,
|
|
"loss": 0.9242,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.09963136395337252,
|
|
"grad_norm": 0.5411445590412157,
|
|
"learning_rate": 3.920212769206676e-05,
|
|
"loss": 0.8808,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.10002988940918602,
|
|
"grad_norm": 0.6383206470777513,
|
|
"learning_rate": 3.919503621272924e-05,
|
|
"loss": 0.9014,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.1004284148649995,
|
|
"grad_norm": 0.5870726238645826,
|
|
"learning_rate": 3.918791400532874e-05,
|
|
"loss": 0.8833,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.10082694032081299,
|
|
"grad_norm": 0.5677734635394229,
|
|
"learning_rate": 3.918076108126675e-05,
|
|
"loss": 0.9128,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.10122546577662649,
|
|
"grad_norm": 0.6397706154970396,
|
|
"learning_rate": 3.91735774519939e-05,
|
|
"loss": 0.8892,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.10162399123243997,
|
|
"grad_norm": 0.6025324225700743,
|
|
"learning_rate": 3.916636312900999e-05,
|
|
"loss": 0.8924,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.10202251668825346,
|
|
"grad_norm": 0.5993884554898958,
|
|
"learning_rate": 3.9159118123863964e-05,
|
|
"loss": 0.9249,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.10242104214406696,
|
|
"grad_norm": 0.5139396732603375,
|
|
"learning_rate": 3.915184244815385e-05,
|
|
"loss": 0.8977,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.10281956759988044,
|
|
"grad_norm": 0.582154606226688,
|
|
"learning_rate": 3.9144536113526806e-05,
|
|
"loss": 0.9064,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.10321809305569393,
|
|
"grad_norm": 0.5737111995658692,
|
|
"learning_rate": 3.9137199131679064e-05,
|
|
"loss": 0.9003,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.10361661851150743,
|
|
"grad_norm": 0.5501293796446101,
|
|
"learning_rate": 3.912983151435591e-05,
|
|
"loss": 0.9053,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.10401514396732091,
|
|
"grad_norm": 0.5616191359055138,
|
|
"learning_rate": 3.912243327335167e-05,
|
|
"loss": 0.9059,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.1044136694231344,
|
|
"grad_norm": 0.5779090114992178,
|
|
"learning_rate": 3.91150044205097e-05,
|
|
"loss": 0.9215,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.1048121948789479,
|
|
"grad_norm": 0.5621858273638006,
|
|
"learning_rate": 3.910754496772236e-05,
|
|
"loss": 0.9231,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.10521072033476138,
|
|
"grad_norm": 0.5187195624713219,
|
|
"learning_rate": 3.9100054926931e-05,
|
|
"loss": 0.9077,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.10560924579057487,
|
|
"grad_norm": 0.5484074323672972,
|
|
"learning_rate": 3.909253431012592e-05,
|
|
"loss": 0.8943,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.10600777124638837,
|
|
"grad_norm": 0.5860726206207597,
|
|
"learning_rate": 3.9084983129346386e-05,
|
|
"loss": 0.9215,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.10640629670220185,
|
|
"grad_norm": 0.5785145319929371,
|
|
"learning_rate": 3.907740139668058e-05,
|
|
"loss": 0.9079,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.10680482215801534,
|
|
"grad_norm": 0.6293154987830761,
|
|
"learning_rate": 3.9069789124265595e-05,
|
|
"loss": 0.9199,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.10720334761382884,
|
|
"grad_norm": 0.6138996610001156,
|
|
"learning_rate": 3.906214632428742e-05,
|
|
"loss": 0.9307,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.10760187306964232,
|
|
"grad_norm": 0.5574707399267468,
|
|
"learning_rate": 3.90544730089809e-05,
|
|
"loss": 0.9235,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.10800039852545582,
|
|
"grad_norm": 0.49410092240642955,
|
|
"learning_rate": 3.904676919062973e-05,
|
|
"loss": 0.8892,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.10839892398126931,
|
|
"grad_norm": 0.551637520171974,
|
|
"learning_rate": 3.903903488156646e-05,
|
|
"loss": 0.9133,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.10879744943708279,
|
|
"grad_norm": 0.5731759290280689,
|
|
"learning_rate": 3.903127009417244e-05,
|
|
"loss": 0.8961,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.10919597489289629,
|
|
"grad_norm": 0.5762364732869328,
|
|
"learning_rate": 3.9023474840877775e-05,
|
|
"loss": 0.8803,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.10959450034870978,
|
|
"grad_norm": 0.6947758285401612,
|
|
"learning_rate": 3.901564913416139e-05,
|
|
"loss": 0.8906,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.10999302580452326,
|
|
"grad_norm": 0.7885605570685301,
|
|
"learning_rate": 3.9007792986550937e-05,
|
|
"loss": 0.9016,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.11039155126033676,
|
|
"grad_norm": 0.779205530434434,
|
|
"learning_rate": 3.8999906410622805e-05,
|
|
"loss": 0.909,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.11079007671615024,
|
|
"grad_norm": 0.9502303024617071,
|
|
"learning_rate": 3.899198941900209e-05,
|
|
"loss": 0.8972,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.11118860217196373,
|
|
"grad_norm": 0.7020948558600761,
|
|
"learning_rate": 3.898404202436258e-05,
|
|
"loss": 0.8992,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.11158712762777723,
|
|
"grad_norm": 0.6253004452655916,
|
|
"learning_rate": 3.8976064239426727e-05,
|
|
"loss": 0.8983,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.11198565308359071,
|
|
"grad_norm": 0.48947446603739525,
|
|
"learning_rate": 3.896805607696565e-05,
|
|
"loss": 0.9092,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.1123841785394042,
|
|
"grad_norm": 0.5305885289397677,
|
|
"learning_rate": 3.896001754979908e-05,
|
|
"loss": 0.8828,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.1127827039952177,
|
|
"grad_norm": 0.6511594701603155,
|
|
"learning_rate": 3.8951948670795356e-05,
|
|
"loss": 0.8949,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.11318122945103118,
|
|
"grad_norm": 0.7838264076235747,
|
|
"learning_rate": 3.8943849452871416e-05,
|
|
"loss": 0.9061,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.11357975490684467,
|
|
"grad_norm": 0.8176144028366352,
|
|
"learning_rate": 3.8935719908992776e-05,
|
|
"loss": 0.9139,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.11397828036265817,
|
|
"grad_norm": 0.7483860858548197,
|
|
"learning_rate": 3.892756005217347e-05,
|
|
"loss": 0.9092,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.11437680581847165,
|
|
"grad_norm": 0.6145924322571729,
|
|
"learning_rate": 3.891936989547608e-05,
|
|
"loss": 0.9052,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.11477533127428514,
|
|
"grad_norm": 0.5793992708257767,
|
|
"learning_rate": 3.891114945201168e-05,
|
|
"loss": 0.9041,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.11517385673009864,
|
|
"grad_norm": 0.6386399436855802,
|
|
"learning_rate": 3.890289873493984e-05,
|
|
"loss": 0.8765,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.11557238218591212,
|
|
"grad_norm": 0.7545452332949172,
|
|
"learning_rate": 3.889461775746858e-05,
|
|
"loss": 0.9407,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.11597090764172562,
|
|
"grad_norm": 0.643068181670375,
|
|
"learning_rate": 3.888630653285437e-05,
|
|
"loss": 0.9044,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.11636943309753911,
|
|
"grad_norm": 0.4963770968380342,
|
|
"learning_rate": 3.887796507440211e-05,
|
|
"loss": 0.9244,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.11676795855335259,
|
|
"grad_norm": 0.5330885480112182,
|
|
"learning_rate": 3.8869593395465066e-05,
|
|
"loss": 0.9007,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.11716648400916609,
|
|
"grad_norm": 0.6867642996793515,
|
|
"learning_rate": 3.8861191509444926e-05,
|
|
"loss": 0.8923,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.11756500946497958,
|
|
"grad_norm": 0.5931835622625073,
|
|
"learning_rate": 3.88527594297917e-05,
|
|
"loss": 0.9172,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.11796353492079306,
|
|
"grad_norm": 0.6693705563895682,
|
|
"learning_rate": 3.884429717000376e-05,
|
|
"loss": 0.8941,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.11836206037660656,
|
|
"grad_norm": 0.6616211136884201,
|
|
"learning_rate": 3.883580474362777e-05,
|
|
"loss": 0.9208,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.11876058583242005,
|
|
"grad_norm": 0.5241813662858397,
|
|
"learning_rate": 3.88272821642587e-05,
|
|
"loss": 0.9295,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.11915911128823353,
|
|
"grad_norm": 0.47998991090285037,
|
|
"learning_rate": 3.8818729445539765e-05,
|
|
"loss": 0.9134,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.11955763674404703,
|
|
"grad_norm": 0.5575410784453981,
|
|
"learning_rate": 3.881014660116246e-05,
|
|
"loss": 0.9264,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.11995616219986052,
|
|
"grad_norm": 0.5762249128335137,
|
|
"learning_rate": 3.880153364486649e-05,
|
|
"loss": 0.8924,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.120354687655674,
|
|
"grad_norm": 0.9257335770621549,
|
|
"learning_rate": 3.8792890590439764e-05,
|
|
"loss": 0.8861,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.1207532131114875,
|
|
"grad_norm": 0.5676730409091856,
|
|
"learning_rate": 3.878421745171839e-05,
|
|
"loss": 0.9112,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.12115173856730099,
|
|
"grad_norm": 0.4637670476081397,
|
|
"learning_rate": 3.87755142425866e-05,
|
|
"loss": 0.8917,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.12155026402311447,
|
|
"grad_norm": 0.5310661309184922,
|
|
"learning_rate": 3.8766780976976795e-05,
|
|
"loss": 0.9182,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.12194878947892797,
|
|
"grad_norm": 0.5584733508565086,
|
|
"learning_rate": 3.8758017668869484e-05,
|
|
"loss": 0.9396,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.12234731493474146,
|
|
"grad_norm": 0.5545890950572487,
|
|
"learning_rate": 3.8749224332293265e-05,
|
|
"loss": 0.9016,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.12274584039055494,
|
|
"grad_norm": 0.5692405766886073,
|
|
"learning_rate": 3.874040098132481e-05,
|
|
"loss": 0.8543,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.12314436584636844,
|
|
"grad_norm": 0.5829038395471384,
|
|
"learning_rate": 3.873154763008884e-05,
|
|
"loss": 0.8766,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.12354289130218192,
|
|
"grad_norm": 0.6399720498446062,
|
|
"learning_rate": 3.872266429275809e-05,
|
|
"loss": 0.8924,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.12394141675799542,
|
|
"grad_norm": 0.5563668304631704,
|
|
"learning_rate": 3.871375098355331e-05,
|
|
"loss": 0.9351,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.12433994221380891,
|
|
"grad_norm": 0.4891838671794899,
|
|
"learning_rate": 3.8704807716743235e-05,
|
|
"loss": 0.9084,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.12473846766962239,
|
|
"grad_norm": 0.5390514488310643,
|
|
"learning_rate": 3.869583450664454e-05,
|
|
"loss": 0.9006,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.12513699312543589,
|
|
"grad_norm": 0.6535963479715494,
|
|
"learning_rate": 3.868683136762185e-05,
|
|
"loss": 0.8946,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.12553551858124937,
|
|
"grad_norm": 0.6360077741778746,
|
|
"learning_rate": 3.867779831408768e-05,
|
|
"loss": 0.8997,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.12593404403706288,
|
|
"grad_norm": 0.47594974316839744,
|
|
"learning_rate": 3.8668735360502474e-05,
|
|
"loss": 0.9135,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.12633256949287636,
|
|
"grad_norm": 0.554635644525251,
|
|
"learning_rate": 3.865964252137449e-05,
|
|
"loss": 0.9056,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.12673109494868984,
|
|
"grad_norm": 0.6542560775862073,
|
|
"learning_rate": 3.8650519811259856e-05,
|
|
"loss": 0.8837,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.12712962040450335,
|
|
"grad_norm": 0.5504279116926618,
|
|
"learning_rate": 3.864136724476252e-05,
|
|
"loss": 0.909,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.12752814586031683,
|
|
"grad_norm": 0.4207363922400064,
|
|
"learning_rate": 3.863218483653423e-05,
|
|
"loss": 0.9199,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.1279266713161303,
|
|
"grad_norm": 0.5480250503031011,
|
|
"learning_rate": 3.862297260127447e-05,
|
|
"loss": 0.9115,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.12832519677194382,
|
|
"grad_norm": 0.7116612376007252,
|
|
"learning_rate": 3.8613730553730525e-05,
|
|
"loss": 0.902,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.1287237222277573,
|
|
"grad_norm": 0.7034441679085705,
|
|
"learning_rate": 3.8604458708697354e-05,
|
|
"loss": 0.93,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.12912224768357078,
|
|
"grad_norm": 0.6464461922880574,
|
|
"learning_rate": 3.859515708101766e-05,
|
|
"loss": 0.9027,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.1295207731393843,
|
|
"grad_norm": 0.5724183071806952,
|
|
"learning_rate": 3.858582568558179e-05,
|
|
"loss": 0.9152,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.12991929859519777,
|
|
"grad_norm": 0.5434975703367534,
|
|
"learning_rate": 3.857646453732776e-05,
|
|
"loss": 0.8873,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.13031782405101125,
|
|
"grad_norm": 0.5134121010042222,
|
|
"learning_rate": 3.856707365124122e-05,
|
|
"loss": 0.8728,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.13071634950682476,
|
|
"grad_norm": 0.5097236839503941,
|
|
"learning_rate": 3.85576530423554e-05,
|
|
"loss": 0.911,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.13111487496263824,
|
|
"grad_norm": 0.5227325664183777,
|
|
"learning_rate": 3.854820272575115e-05,
|
|
"loss": 0.8658,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.13151340041845172,
|
|
"grad_norm": 0.6322853032653781,
|
|
"learning_rate": 3.853872271655685e-05,
|
|
"loss": 0.891,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.13191192587426523,
|
|
"grad_norm": 0.5184506986493536,
|
|
"learning_rate": 3.852921302994841e-05,
|
|
"loss": 0.8612,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.1323104513300787,
|
|
"grad_norm": 0.5046807022502423,
|
|
"learning_rate": 3.8519673681149265e-05,
|
|
"loss": 0.8994,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.1327089767858922,
|
|
"grad_norm": 0.5061850051002039,
|
|
"learning_rate": 3.851010468543033e-05,
|
|
"loss": 0.8849,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.1331075022417057,
|
|
"grad_norm": 0.4935717896499033,
|
|
"learning_rate": 3.850050605810997e-05,
|
|
"loss": 0.9285,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.13350602769751918,
|
|
"grad_norm": 0.4947315091214366,
|
|
"learning_rate": 3.8490877814553996e-05,
|
|
"loss": 0.9004,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.13390455315333266,
|
|
"grad_norm": 0.46140205389577676,
|
|
"learning_rate": 3.848121997017563e-05,
|
|
"loss": 0.9065,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.13430307860914617,
|
|
"grad_norm": 0.47248289695698514,
|
|
"learning_rate": 3.847153254043547e-05,
|
|
"loss": 0.8805,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.13470160406495965,
|
|
"grad_norm": 0.45224697013215626,
|
|
"learning_rate": 3.846181554084147e-05,
|
|
"loss": 0.896,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.13510012952077313,
|
|
"grad_norm": 0.527417114425614,
|
|
"learning_rate": 3.8452068986948956e-05,
|
|
"loss": 0.9383,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.13549865497658664,
|
|
"grad_norm": 0.5092127958405034,
|
|
"learning_rate": 3.844229289436053e-05,
|
|
"loss": 0.8961,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.13589718043240012,
|
|
"grad_norm": 0.4746200986505316,
|
|
"learning_rate": 3.8432487278726084e-05,
|
|
"loss": 0.9281,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.1362957058882136,
|
|
"grad_norm": 0.484617132707988,
|
|
"learning_rate": 3.842265215574279e-05,
|
|
"loss": 0.8799,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.1366942313440271,
|
|
"grad_norm": 0.472139637172473,
|
|
"learning_rate": 3.8412787541155035e-05,
|
|
"loss": 0.8571,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.1370927567998406,
|
|
"grad_norm": 0.4750954980383929,
|
|
"learning_rate": 3.840289345075444e-05,
|
|
"loss": 0.8997,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.13749128225565407,
|
|
"grad_norm": 0.5058566298011136,
|
|
"learning_rate": 3.839296990037979e-05,
|
|
"loss": 0.8947,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.13788980771146758,
|
|
"grad_norm": 0.5034036144166951,
|
|
"learning_rate": 3.838301690591704e-05,
|
|
"loss": 0.856,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.13828833316728106,
|
|
"grad_norm": 0.5109042435371637,
|
|
"learning_rate": 3.8373034483299286e-05,
|
|
"loss": 0.8676,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.13868685862309454,
|
|
"grad_norm": 0.5076861609812875,
|
|
"learning_rate": 3.836302264850673e-05,
|
|
"loss": 0.8899,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.13908538407890805,
|
|
"grad_norm": 0.48688791345770777,
|
|
"learning_rate": 3.835298141756664e-05,
|
|
"loss": 0.8952,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.13948390953472153,
|
|
"grad_norm": 0.4294678692671596,
|
|
"learning_rate": 3.8342910806553374e-05,
|
|
"loss": 0.896,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.13988243499053502,
|
|
"grad_norm": 0.4759618640018106,
|
|
"learning_rate": 3.83328108315883e-05,
|
|
"loss": 0.8925,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.14028096044634852,
|
|
"grad_norm": 0.498083239156812,
|
|
"learning_rate": 3.8322681508839796e-05,
|
|
"loss": 0.897,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.140679485902162,
|
|
"grad_norm": 0.47774282716676997,
|
|
"learning_rate": 3.8312522854523236e-05,
|
|
"loss": 0.853,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.14107801135797549,
|
|
"grad_norm": 0.5425614790073936,
|
|
"learning_rate": 3.830233488490092e-05,
|
|
"loss": 0.9072,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.141476536813789,
|
|
"grad_norm": 0.607352655774501,
|
|
"learning_rate": 3.8292117616282116e-05,
|
|
"loss": 0.8849,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.14187506226960248,
|
|
"grad_norm": 0.622366562638722,
|
|
"learning_rate": 3.828187106502295e-05,
|
|
"loss": 0.8743,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.14227358772541596,
|
|
"grad_norm": 0.6880401152515128,
|
|
"learning_rate": 3.827159524752646e-05,
|
|
"loss": 0.854,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.14267211318122944,
|
|
"grad_norm": 0.6320544909726663,
|
|
"learning_rate": 3.8261290180242524e-05,
|
|
"loss": 0.8823,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.14307063863704295,
|
|
"grad_norm": 0.6117634467858145,
|
|
"learning_rate": 3.825095587966784e-05,
|
|
"loss": 0.8821,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.14346916409285643,
|
|
"grad_norm": 0.5586681204591263,
|
|
"learning_rate": 3.82405923623459e-05,
|
|
"loss": 0.8851,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.1438676895486699,
|
|
"grad_norm": 0.568103604064326,
|
|
"learning_rate": 3.823019964486698e-05,
|
|
"loss": 0.8963,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.14426621500448342,
|
|
"grad_norm": 0.5481484665397642,
|
|
"learning_rate": 3.8219777743868095e-05,
|
|
"loss": 0.8847,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.1446647404602969,
|
|
"grad_norm": 0.5839213790650319,
|
|
"learning_rate": 3.820932667603297e-05,
|
|
"loss": 0.8858,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.14506326591611038,
|
|
"grad_norm": 0.6803626614692434,
|
|
"learning_rate": 3.819884645809203e-05,
|
|
"loss": 0.9316,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.1454617913719239,
|
|
"grad_norm": 0.5826226983177064,
|
|
"learning_rate": 3.8188337106822364e-05,
|
|
"loss": 0.8926,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.14586031682773737,
|
|
"grad_norm": 0.4587053421690505,
|
|
"learning_rate": 3.8177798639047693e-05,
|
|
"loss": 0.9015,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.14625884228355085,
|
|
"grad_norm": 0.4979532996043012,
|
|
"learning_rate": 3.8167231071638355e-05,
|
|
"loss": 0.9084,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.14665736773936436,
|
|
"grad_norm": 0.6060462788501415,
|
|
"learning_rate": 3.815663442151127e-05,
|
|
"loss": 0.8913,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.14705589319517784,
|
|
"grad_norm": 0.5719962639011669,
|
|
"learning_rate": 3.8146008705629916e-05,
|
|
"loss": 0.9119,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.14745441865099132,
|
|
"grad_norm": 0.49076638405233397,
|
|
"learning_rate": 3.813535394100429e-05,
|
|
"loss": 0.8802,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.14785294410680483,
|
|
"grad_norm": 0.49594758931441285,
|
|
"learning_rate": 3.81246701446909e-05,
|
|
"loss": 0.8639,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.1482514695626183,
|
|
"grad_norm": 0.5940377132680764,
|
|
"learning_rate": 3.8113957333792744e-05,
|
|
"loss": 0.87,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.1486499950184318,
|
|
"grad_norm": 0.5596407953869648,
|
|
"learning_rate": 3.810321552545924e-05,
|
|
"loss": 0.8875,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.1490485204742453,
|
|
"grad_norm": 0.5587229850427988,
|
|
"learning_rate": 3.8092444736886235e-05,
|
|
"loss": 0.8823,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.14944704593005878,
|
|
"grad_norm": 0.6185912922060778,
|
|
"learning_rate": 3.808164498531598e-05,
|
|
"loss": 0.8736,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.14984557138587226,
|
|
"grad_norm": 0.5707944153693156,
|
|
"learning_rate": 3.8070816288037076e-05,
|
|
"loss": 0.9053,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.15024409684168577,
|
|
"grad_norm": 0.5131528156556673,
|
|
"learning_rate": 3.805995866238446e-05,
|
|
"loss": 0.9038,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.15064262229749925,
|
|
"grad_norm": 0.5289298616408312,
|
|
"learning_rate": 3.804907212573941e-05,
|
|
"loss": 0.9067,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.15104114775331273,
|
|
"grad_norm": 0.5460088042514601,
|
|
"learning_rate": 3.803815669552944e-05,
|
|
"loss": 0.8742,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.15143967320912624,
|
|
"grad_norm": 0.5901247804029622,
|
|
"learning_rate": 3.802721238922835e-05,
|
|
"loss": 0.8788,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.15183819866493972,
|
|
"grad_norm": 0.43400747036846915,
|
|
"learning_rate": 3.801623922435615e-05,
|
|
"loss": 0.8676,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.1522367241207532,
|
|
"grad_norm": 0.580607227815199,
|
|
"learning_rate": 3.800523721847906e-05,
|
|
"loss": 0.9247,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.1526352495765667,
|
|
"grad_norm": 0.553191736940903,
|
|
"learning_rate": 3.7994206389209457e-05,
|
|
"loss": 0.8516,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.1530337750323802,
|
|
"grad_norm": 0.5178209878197958,
|
|
"learning_rate": 3.7983146754205866e-05,
|
|
"loss": 0.8759,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.15343230048819367,
|
|
"grad_norm": 0.5241403248580444,
|
|
"learning_rate": 3.7972058331172935e-05,
|
|
"loss": 0.9084,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.15383082594400718,
|
|
"grad_norm": 0.4871129484635027,
|
|
"learning_rate": 3.796094113786137e-05,
|
|
"loss": 0.886,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.15422935139982066,
|
|
"grad_norm": 0.43638582131414316,
|
|
"learning_rate": 3.794979519206796e-05,
|
|
"loss": 0.8884,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.15462787685563414,
|
|
"grad_norm": 0.4833333706695009,
|
|
"learning_rate": 3.793862051163551e-05,
|
|
"loss": 0.8911,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.15502640231144765,
|
|
"grad_norm": 0.5314502365145202,
|
|
"learning_rate": 3.792741711445283e-05,
|
|
"loss": 0.9347,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.15542492776726113,
|
|
"grad_norm": 0.47578888436804323,
|
|
"learning_rate": 3.791618501845469e-05,
|
|
"loss": 0.8512,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.15582345322307461,
|
|
"grad_norm": 0.5374852434985777,
|
|
"learning_rate": 3.790492424162181e-05,
|
|
"loss": 0.8765,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.15622197867888812,
|
|
"grad_norm": 0.568861342025691,
|
|
"learning_rate": 3.789363480198083e-05,
|
|
"loss": 0.88,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.1566205041347016,
|
|
"grad_norm": 0.5082814585192399,
|
|
"learning_rate": 3.788231671760426e-05,
|
|
"loss": 0.8846,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.15701902959051509,
|
|
"grad_norm": 0.5514304292988225,
|
|
"learning_rate": 3.787097000661047e-05,
|
|
"loss": 0.9023,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.1574175550463286,
|
|
"grad_norm": 0.5203382428096642,
|
|
"learning_rate": 3.785959468716367e-05,
|
|
"loss": 0.9036,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.15781608050214208,
|
|
"grad_norm": 0.43118668216324796,
|
|
"learning_rate": 3.7848190777473836e-05,
|
|
"loss": 0.8952,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.15821460595795556,
|
|
"grad_norm": 0.4912071245587214,
|
|
"learning_rate": 3.783675829579675e-05,
|
|
"loss": 0.8798,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.15861313141376907,
|
|
"grad_norm": 0.5961696064294701,
|
|
"learning_rate": 3.7825297260433904e-05,
|
|
"loss": 0.8888,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.15901165686958255,
|
|
"grad_norm": 0.7191150184982619,
|
|
"learning_rate": 3.781380768973252e-05,
|
|
"loss": 0.9002,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.15941018232539603,
|
|
"grad_norm": 0.7060067375415279,
|
|
"learning_rate": 3.7802289602085485e-05,
|
|
"loss": 0.8741,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.15980870778120954,
|
|
"grad_norm": 0.5469078244459111,
|
|
"learning_rate": 3.779074301593135e-05,
|
|
"loss": 0.8786,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.16020723323702302,
|
|
"grad_norm": 0.4518738436666743,
|
|
"learning_rate": 3.777916794975428e-05,
|
|
"loss": 0.8641,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.1606057586928365,
|
|
"grad_norm": 0.7446776049733693,
|
|
"learning_rate": 3.776756442208402e-05,
|
|
"loss": 0.8841,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.16100428414865,
|
|
"grad_norm": 0.8590281212461937,
|
|
"learning_rate": 3.7755932451495906e-05,
|
|
"loss": 0.8589,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.1614028096044635,
|
|
"grad_norm": 0.8179740795657136,
|
|
"learning_rate": 3.774427205661077e-05,
|
|
"loss": 0.8997,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.16180133506027697,
|
|
"grad_norm": 0.6554445877560577,
|
|
"learning_rate": 3.773258325609499e-05,
|
|
"loss": 0.8686,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.16219986051609048,
|
|
"grad_norm": 0.5244424483306168,
|
|
"learning_rate": 3.7720866068660376e-05,
|
|
"loss": 0.8705,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.16259838597190396,
|
|
"grad_norm": 0.5471724085897548,
|
|
"learning_rate": 3.7709120513064196e-05,
|
|
"loss": 0.8629,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.16299691142771744,
|
|
"grad_norm": 0.6834100949875108,
|
|
"learning_rate": 3.769734660810915e-05,
|
|
"loss": 0.8863,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.16339543688353095,
|
|
"grad_norm": 0.7279947229048482,
|
|
"learning_rate": 3.768554437264329e-05,
|
|
"loss": 0.8666,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.16379396233934443,
|
|
"grad_norm": 0.6176989230226226,
|
|
"learning_rate": 3.767371382556003e-05,
|
|
"loss": 0.8537,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.1641924877951579,
|
|
"grad_norm": 0.4903712989166882,
|
|
"learning_rate": 3.766185498579813e-05,
|
|
"loss": 0.903,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.16459101325097142,
|
|
"grad_norm": 0.552748741724315,
|
|
"learning_rate": 3.76499678723416e-05,
|
|
"loss": 0.8765,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.1649895387067849,
|
|
"grad_norm": 0.6272889269130209,
|
|
"learning_rate": 3.763805250421974e-05,
|
|
"loss": 0.8738,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.16538806416259838,
|
|
"grad_norm": 0.5264119048766897,
|
|
"learning_rate": 3.762610890050707e-05,
|
|
"loss": 0.8776,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.1657865896184119,
|
|
"grad_norm": 0.5169756029407534,
|
|
"learning_rate": 3.761413708032332e-05,
|
|
"loss": 0.9039,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.16618511507422537,
|
|
"grad_norm": 0.5970794940209743,
|
|
"learning_rate": 3.760213706283339e-05,
|
|
"loss": 0.9157,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.16658364053003885,
|
|
"grad_norm": 0.5978586824697808,
|
|
"learning_rate": 3.759010886724731e-05,
|
|
"loss": 0.8627,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.16698216598585236,
|
|
"grad_norm": 0.6350014516716387,
|
|
"learning_rate": 3.757805251282021e-05,
|
|
"loss": 0.8924,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.16738069144166584,
|
|
"grad_norm": 0.4788632516360886,
|
|
"learning_rate": 3.756596801885232e-05,
|
|
"loss": 0.8823,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.16777921689747932,
|
|
"grad_norm": 0.4586359434458119,
|
|
"learning_rate": 3.755385540468892e-05,
|
|
"loss": 0.8929,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.1681777423532928,
|
|
"grad_norm": 0.44317101728143243,
|
|
"learning_rate": 3.7541714689720265e-05,
|
|
"loss": 0.8649,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.1685762678091063,
|
|
"grad_norm": 0.5122716359415467,
|
|
"learning_rate": 3.7529545893381645e-05,
|
|
"loss": 0.853,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.1689747932649198,
|
|
"grad_norm": 0.5459289409614204,
|
|
"learning_rate": 3.7517349035153265e-05,
|
|
"loss": 0.884,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.16937331872073327,
|
|
"grad_norm": 0.5242102541749672,
|
|
"learning_rate": 3.750512413456027e-05,
|
|
"loss": 0.8657,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.16977184417654678,
|
|
"grad_norm": 0.4867591923017328,
|
|
"learning_rate": 3.749287121117271e-05,
|
|
"loss": 0.8792,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.17017036963236026,
|
|
"grad_norm": 0.46645737295772005,
|
|
"learning_rate": 3.7480590284605456e-05,
|
|
"loss": 0.8555,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.17056889508817374,
|
|
"grad_norm": 0.5173979998559967,
|
|
"learning_rate": 3.746828137451825e-05,
|
|
"loss": 0.8767,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.17096742054398725,
|
|
"grad_norm": 0.5369165613294684,
|
|
"learning_rate": 3.74559445006156e-05,
|
|
"loss": 0.8705,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.17136594599980073,
|
|
"grad_norm": 0.5189321766211082,
|
|
"learning_rate": 3.74435796826468e-05,
|
|
"loss": 0.8903,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.17176447145561421,
|
|
"grad_norm": 0.5153398576442575,
|
|
"learning_rate": 3.743118694040585e-05,
|
|
"loss": 0.856,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.17216299691142772,
|
|
"grad_norm": 0.6454497262759452,
|
|
"learning_rate": 3.74187662937315e-05,
|
|
"loss": 0.9,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.1725615223672412,
|
|
"grad_norm": 0.49986119364421433,
|
|
"learning_rate": 3.740631776250712e-05,
|
|
"loss": 0.8445,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.17296004782305469,
|
|
"grad_norm": 0.48967274132042343,
|
|
"learning_rate": 3.7393841366660735e-05,
|
|
"loss": 0.8767,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.1733585732788682,
|
|
"grad_norm": 0.45785208420296847,
|
|
"learning_rate": 3.7381337126165e-05,
|
|
"loss": 0.9046,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.17375709873468168,
|
|
"grad_norm": 0.5084392551993347,
|
|
"learning_rate": 3.736880506103711e-05,
|
|
"loss": 0.8463,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.17415562419049516,
|
|
"grad_norm": 0.6260870917802238,
|
|
"learning_rate": 3.735624519133883e-05,
|
|
"loss": 0.8526,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.17455414964630867,
|
|
"grad_norm": 0.667002011430546,
|
|
"learning_rate": 3.734365753717642e-05,
|
|
"loss": 0.9163,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.17495267510212215,
|
|
"grad_norm": 0.5524932335618813,
|
|
"learning_rate": 3.7331042118700616e-05,
|
|
"loss": 0.8909,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.17535120055793563,
|
|
"grad_norm": 0.5179221999500747,
|
|
"learning_rate": 3.731839895610662e-05,
|
|
"loss": 0.8491,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.17574972601374914,
|
|
"grad_norm": 0.6055468639799181,
|
|
"learning_rate": 3.7305728069634024e-05,
|
|
"loss": 0.9039,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.17614825146956262,
|
|
"grad_norm": 0.6369378504491895,
|
|
"learning_rate": 3.729302947956681e-05,
|
|
"loss": 0.8699,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.1765467769253761,
|
|
"grad_norm": 0.517132348583334,
|
|
"learning_rate": 3.728030320623332e-05,
|
|
"loss": 0.8747,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.1769453023811896,
|
|
"grad_norm": 0.4377714733389691,
|
|
"learning_rate": 3.7267549270006195e-05,
|
|
"loss": 0.8574,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.1773438278370031,
|
|
"grad_norm": 0.5519428657517451,
|
|
"learning_rate": 3.7254767691302366e-05,
|
|
"loss": 0.8716,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.17774235329281657,
|
|
"grad_norm": 0.5779289605769454,
|
|
"learning_rate": 3.724195849058302e-05,
|
|
"loss": 0.855,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.17814087874863008,
|
|
"grad_norm": 0.5189071675619338,
|
|
"learning_rate": 3.722912168835356e-05,
|
|
"loss": 0.8789,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.17853940420444356,
|
|
"grad_norm": 0.44907580503791095,
|
|
"learning_rate": 3.7216257305163576e-05,
|
|
"loss": 0.8659,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.17893792966025704,
|
|
"grad_norm": 0.5335537287232798,
|
|
"learning_rate": 3.7203365361606796e-05,
|
|
"loss": 0.896,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.17933645511607055,
|
|
"grad_norm": 0.5493861171202665,
|
|
"learning_rate": 3.719044587832109e-05,
|
|
"loss": 0.8547,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.17973498057188403,
|
|
"grad_norm": 0.4686748664722927,
|
|
"learning_rate": 3.71774988759884e-05,
|
|
"loss": 0.8288,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.1801335060276975,
|
|
"grad_norm": 0.4149387142024727,
|
|
"learning_rate": 3.716452437533471e-05,
|
|
"loss": 0.8596,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.18053203148351102,
|
|
"grad_norm": 0.4325334501517392,
|
|
"learning_rate": 3.715152239713007e-05,
|
|
"loss": 0.859,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.1809305569393245,
|
|
"grad_norm": 0.4976629397106674,
|
|
"learning_rate": 3.713849296218847e-05,
|
|
"loss": 0.8789,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.18132908239513798,
|
|
"grad_norm": 0.507007279338876,
|
|
"learning_rate": 3.7125436091367866e-05,
|
|
"loss": 0.8726,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.1817276078509515,
|
|
"grad_norm": 0.5348993862470603,
|
|
"learning_rate": 3.711235180557014e-05,
|
|
"loss": 0.9106,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.18212613330676497,
|
|
"grad_norm": 0.46294587476217225,
|
|
"learning_rate": 3.709924012574107e-05,
|
|
"loss": 0.8358,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.18252465876257845,
|
|
"grad_norm": 0.43107837967105883,
|
|
"learning_rate": 3.708610107287026e-05,
|
|
"loss": 0.8448,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.18292318421839196,
|
|
"grad_norm": 0.48433441169264524,
|
|
"learning_rate": 3.7072934667991157e-05,
|
|
"loss": 0.8677,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.18332170967420544,
|
|
"grad_norm": 0.5181824793139834,
|
|
"learning_rate": 3.705974093218099e-05,
|
|
"loss": 0.8867,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.18372023513001892,
|
|
"grad_norm": 0.5376360855846708,
|
|
"learning_rate": 3.704651988656074e-05,
|
|
"loss": 0.9073,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.18411876058583243,
|
|
"grad_norm": 0.5000814848716162,
|
|
"learning_rate": 3.703327155229509e-05,
|
|
"loss": 0.87,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.1845172860416459,
|
|
"grad_norm": 0.4780561422951961,
|
|
"learning_rate": 3.701999595059244e-05,
|
|
"loss": 0.8614,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.1849158114974594,
|
|
"grad_norm": 0.4722288774763096,
|
|
"learning_rate": 3.700669310270481e-05,
|
|
"loss": 0.8507,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.1853143369532729,
|
|
"grad_norm": 0.46238619081900495,
|
|
"learning_rate": 3.699336302992786e-05,
|
|
"loss": 0.8795,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.18571286240908638,
|
|
"grad_norm": 0.5217809598476334,
|
|
"learning_rate": 3.69800057536008e-05,
|
|
"loss": 0.8679,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.18611138786489986,
|
|
"grad_norm": 0.5670490274865951,
|
|
"learning_rate": 3.6966621295106425e-05,
|
|
"loss": 0.8821,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.18650991332071337,
|
|
"grad_norm": 0.5541701975380785,
|
|
"learning_rate": 3.695320967587103e-05,
|
|
"loss": 0.8671,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.18690843877652685,
|
|
"grad_norm": 0.48332966121728094,
|
|
"learning_rate": 3.693977091736438e-05,
|
|
"loss": 0.8543,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.18730696423234033,
|
|
"grad_norm": 0.4228426707268364,
|
|
"learning_rate": 3.6926305041099705e-05,
|
|
"loss": 0.8421,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.18770548968815384,
|
|
"grad_norm": 0.4683111306073849,
|
|
"learning_rate": 3.6912812068633626e-05,
|
|
"loss": 0.8584,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.18810401514396732,
|
|
"grad_norm": 0.5422991697909932,
|
|
"learning_rate": 3.689929202156615e-05,
|
|
"loss": 0.9349,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.1885025405997808,
|
|
"grad_norm": 0.474589914149524,
|
|
"learning_rate": 3.688574492154063e-05,
|
|
"loss": 0.8683,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.1889010660555943,
|
|
"grad_norm": 0.4982233301174737,
|
|
"learning_rate": 3.687217079024371e-05,
|
|
"loss": 0.8636,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.1892995915114078,
|
|
"grad_norm": 0.5267276262142256,
|
|
"learning_rate": 3.6858569649405336e-05,
|
|
"loss": 0.8559,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.18969811696722128,
|
|
"grad_norm": 0.40458583321271047,
|
|
"learning_rate": 3.6844941520798664e-05,
|
|
"loss": 0.8432,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.19009664242303478,
|
|
"grad_norm": 0.38424753205506557,
|
|
"learning_rate": 3.683128642624007e-05,
|
|
"loss": 0.857,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.19049516787884826,
|
|
"grad_norm": 0.4997565524770705,
|
|
"learning_rate": 3.6817604387589086e-05,
|
|
"loss": 0.8763,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.19089369333466175,
|
|
"grad_norm": 0.38620309944213566,
|
|
"learning_rate": 3.680389542674837e-05,
|
|
"loss": 0.8402,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.19129221879047525,
|
|
"grad_norm": 0.4217979959268514,
|
|
"learning_rate": 3.679015956566371e-05,
|
|
"loss": 0.8921,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.19169074424628874,
|
|
"grad_norm": 0.6509327369251123,
|
|
"learning_rate": 3.6776396826323925e-05,
|
|
"loss": 0.8981,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.19208926970210222,
|
|
"grad_norm": 0.3957479519147936,
|
|
"learning_rate": 3.6762607230760884e-05,
|
|
"loss": 0.887,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.19248779515791573,
|
|
"grad_norm": 0.3933212374183316,
|
|
"learning_rate": 3.6748790801049435e-05,
|
|
"loss": 0.8555,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.1928863206137292,
|
|
"grad_norm": 0.3942675959179187,
|
|
"learning_rate": 3.673494755930737e-05,
|
|
"loss": 0.8619,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.1932848460695427,
|
|
"grad_norm": 0.4102773938392307,
|
|
"learning_rate": 3.6721077527695435e-05,
|
|
"loss": 0.8684,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.19368337152535617,
|
|
"grad_norm": 0.5537091771770686,
|
|
"learning_rate": 3.670718072841724e-05,
|
|
"loss": 0.8657,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.19408189698116968,
|
|
"grad_norm": 0.4445425000622428,
|
|
"learning_rate": 3.6693257183719256e-05,
|
|
"loss": 0.8527,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.19448042243698316,
|
|
"grad_norm": 0.4048218822376927,
|
|
"learning_rate": 3.667930691589075e-05,
|
|
"loss": 0.8786,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.19487894789279664,
|
|
"grad_norm": 0.4525605726219098,
|
|
"learning_rate": 3.666532994726381e-05,
|
|
"loss": 0.8544,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.19527747334861015,
|
|
"grad_norm": 0.4471569397505119,
|
|
"learning_rate": 3.665132630021321e-05,
|
|
"loss": 0.8506,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.19567599880442363,
|
|
"grad_norm": 0.47638751339784896,
|
|
"learning_rate": 3.6637295997156475e-05,
|
|
"loss": 0.887,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.1960745242602371,
|
|
"grad_norm": 0.5025010909937182,
|
|
"learning_rate": 3.662323906055379e-05,
|
|
"loss": 0.8653,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.19647304971605062,
|
|
"grad_norm": 0.42315599557494776,
|
|
"learning_rate": 3.6609155512907966e-05,
|
|
"loss": 0.8531,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.1968715751718641,
|
|
"grad_norm": 0.4543478055892151,
|
|
"learning_rate": 3.659504537676444e-05,
|
|
"loss": 0.8512,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.19727010062767758,
|
|
"grad_norm": 0.5089668275890759,
|
|
"learning_rate": 3.658090867471118e-05,
|
|
"loss": 0.8733,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.1976686260834911,
|
|
"grad_norm": 0.48725887709055965,
|
|
"learning_rate": 3.656674542937869e-05,
|
|
"loss": 0.8629,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.19806715153930457,
|
|
"grad_norm": 0.5284757567578545,
|
|
"learning_rate": 3.655255566343999e-05,
|
|
"loss": 0.8845,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.19846567699511805,
|
|
"grad_norm": 0.5026058309669479,
|
|
"learning_rate": 3.653833939961053e-05,
|
|
"loss": 0.8876,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.19886420245093156,
|
|
"grad_norm": 0.4169989456283724,
|
|
"learning_rate": 3.6524096660648186e-05,
|
|
"loss": 0.8713,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.19926272790674504,
|
|
"grad_norm": 0.43259318912302097,
|
|
"learning_rate": 3.650982746935321e-05,
|
|
"loss": 0.8463,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.19966125336255852,
|
|
"grad_norm": 0.47387386790106595,
|
|
"learning_rate": 3.6495531848568206e-05,
|
|
"loss": 0.8315,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.20005977881837203,
|
|
"grad_norm": 0.4501351523826911,
|
|
"learning_rate": 3.6481209821178104e-05,
|
|
"loss": 0.8628,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.2004583042741855,
|
|
"grad_norm": 0.5285961857854481,
|
|
"learning_rate": 3.646686141011008e-05,
|
|
"loss": 0.8605,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.200856829729999,
|
|
"grad_norm": 0.40989354815942786,
|
|
"learning_rate": 3.645248663833354e-05,
|
|
"loss": 0.8688,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.2012553551858125,
|
|
"grad_norm": 0.4346950335335224,
|
|
"learning_rate": 3.643808552886012e-05,
|
|
"loss": 0.873,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.20165388064162598,
|
|
"grad_norm": 0.5336085053270726,
|
|
"learning_rate": 3.6423658104743606e-05,
|
|
"loss": 0.8593,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.20205240609743946,
|
|
"grad_norm": 0.4077411294947737,
|
|
"learning_rate": 3.6409204389079896e-05,
|
|
"loss": 0.8444,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.20245093155325297,
|
|
"grad_norm": 0.44445720308169706,
|
|
"learning_rate": 3.6394724405007e-05,
|
|
"loss": 0.8636,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.20284945700906645,
|
|
"grad_norm": 0.3987179137110336,
|
|
"learning_rate": 3.6380218175704954e-05,
|
|
"loss": 0.8897,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.20324798246487993,
|
|
"grad_norm": 0.4428095828153124,
|
|
"learning_rate": 3.636568572439582e-05,
|
|
"loss": 0.8471,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.20364650792069344,
|
|
"grad_norm": 0.46898791636388926,
|
|
"learning_rate": 3.6351127074343654e-05,
|
|
"loss": 0.8567,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.20404503337650692,
|
|
"grad_norm": 0.44474651154582173,
|
|
"learning_rate": 3.633654224885441e-05,
|
|
"loss": 0.848,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.2044435588323204,
|
|
"grad_norm": 0.4312423546670495,
|
|
"learning_rate": 3.632193127127598e-05,
|
|
"loss": 0.8693,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.2048420842881339,
|
|
"grad_norm": 0.49632782286130483,
|
|
"learning_rate": 3.630729416499813e-05,
|
|
"loss": 0.8814,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.2052406097439474,
|
|
"grad_norm": 0.45170716058550536,
|
|
"learning_rate": 3.6292630953452406e-05,
|
|
"loss": 0.8685,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.20563913519976088,
|
|
"grad_norm": 0.5446213353134834,
|
|
"learning_rate": 3.627794166011219e-05,
|
|
"loss": 0.8717,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.20603766065557438,
|
|
"grad_norm": 0.5471560197738125,
|
|
"learning_rate": 3.626322630849259e-05,
|
|
"loss": 0.8667,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.20643618611138786,
|
|
"grad_norm": 0.5858086900062635,
|
|
"learning_rate": 3.6248484922150445e-05,
|
|
"loss": 0.8279,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.20683471156720135,
|
|
"grad_norm": 0.5915507808065805,
|
|
"learning_rate": 3.6233717524684264e-05,
|
|
"loss": 0.8647,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.20723323702301485,
|
|
"grad_norm": 0.5742838245899272,
|
|
"learning_rate": 3.62189241397342e-05,
|
|
"loss": 0.8756,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.20763176247882834,
|
|
"grad_norm": 0.4770900993779875,
|
|
"learning_rate": 3.620410479098199e-05,
|
|
"loss": 0.8595,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.20803028793464182,
|
|
"grad_norm": 0.4639336066600716,
|
|
"learning_rate": 3.618925950215096e-05,
|
|
"loss": 0.8539,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.20842881339045533,
|
|
"grad_norm": 0.5019882836143528,
|
|
"learning_rate": 3.617438829700595e-05,
|
|
"loss": 0.8461,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.2088273388462688,
|
|
"grad_norm": 0.4562491167280308,
|
|
"learning_rate": 3.615949119935328e-05,
|
|
"loss": 0.8631,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.2092258643020823,
|
|
"grad_norm": 0.46086677639660656,
|
|
"learning_rate": 3.614456823304073e-05,
|
|
"loss": 0.8489,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.2096243897578958,
|
|
"grad_norm": 0.44996342982439314,
|
|
"learning_rate": 3.61296194219575e-05,
|
|
"loss": 0.8554,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.21002291521370928,
|
|
"grad_norm": 0.4156003055691938,
|
|
"learning_rate": 3.6114644790034144e-05,
|
|
"loss": 0.8566,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.21042144066952276,
|
|
"grad_norm": 0.4501085849731328,
|
|
"learning_rate": 3.609964436124255e-05,
|
|
"loss": 0.8728,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.21081996612533627,
|
|
"grad_norm": 0.40787146977289557,
|
|
"learning_rate": 3.6084618159595935e-05,
|
|
"loss": 0.8667,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.21121849158114975,
|
|
"grad_norm": 0.474878191977019,
|
|
"learning_rate": 3.606956620914873e-05,
|
|
"loss": 0.8295,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.21161701703696323,
|
|
"grad_norm": 0.46121373114207476,
|
|
"learning_rate": 3.605448853399661e-05,
|
|
"loss": 0.8647,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.21201554249277674,
|
|
"grad_norm": 0.5256057649499315,
|
|
"learning_rate": 3.603938515827643e-05,
|
|
"loss": 0.8765,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.21241406794859022,
|
|
"grad_norm": 0.4296063955695742,
|
|
"learning_rate": 3.6024256106166194e-05,
|
|
"loss": 0.8698,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.2128125934044037,
|
|
"grad_norm": 0.49055349825343775,
|
|
"learning_rate": 3.600910140188498e-05,
|
|
"loss": 0.8554,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.2132111188602172,
|
|
"grad_norm": 0.4028038490785686,
|
|
"learning_rate": 3.599392106969296e-05,
|
|
"loss": 0.8797,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.2136096443160307,
|
|
"grad_norm": 0.4426507424773926,
|
|
"learning_rate": 3.5978715133891334e-05,
|
|
"loss": 0.8433,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.21400816977184417,
|
|
"grad_norm": 0.4408887572324347,
|
|
"learning_rate": 3.596348361882226e-05,
|
|
"loss": 0.8919,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.21440669522765768,
|
|
"grad_norm": 0.4023818298390077,
|
|
"learning_rate": 3.594822654886888e-05,
|
|
"loss": 0.8219,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.21480522068347116,
|
|
"grad_norm": 0.5445602848649418,
|
|
"learning_rate": 3.593294394845521e-05,
|
|
"loss": 0.8561,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.21520374613928464,
|
|
"grad_norm": 0.44164972512016026,
|
|
"learning_rate": 3.5917635842046165e-05,
|
|
"loss": 0.8428,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.21560227159509815,
|
|
"grad_norm": 0.48977170056676267,
|
|
"learning_rate": 3.590230225414748e-05,
|
|
"loss": 0.8701,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.21600079705091163,
|
|
"grad_norm": 0.465180272328864,
|
|
"learning_rate": 3.588694320930567e-05,
|
|
"loss": 0.837,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.2163993225067251,
|
|
"grad_norm": 0.3718782369142703,
|
|
"learning_rate": 3.5871558732108034e-05,
|
|
"loss": 0.8491,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.21679784796253862,
|
|
"grad_norm": 0.4506626708822692,
|
|
"learning_rate": 3.5856148847182535e-05,
|
|
"loss": 0.8293,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.2171963734183521,
|
|
"grad_norm": 0.5210277329620194,
|
|
"learning_rate": 3.5840713579197856e-05,
|
|
"loss": 0.8587,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.21759489887416558,
|
|
"grad_norm": 0.5358427464347824,
|
|
"learning_rate": 3.5825252952863296e-05,
|
|
"loss": 0.8251,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.2179934243299791,
|
|
"grad_norm": 0.48542122022372863,
|
|
"learning_rate": 3.5809766992928746e-05,
|
|
"loss": 0.8725,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.21839194978579257,
|
|
"grad_norm": 0.4243230434228638,
|
|
"learning_rate": 3.579425572418465e-05,
|
|
"loss": 0.8518,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.21879047524160605,
|
|
"grad_norm": 0.4218795984129036,
|
|
"learning_rate": 3.5778719171461975e-05,
|
|
"loss": 0.8548,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.21918900069741956,
|
|
"grad_norm": 0.4590501106129811,
|
|
"learning_rate": 3.5763157359632164e-05,
|
|
"loss": 0.8531,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.21958752615323304,
|
|
"grad_norm": 0.46361499771905873,
|
|
"learning_rate": 3.574757031360708e-05,
|
|
"loss": 0.8817,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.21998605160904652,
|
|
"grad_norm": 0.47793550002117074,
|
|
"learning_rate": 3.5731958058339e-05,
|
|
"loss": 0.856,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.22038457706486,
|
|
"grad_norm": 0.4585859943216561,
|
|
"learning_rate": 3.571632061882056e-05,
|
|
"loss": 0.8616,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.2207831025206735,
|
|
"grad_norm": 0.3864454910550978,
|
|
"learning_rate": 3.570065802008468e-05,
|
|
"loss": 0.8621,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.221181627976487,
|
|
"grad_norm": 0.42677803227423167,
|
|
"learning_rate": 3.56849702872046e-05,
|
|
"loss": 0.8824,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.22158015343230048,
|
|
"grad_norm": 0.4968788156141536,
|
|
"learning_rate": 3.5669257445293755e-05,
|
|
"loss": 0.8601,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.22197867888811398,
|
|
"grad_norm": 0.4839933541994568,
|
|
"learning_rate": 3.5653519519505803e-05,
|
|
"loss": 0.852,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.22237720434392746,
|
|
"grad_norm": 0.43272119648953283,
|
|
"learning_rate": 3.563775653503455e-05,
|
|
"loss": 0.8733,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.22277572979974095,
|
|
"grad_norm": 0.39605938751897557,
|
|
"learning_rate": 3.562196851711391e-05,
|
|
"loss": 0.8417,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.22317425525555445,
|
|
"grad_norm": 0.43460908962065953,
|
|
"learning_rate": 3.560615549101788e-05,
|
|
"loss": 0.8443,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.22357278071136794,
|
|
"grad_norm": 0.49038667322845025,
|
|
"learning_rate": 3.5590317482060474e-05,
|
|
"loss": 0.8441,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.22397130616718142,
|
|
"grad_norm": 0.4896634258033811,
|
|
"learning_rate": 3.5574454515595735e-05,
|
|
"loss": 0.8216,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.22436983162299493,
|
|
"grad_norm": 0.47379532355614734,
|
|
"learning_rate": 3.5558566617017616e-05,
|
|
"loss": 0.8664,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.2247683570788084,
|
|
"grad_norm": 0.4064914983245694,
|
|
"learning_rate": 3.554265381176e-05,
|
|
"loss": 0.8195,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.2251668825346219,
|
|
"grad_norm": 0.44817812465361634,
|
|
"learning_rate": 3.552671612529667e-05,
|
|
"loss": 0.8251,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.2255654079904354,
|
|
"grad_norm": 0.5252162424970518,
|
|
"learning_rate": 3.5510753583141185e-05,
|
|
"loss": 0.8873,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.22596393344624888,
|
|
"grad_norm": 0.5355671371355674,
|
|
"learning_rate": 3.5494766210846936e-05,
|
|
"loss": 0.8544,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.22636245890206236,
|
|
"grad_norm": 0.4819586808295284,
|
|
"learning_rate": 3.547875403400705e-05,
|
|
"loss": 0.8619,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.22676098435787587,
|
|
"grad_norm": 0.3961215921893707,
|
|
"learning_rate": 3.5462717078254353e-05,
|
|
"loss": 0.8687,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.22715950981368935,
|
|
"grad_norm": 0.4108394698195708,
|
|
"learning_rate": 3.5446655369261355e-05,
|
|
"loss": 0.8629,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.22755803526950283,
|
|
"grad_norm": 0.4534157567866205,
|
|
"learning_rate": 3.543056893274017e-05,
|
|
"loss": 0.843,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.22795656072531634,
|
|
"grad_norm": 0.5102875270779772,
|
|
"learning_rate": 3.541445779444252e-05,
|
|
"loss": 0.8485,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.22835508618112982,
|
|
"grad_norm": 0.3859177522136378,
|
|
"learning_rate": 3.5398321980159666e-05,
|
|
"loss": 0.8373,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.2287536116369433,
|
|
"grad_norm": 0.37972963850475683,
|
|
"learning_rate": 3.5382161515722354e-05,
|
|
"loss": 0.8741,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.2291521370927568,
|
|
"grad_norm": 0.4136228500070505,
|
|
"learning_rate": 3.53659764270008e-05,
|
|
"loss": 0.8739,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.2295506625485703,
|
|
"grad_norm": 0.42386454317477146,
|
|
"learning_rate": 3.534976673990465e-05,
|
|
"loss": 0.8504,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.22994918800438377,
|
|
"grad_norm": 0.4131700773814348,
|
|
"learning_rate": 3.5333532480382915e-05,
|
|
"loss": 0.8325,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.23034771346019728,
|
|
"grad_norm": 0.4321055766938808,
|
|
"learning_rate": 3.5317273674423944e-05,
|
|
"loss": 0.842,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.23074623891601076,
|
|
"grad_norm": 0.4452054733522704,
|
|
"learning_rate": 3.5300990348055385e-05,
|
|
"loss": 0.8826,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.23114476437182424,
|
|
"grad_norm": 0.4096599637303119,
|
|
"learning_rate": 3.528468252734414e-05,
|
|
"loss": 0.8633,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.23154328982763775,
|
|
"grad_norm": 0.4122953744704833,
|
|
"learning_rate": 3.526835023839632e-05,
|
|
"loss": 0.8772,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.23194181528345123,
|
|
"grad_norm": 0.4547152883012281,
|
|
"learning_rate": 3.52519935073572e-05,
|
|
"loss": 0.8613,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.2323403407392647,
|
|
"grad_norm": 0.4725670891982683,
|
|
"learning_rate": 3.5235612360411196e-05,
|
|
"loss": 0.8819,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.23273886619507822,
|
|
"grad_norm": 0.40729982125282965,
|
|
"learning_rate": 3.521920682378179e-05,
|
|
"loss": 0.8471,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.2331373916508917,
|
|
"grad_norm": 0.4348949494906739,
|
|
"learning_rate": 3.520277692373154e-05,
|
|
"loss": 0.8682,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.23353591710670518,
|
|
"grad_norm": 0.4881551767292844,
|
|
"learning_rate": 3.518632268656196e-05,
|
|
"loss": 0.8408,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.2339344425625187,
|
|
"grad_norm": 0.5373093582603797,
|
|
"learning_rate": 3.516984413861357e-05,
|
|
"loss": 0.8646,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.23433296801833217,
|
|
"grad_norm": 0.4789730876955116,
|
|
"learning_rate": 3.5153341306265775e-05,
|
|
"loss": 0.8489,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.23473149347414565,
|
|
"grad_norm": 0.5957598632234159,
|
|
"learning_rate": 3.5136814215936864e-05,
|
|
"loss": 0.8478,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.23513001892995916,
|
|
"grad_norm": 0.6296888663536283,
|
|
"learning_rate": 3.512026289408398e-05,
|
|
"loss": 0.866,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.23552854438577264,
|
|
"grad_norm": 0.5086372892787441,
|
|
"learning_rate": 3.5103687367203025e-05,
|
|
"loss": 0.8893,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.23592706984158612,
|
|
"grad_norm": 0.4732493082235356,
|
|
"learning_rate": 3.508708766182866e-05,
|
|
"loss": 0.8435,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.23632559529739963,
|
|
"grad_norm": 0.5470935688327907,
|
|
"learning_rate": 3.507046380453426e-05,
|
|
"loss": 0.8572,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.2367241207532131,
|
|
"grad_norm": 0.5928161194589755,
|
|
"learning_rate": 3.5053815821931865e-05,
|
|
"loss": 0.8991,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.2371226462090266,
|
|
"grad_norm": 0.541542878613048,
|
|
"learning_rate": 3.503714374067212e-05,
|
|
"loss": 0.843,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.2375211716648401,
|
|
"grad_norm": 0.5945037136372829,
|
|
"learning_rate": 3.502044758744425e-05,
|
|
"loss": 0.8313,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.23791969712065358,
|
|
"grad_norm": 0.5262860873148738,
|
|
"learning_rate": 3.500372738897603e-05,
|
|
"loss": 0.8302,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.23831822257646706,
|
|
"grad_norm": 0.47043036160591684,
|
|
"learning_rate": 3.498698317203372e-05,
|
|
"loss": 0.8483,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.23871674803228057,
|
|
"grad_norm": 0.48587903119210246,
|
|
"learning_rate": 3.497021496342203e-05,
|
|
"loss": 0.8435,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.23911527348809405,
|
|
"grad_norm": 0.5044732980078849,
|
|
"learning_rate": 3.495342278998406e-05,
|
|
"loss": 0.828,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.23951379894390754,
|
|
"grad_norm": 0.4739216086634541,
|
|
"learning_rate": 3.493660667860131e-05,
|
|
"loss": 0.8077,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.23991232439972104,
|
|
"grad_norm": 0.5102507150713297,
|
|
"learning_rate": 3.4919766656193576e-05,
|
|
"loss": 0.8558,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.24031084985553453,
|
|
"grad_norm": 0.4940960518342556,
|
|
"learning_rate": 3.490290274971892e-05,
|
|
"loss": 0.8655,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.240709375311348,
|
|
"grad_norm": 0.4976450093350724,
|
|
"learning_rate": 3.488601498617367e-05,
|
|
"loss": 0.8451,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.24110790076716151,
|
|
"grad_norm": 0.5068077897232314,
|
|
"learning_rate": 3.486910339259231e-05,
|
|
"loss": 0.8424,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.241506426222975,
|
|
"grad_norm": 0.40217075451363676,
|
|
"learning_rate": 3.485216799604752e-05,
|
|
"loss": 0.8766,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.24190495167878848,
|
|
"grad_norm": 0.41405398864625936,
|
|
"learning_rate": 3.483520882365003e-05,
|
|
"loss": 0.8295,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.24230347713460199,
|
|
"grad_norm": 0.45479094126766634,
|
|
"learning_rate": 3.4818225902548666e-05,
|
|
"loss": 0.8832,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.24270200259041547,
|
|
"grad_norm": 0.44930048442037135,
|
|
"learning_rate": 3.480121925993026e-05,
|
|
"loss": 0.8775,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.24310052804622895,
|
|
"grad_norm": 0.4305314388039683,
|
|
"learning_rate": 3.478418892301962e-05,
|
|
"loss": 0.8585,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.24349905350204246,
|
|
"grad_norm": 0.42635507279318796,
|
|
"learning_rate": 3.47671349190795e-05,
|
|
"loss": 0.8748,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.24389757895785594,
|
|
"grad_norm": 0.4470652779000305,
|
|
"learning_rate": 3.475005727541049e-05,
|
|
"loss": 0.8707,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.24429610441366942,
|
|
"grad_norm": 0.6860613528881833,
|
|
"learning_rate": 3.4732956019351105e-05,
|
|
"loss": 0.8586,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.24469462986948293,
|
|
"grad_norm": 0.4514815132734232,
|
|
"learning_rate": 3.471583117827758e-05,
|
|
"loss": 0.847,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.2450931553252964,
|
|
"grad_norm": 0.405387622879431,
|
|
"learning_rate": 3.469868277960395e-05,
|
|
"loss": 0.8537,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.2454916807811099,
|
|
"grad_norm": 0.45894128089045466,
|
|
"learning_rate": 3.468151085078196e-05,
|
|
"loss": 0.8329,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.24589020623692337,
|
|
"grad_norm": 0.5102574940014621,
|
|
"learning_rate": 3.4664315419301e-05,
|
|
"loss": 0.8407,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.24628873169273688,
|
|
"grad_norm": 0.44535784146833973,
|
|
"learning_rate": 3.464709651268811e-05,
|
|
"loss": 0.8503,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.24668725714855036,
|
|
"grad_norm": 0.43055173741202407,
|
|
"learning_rate": 3.4629854158507884e-05,
|
|
"loss": 0.8685,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.24708578260436384,
|
|
"grad_norm": 0.44729573957137375,
|
|
"learning_rate": 3.461258838436248e-05,
|
|
"loss": 0.8708,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.24748430806017735,
|
|
"grad_norm": 0.4062311195130286,
|
|
"learning_rate": 3.459529921789153e-05,
|
|
"loss": 0.824,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.24788283351599083,
|
|
"grad_norm": 0.4359478505964142,
|
|
"learning_rate": 3.457798668677211e-05,
|
|
"loss": 0.849,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.2482813589718043,
|
|
"grad_norm": 0.4269566124271948,
|
|
"learning_rate": 3.456065081871871e-05,
|
|
"loss": 0.8504,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.24867988442761782,
|
|
"grad_norm": 0.39280331015093617,
|
|
"learning_rate": 3.454329164148317e-05,
|
|
"loss": 0.8529,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.2490784098834313,
|
|
"grad_norm": 0.414050219224192,
|
|
"learning_rate": 3.452590918285465e-05,
|
|
"loss": 0.871,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.24947693533924478,
|
|
"grad_norm": 0.4021318325147454,
|
|
"learning_rate": 3.450850347065958e-05,
|
|
"loss": 0.841,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.2498754607950583,
|
|
"grad_norm": 0.4120701796015395,
|
|
"learning_rate": 3.4491074532761614e-05,
|
|
"loss": 0.8261,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.25027398625087177,
|
|
"grad_norm": 0.42792903386869047,
|
|
"learning_rate": 3.4473622397061576e-05,
|
|
"loss": 0.8366,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.25067251170668525,
|
|
"grad_norm": 0.5026276371812628,
|
|
"learning_rate": 3.445614709149744e-05,
|
|
"loss": 0.8797,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.25107103716249873,
|
|
"grad_norm": 0.42307765492760363,
|
|
"learning_rate": 3.443864864404427e-05,
|
|
"loss": 0.8333,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.25146956261831227,
|
|
"grad_norm": 0.43146499355102447,
|
|
"learning_rate": 3.4421127082714165e-05,
|
|
"loss": 0.8745,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.25186808807412575,
|
|
"grad_norm": 0.4232386337048391,
|
|
"learning_rate": 3.4403582435556235e-05,
|
|
"loss": 0.8615,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.25226661352993923,
|
|
"grad_norm": 0.39549286132767947,
|
|
"learning_rate": 3.4386014730656554e-05,
|
|
"loss": 0.852,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.2526651389857527,
|
|
"grad_norm": 0.37990676255356576,
|
|
"learning_rate": 3.436842399613808e-05,
|
|
"loss": 0.8667,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.2530636644415662,
|
|
"grad_norm": 0.35437344682645827,
|
|
"learning_rate": 3.435081026016067e-05,
|
|
"loss": 0.8629,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.2534621898973797,
|
|
"grad_norm": 0.4072267228198412,
|
|
"learning_rate": 3.433317355092098e-05,
|
|
"loss": 0.863,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.2538607153531932,
|
|
"grad_norm": 0.4087915210981998,
|
|
"learning_rate": 3.431551389665246e-05,
|
|
"loss": 0.8629,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.2542592408090067,
|
|
"grad_norm": 0.39541112177531035,
|
|
"learning_rate": 3.429783132562527e-05,
|
|
"loss": 0.8431,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.2546577662648202,
|
|
"grad_norm": 0.449324447165349,
|
|
"learning_rate": 3.428012586614628e-05,
|
|
"loss": 0.8301,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.25505629172063365,
|
|
"grad_norm": 0.438103934508987,
|
|
"learning_rate": 3.426239754655898e-05,
|
|
"loss": 0.8346,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.25545481717644714,
|
|
"grad_norm": 0.4080543057741031,
|
|
"learning_rate": 3.4244646395243456e-05,
|
|
"loss": 0.8199,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.2558533426322606,
|
|
"grad_norm": 0.4781805788640452,
|
|
"learning_rate": 3.422687244061636e-05,
|
|
"loss": 0.8396,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.25625186808807415,
|
|
"grad_norm": 0.39665434242169373,
|
|
"learning_rate": 3.420907571113085e-05,
|
|
"loss": 0.8738,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.25665039354388763,
|
|
"grad_norm": 0.44427399502026793,
|
|
"learning_rate": 3.419125623527651e-05,
|
|
"loss": 0.8276,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.2570489189997011,
|
|
"grad_norm": 0.4971093826856599,
|
|
"learning_rate": 3.417341404157938e-05,
|
|
"loss": 0.844,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.2574474444555146,
|
|
"grad_norm": 0.40433006793477544,
|
|
"learning_rate": 3.415554915860184e-05,
|
|
"loss": 0.8515,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.2578459699113281,
|
|
"grad_norm": 0.41435122876017727,
|
|
"learning_rate": 3.413766161494259e-05,
|
|
"loss": 0.8504,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.25824449536714156,
|
|
"grad_norm": 0.4025721405079423,
|
|
"learning_rate": 3.411975143923662e-05,
|
|
"loss": 0.8003,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.25864302082295504,
|
|
"grad_norm": 0.4230151107223422,
|
|
"learning_rate": 3.410181866015515e-05,
|
|
"loss": 0.8253,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.2590415462787686,
|
|
"grad_norm": 0.43018219174517974,
|
|
"learning_rate": 3.4083863306405576e-05,
|
|
"loss": 0.8494,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.25944007173458206,
|
|
"grad_norm": 0.5580571782658815,
|
|
"learning_rate": 3.406588540673143e-05,
|
|
"loss": 0.839,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.25983859719039554,
|
|
"grad_norm": 0.40240838407878654,
|
|
"learning_rate": 3.4047884989912355e-05,
|
|
"loss": 0.8295,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.260237122646209,
|
|
"grad_norm": 0.42705376431218756,
|
|
"learning_rate": 3.402986208476401e-05,
|
|
"loss": 0.8513,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.2606356481020225,
|
|
"grad_norm": 0.37891252038962947,
|
|
"learning_rate": 3.4011816720138076e-05,
|
|
"loss": 0.8551,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.261034173557836,
|
|
"grad_norm": 0.4742754786354608,
|
|
"learning_rate": 3.39937489249222e-05,
|
|
"loss": 0.8494,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.2614326990136495,
|
|
"grad_norm": 0.5757481855161607,
|
|
"learning_rate": 3.3975658728039894e-05,
|
|
"loss": 0.866,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.261831224469463,
|
|
"grad_norm": 0.41879176964003356,
|
|
"learning_rate": 3.395754615845057e-05,
|
|
"loss": 0.8199,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.2622297499252765,
|
|
"grad_norm": 0.3977116381507401,
|
|
"learning_rate": 3.393941124514944e-05,
|
|
"loss": 0.8464,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.26262827538108996,
|
|
"grad_norm": 0.4361036030052378,
|
|
"learning_rate": 3.3921254017167485e-05,
|
|
"loss": 0.8554,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.26302680083690344,
|
|
"grad_norm": 0.36947748546095344,
|
|
"learning_rate": 3.3903074503571414e-05,
|
|
"loss": 0.8332,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.2634253262927169,
|
|
"grad_norm": 0.39322680162826995,
|
|
"learning_rate": 3.3884872733463605e-05,
|
|
"loss": 0.8522,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.26382385174853046,
|
|
"grad_norm": 0.4426408711257021,
|
|
"learning_rate": 3.386664873598206e-05,
|
|
"loss": 0.8439,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.26422237720434394,
|
|
"grad_norm": 0.40481569528280453,
|
|
"learning_rate": 3.384840254030039e-05,
|
|
"loss": 0.8463,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.2646209026601574,
|
|
"grad_norm": 0.486897366169285,
|
|
"learning_rate": 3.3830134175627694e-05,
|
|
"loss": 0.8383,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.2650194281159709,
|
|
"grad_norm": 0.4124318747978423,
|
|
"learning_rate": 3.3811843671208604e-05,
|
|
"loss": 0.8341,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.2654179535717844,
|
|
"grad_norm": 0.4480853051751989,
|
|
"learning_rate": 3.379353105632318e-05,
|
|
"loss": 0.8719,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.26581647902759786,
|
|
"grad_norm": 0.4075223126165696,
|
|
"learning_rate": 3.3775196360286864e-05,
|
|
"loss": 0.825,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.2662150044834114,
|
|
"grad_norm": 0.4598432178350243,
|
|
"learning_rate": 3.375683961245047e-05,
|
|
"loss": 0.8459,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.2666135299392249,
|
|
"grad_norm": 0.4747860282082611,
|
|
"learning_rate": 3.3738460842200095e-05,
|
|
"loss": 0.8448,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.26701205539503836,
|
|
"grad_norm": 0.42550536631714303,
|
|
"learning_rate": 3.37200600789571e-05,
|
|
"loss": 0.8482,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.26741058085085184,
|
|
"grad_norm": 0.5014696923841511,
|
|
"learning_rate": 3.3701637352178035e-05,
|
|
"loss": 0.839,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.2678091063066653,
|
|
"grad_norm": 0.44071644150719574,
|
|
"learning_rate": 3.368319269135464e-05,
|
|
"loss": 0.8499,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.2682076317624788,
|
|
"grad_norm": 0.45694183948733363,
|
|
"learning_rate": 3.366472612601374e-05,
|
|
"loss": 0.8495,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.26860615721829234,
|
|
"grad_norm": 0.45776428701146005,
|
|
"learning_rate": 3.364623768571725e-05,
|
|
"loss": 0.8683,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.2690046826741058,
|
|
"grad_norm": 0.4300670256635499,
|
|
"learning_rate": 3.3627727400062074e-05,
|
|
"loss": 0.8409,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.2694032081299193,
|
|
"grad_norm": 0.4522484813223993,
|
|
"learning_rate": 3.360919529868012e-05,
|
|
"loss": 0.8549,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.2698017335857328,
|
|
"grad_norm": 0.46483110883882417,
|
|
"learning_rate": 3.3590641411238184e-05,
|
|
"loss": 0.8316,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.27020025904154626,
|
|
"grad_norm": 0.46516087115887955,
|
|
"learning_rate": 3.3572065767437974e-05,
|
|
"loss": 0.847,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.27059878449735975,
|
|
"grad_norm": 0.4870114489474851,
|
|
"learning_rate": 3.355346839701601e-05,
|
|
"loss": 0.866,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.2709973099531733,
|
|
"grad_norm": 0.4112151077893339,
|
|
"learning_rate": 3.353484932974357e-05,
|
|
"loss": 0.8747,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.27139583540898676,
|
|
"grad_norm": 0.39988331169551145,
|
|
"learning_rate": 3.35162085954267e-05,
|
|
"loss": 0.8491,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.27179436086480024,
|
|
"grad_norm": 0.4580861040010356,
|
|
"learning_rate": 3.3497546223906114e-05,
|
|
"loss": 0.8373,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.2721928863206137,
|
|
"grad_norm": 0.4676988585541286,
|
|
"learning_rate": 3.347886224505718e-05,
|
|
"loss": 0.8562,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.2725914117764272,
|
|
"grad_norm": 0.3815018026041965,
|
|
"learning_rate": 3.346015668878982e-05,
|
|
"loss": 0.8865,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.2729899372322407,
|
|
"grad_norm": 0.3853282548165928,
|
|
"learning_rate": 3.3441429585048544e-05,
|
|
"loss": 0.8451,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.2733884626880542,
|
|
"grad_norm": 0.46857379361810175,
|
|
"learning_rate": 3.342268096381233e-05,
|
|
"loss": 0.8343,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.2737869881438677,
|
|
"grad_norm": 0.44893908766670865,
|
|
"learning_rate": 3.340391085509458e-05,
|
|
"loss": 0.8425,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.2741855135996812,
|
|
"grad_norm": 0.4623804261603112,
|
|
"learning_rate": 3.338511928894315e-05,
|
|
"loss": 0.8752,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.27458403905549467,
|
|
"grad_norm": 0.40030690241398437,
|
|
"learning_rate": 3.3366306295440195e-05,
|
|
"loss": 0.8854,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.27498256451130815,
|
|
"grad_norm": 0.41617160670796793,
|
|
"learning_rate": 3.3347471904702196e-05,
|
|
"loss": 0.8976,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.27538108996712163,
|
|
"grad_norm": 0.4056939768327828,
|
|
"learning_rate": 3.3328616146879886e-05,
|
|
"loss": 0.872,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.27577961542293516,
|
|
"grad_norm": 0.37847852674838545,
|
|
"learning_rate": 3.33097390521582e-05,
|
|
"loss": 0.8155,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.27617814087874865,
|
|
"grad_norm": 0.35872927161364443,
|
|
"learning_rate": 3.329084065075622e-05,
|
|
"loss": 0.8273,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.2765766663345621,
|
|
"grad_norm": 0.39096155431724333,
|
|
"learning_rate": 3.327192097292715e-05,
|
|
"loss": 0.8581,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.2769751917903756,
|
|
"grad_norm": 0.3861177159461641,
|
|
"learning_rate": 3.325298004895826e-05,
|
|
"loss": 0.8132,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.2773737172461891,
|
|
"grad_norm": 0.4171747417597138,
|
|
"learning_rate": 3.323401790917082e-05,
|
|
"loss": 0.8347,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.27777224270200257,
|
|
"grad_norm": 0.364670807824471,
|
|
"learning_rate": 3.321503458392005e-05,
|
|
"loss": 0.8415,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.2781707681578161,
|
|
"grad_norm": 0.331401074927844,
|
|
"learning_rate": 3.3196030103595105e-05,
|
|
"loss": 0.8459,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.2785692936136296,
|
|
"grad_norm": 0.43255738046602604,
|
|
"learning_rate": 3.317700449861901e-05,
|
|
"loss": 0.8335,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.27896781906944307,
|
|
"grad_norm": 0.33456506773762923,
|
|
"learning_rate": 3.315795779944858e-05,
|
|
"loss": 0.8647,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.27936634452525655,
|
|
"grad_norm": 0.3715707582620995,
|
|
"learning_rate": 3.313889003657443e-05,
|
|
"loss": 0.8547,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.27976486998107003,
|
|
"grad_norm": 0.3331498560093925,
|
|
"learning_rate": 3.311980124052087e-05,
|
|
"loss": 0.8447,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.2801633954368835,
|
|
"grad_norm": 0.4038630202134111,
|
|
"learning_rate": 3.3100691441845896e-05,
|
|
"loss": 0.8247,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.28056192089269705,
|
|
"grad_norm": 0.365237203718338,
|
|
"learning_rate": 3.308156067114111e-05,
|
|
"loss": 0.8737,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.28096044634851053,
|
|
"grad_norm": 0.4002592791047349,
|
|
"learning_rate": 3.3062408959031715e-05,
|
|
"loss": 0.8478,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.281358971804324,
|
|
"grad_norm": 0.34357520687563103,
|
|
"learning_rate": 3.304323633617641e-05,
|
|
"loss": 0.8233,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.2817574972601375,
|
|
"grad_norm": 0.3505454925796206,
|
|
"learning_rate": 3.3024042833267357e-05,
|
|
"loss": 0.8281,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.28215602271595097,
|
|
"grad_norm": 0.35854787844493347,
|
|
"learning_rate": 3.3004828481030197e-05,
|
|
"loss": 0.8314,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.28255454817176445,
|
|
"grad_norm": 0.3633810116569549,
|
|
"learning_rate": 3.2985593310223905e-05,
|
|
"loss": 0.8337,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.282953073627578,
|
|
"grad_norm": 0.40905086354028014,
|
|
"learning_rate": 3.296633735164078e-05,
|
|
"loss": 0.8278,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.28335159908339147,
|
|
"grad_norm": 0.39198864644450826,
|
|
"learning_rate": 3.294706063610642e-05,
|
|
"loss": 0.8495,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.28375012453920495,
|
|
"grad_norm": 0.39676678952183586,
|
|
"learning_rate": 3.292776319447965e-05,
|
|
"loss": 0.841,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.28414864999501843,
|
|
"grad_norm": 0.4904457094152149,
|
|
"learning_rate": 3.290844505765246e-05,
|
|
"loss": 0.8538,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.2845471754508319,
|
|
"grad_norm": 0.38619534462184524,
|
|
"learning_rate": 3.288910625654997e-05,
|
|
"loss": 0.831,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.2849457009066454,
|
|
"grad_norm": 0.3965911327088796,
|
|
"learning_rate": 3.28697468221304e-05,
|
|
"loss": 0.855,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.2853442263624589,
|
|
"grad_norm": 0.4104504182776709,
|
|
"learning_rate": 3.2850366785384975e-05,
|
|
"loss": 0.8312,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.2857427518182724,
|
|
"grad_norm": 0.39320803615560024,
|
|
"learning_rate": 3.2830966177337926e-05,
|
|
"loss": 0.8256,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.2861412772740859,
|
|
"grad_norm": 0.36766055059184494,
|
|
"learning_rate": 3.281154502904639e-05,
|
|
"loss": 0.8612,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.2865398027298994,
|
|
"grad_norm": 0.3523821293496536,
|
|
"learning_rate": 3.279210337160041e-05,
|
|
"loss": 0.8546,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.28693832818571285,
|
|
"grad_norm": 0.4303479446087632,
|
|
"learning_rate": 3.277264123612283e-05,
|
|
"loss": 0.843,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.28733685364152634,
|
|
"grad_norm": 0.37256602383763016,
|
|
"learning_rate": 3.275315865376932e-05,
|
|
"loss": 0.8525,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.2877353790973398,
|
|
"grad_norm": 0.372312406331151,
|
|
"learning_rate": 3.273365565572824e-05,
|
|
"loss": 0.8718,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.28813390455315335,
|
|
"grad_norm": 0.3748404787253373,
|
|
"learning_rate": 3.271413227322064e-05,
|
|
"loss": 0.8284,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.28853243000896683,
|
|
"grad_norm": 0.40949697147874353,
|
|
"learning_rate": 3.269458853750023e-05,
|
|
"loss": 0.8342,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.2889309554647803,
|
|
"grad_norm": 0.35759282756001504,
|
|
"learning_rate": 3.267502447985328e-05,
|
|
"loss": 0.8376,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.2893294809205938,
|
|
"grad_norm": 0.424890270877448,
|
|
"learning_rate": 3.2655440131598585e-05,
|
|
"loss": 0.8144,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.2897280063764073,
|
|
"grad_norm": 0.37228222071530115,
|
|
"learning_rate": 3.263583552408744e-05,
|
|
"loss": 0.8203,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.29012653183222076,
|
|
"grad_norm": 0.36804439864776206,
|
|
"learning_rate": 3.261621068870355e-05,
|
|
"loss": 0.8436,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.2905250572880343,
|
|
"grad_norm": 0.4010864307131854,
|
|
"learning_rate": 3.2596565656863036e-05,
|
|
"loss": 0.8211,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.2909235827438478,
|
|
"grad_norm": 0.43321148633091444,
|
|
"learning_rate": 3.257690046001431e-05,
|
|
"loss": 0.8659,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.29132210819966126,
|
|
"grad_norm": 0.37678425829862483,
|
|
"learning_rate": 3.255721512963811e-05,
|
|
"loss": 0.8549,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.29172063365547474,
|
|
"grad_norm": 0.38473774610717565,
|
|
"learning_rate": 3.253750969724735e-05,
|
|
"loss": 0.8584,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.2921191591112882,
|
|
"grad_norm": 0.3274732323738536,
|
|
"learning_rate": 3.251778419438716e-05,
|
|
"loss": 0.8197,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.2925176845671017,
|
|
"grad_norm": 0.37385182013341806,
|
|
"learning_rate": 3.2498038652634797e-05,
|
|
"loss": 0.8485,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.29291621002291524,
|
|
"grad_norm": 0.37571422954043315,
|
|
"learning_rate": 3.2478273103599587e-05,
|
|
"loss": 0.8131,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.2933147354787287,
|
|
"grad_norm": 1.0205773925944017,
|
|
"learning_rate": 3.24584875789229e-05,
|
|
"loss": 0.8122,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.2937132609345422,
|
|
"grad_norm": 0.397474423244844,
|
|
"learning_rate": 3.243868211027807e-05,
|
|
"loss": 0.8575,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.2941117863903557,
|
|
"grad_norm": 0.35542654634964194,
|
|
"learning_rate": 3.241885672937034e-05,
|
|
"loss": 0.8459,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.29451031184616916,
|
|
"grad_norm": 0.7079812695011942,
|
|
"learning_rate": 3.239901146793688e-05,
|
|
"loss": 0.8235,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.29490883730198264,
|
|
"grad_norm": 0.40472908559410964,
|
|
"learning_rate": 3.237914635774664e-05,
|
|
"loss": 0.8358,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.2953073627577962,
|
|
"grad_norm": 0.6704919581462614,
|
|
"learning_rate": 3.235926143060036e-05,
|
|
"loss": 0.881,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.29570588821360966,
|
|
"grad_norm": 0.373533664396295,
|
|
"learning_rate": 3.23393567183305e-05,
|
|
"loss": 0.853,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.29610441366942314,
|
|
"grad_norm": 0.4047009515080516,
|
|
"learning_rate": 3.231943225280121e-05,
|
|
"loss": 0.8569,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.2965029391252366,
|
|
"grad_norm": 0.3877536209778869,
|
|
"learning_rate": 3.229948806590824e-05,
|
|
"loss": 0.835,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.2969014645810501,
|
|
"grad_norm": 0.4714038839534881,
|
|
"learning_rate": 3.227952418957892e-05,
|
|
"loss": 0.868,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.2972999900368636,
|
|
"grad_norm": 0.4463329373269963,
|
|
"learning_rate": 3.225954065577209e-05,
|
|
"loss": 0.848,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.2976985154926771,
|
|
"grad_norm": 0.42587530691745,
|
|
"learning_rate": 3.223953749647807e-05,
|
|
"loss": 0.8607,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.2980970409484906,
|
|
"grad_norm": 0.4379931392773523,
|
|
"learning_rate": 3.221951474371861e-05,
|
|
"loss": 0.813,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.2984955664043041,
|
|
"grad_norm": 0.38309480692550185,
|
|
"learning_rate": 3.2199472429546785e-05,
|
|
"loss": 0.8474,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.29889409186011756,
|
|
"grad_norm": 0.3616798063850079,
|
|
"learning_rate": 3.2179410586047025e-05,
|
|
"loss": 0.8154,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.29929261731593104,
|
|
"grad_norm": 0.3747541200969163,
|
|
"learning_rate": 3.215932924533501e-05,
|
|
"loss": 0.8378,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.2996911427717445,
|
|
"grad_norm": 0.38031077846694633,
|
|
"learning_rate": 3.213922843955762e-05,
|
|
"loss": 0.8543,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.30008966822755806,
|
|
"grad_norm": 0.41068418371221344,
|
|
"learning_rate": 3.21191082008929e-05,
|
|
"loss": 0.8392,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.30048819368337154,
|
|
"grad_norm": 0.3644597909816924,
|
|
"learning_rate": 3.2098968561550024e-05,
|
|
"loss": 0.8061,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.300886719139185,
|
|
"grad_norm": 0.37311229876996665,
|
|
"learning_rate": 3.2078809553769195e-05,
|
|
"loss": 0.8693,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.3012852445949985,
|
|
"grad_norm": 0.45016158998524075,
|
|
"learning_rate": 3.205863120982164e-05,
|
|
"loss": 0.8602,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.301683770050812,
|
|
"grad_norm": 0.42629280896654315,
|
|
"learning_rate": 3.203843356200952e-05,
|
|
"loss": 0.8532,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.30208229550662546,
|
|
"grad_norm": 0.4110371155650319,
|
|
"learning_rate": 3.201821664266595e-05,
|
|
"loss": 0.8451,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.302480820962439,
|
|
"grad_norm": 0.4192137078636866,
|
|
"learning_rate": 3.199798048415481e-05,
|
|
"loss": 0.8436,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.3028793464182525,
|
|
"grad_norm": 0.4446866796453996,
|
|
"learning_rate": 3.197772511887086e-05,
|
|
"loss": 0.8235,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.30327787187406596,
|
|
"grad_norm": 0.433556905913176,
|
|
"learning_rate": 3.195745057923957e-05,
|
|
"loss": 0.8603,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.30367639732987944,
|
|
"grad_norm": 0.4114711662961495,
|
|
"learning_rate": 3.193715689771709e-05,
|
|
"loss": 0.838,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.3040749227856929,
|
|
"grad_norm": 0.3926214986996156,
|
|
"learning_rate": 3.191684410679025e-05,
|
|
"loss": 0.8502,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.3044734482415064,
|
|
"grad_norm": 0.4139928341021709,
|
|
"learning_rate": 3.189651223897644e-05,
|
|
"loss": 0.8385,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.30487197369731994,
|
|
"grad_norm": 0.4129548938591373,
|
|
"learning_rate": 3.1876161326823615e-05,
|
|
"loss": 0.8791,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.3052704991531334,
|
|
"grad_norm": 0.3955272894598311,
|
|
"learning_rate": 3.185579140291019e-05,
|
|
"loss": 0.8384,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.3056690246089469,
|
|
"grad_norm": 0.3585005878079346,
|
|
"learning_rate": 3.183540249984504e-05,
|
|
"loss": 0.8132,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.3060675500647604,
|
|
"grad_norm": 0.4212205077030527,
|
|
"learning_rate": 3.18149946502674e-05,
|
|
"loss": 0.8308,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.30646607552057387,
|
|
"grad_norm": 0.3638728218380253,
|
|
"learning_rate": 3.179456788684685e-05,
|
|
"loss": 0.8097,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.30686460097638735,
|
|
"grad_norm": 0.4024379131636804,
|
|
"learning_rate": 3.1774122242283236e-05,
|
|
"loss": 0.8401,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.3072631264322009,
|
|
"grad_norm": 0.4132201236498637,
|
|
"learning_rate": 3.175365774930665e-05,
|
|
"loss": 0.8111,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.30766165188801436,
|
|
"grad_norm": 0.3525657580163014,
|
|
"learning_rate": 3.1733174440677346e-05,
|
|
"loss": 0.8201,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.30806017734382785,
|
|
"grad_norm": 0.35083612349906135,
|
|
"learning_rate": 3.171267234918568e-05,
|
|
"loss": 0.815,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.3084587027996413,
|
|
"grad_norm": 0.4002385012230293,
|
|
"learning_rate": 3.169215150765211e-05,
|
|
"loss": 0.8168,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.3088572282554548,
|
|
"grad_norm": 0.3629878196057507,
|
|
"learning_rate": 3.1671611948927074e-05,
|
|
"loss": 0.8367,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.3092557537112683,
|
|
"grad_norm": 0.34583616562695413,
|
|
"learning_rate": 3.165105370589102e-05,
|
|
"loss": 0.8253,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.3096542791670818,
|
|
"grad_norm": 0.3607827655628309,
|
|
"learning_rate": 3.1630476811454246e-05,
|
|
"loss": 0.8284,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.3100528046228953,
|
|
"grad_norm": 0.37546990727594654,
|
|
"learning_rate": 3.160988129855697e-05,
|
|
"loss": 0.8376,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.3104513300787088,
|
|
"grad_norm": 0.39969610145426393,
|
|
"learning_rate": 3.158926720016917e-05,
|
|
"loss": 0.8516,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.31084985553452227,
|
|
"grad_norm": 0.36953469600153793,
|
|
"learning_rate": 3.156863454929059e-05,
|
|
"loss": 0.8236,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.31124838099033575,
|
|
"grad_norm": 0.33928479120444516,
|
|
"learning_rate": 3.154798337895067e-05,
|
|
"loss": 0.8443,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.31164690644614923,
|
|
"grad_norm": 0.3966330597527675,
|
|
"learning_rate": 3.152731372220852e-05,
|
|
"loss": 0.8188,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.3120454319019627,
|
|
"grad_norm": 0.3946127272938953,
|
|
"learning_rate": 3.1506625612152814e-05,
|
|
"loss": 0.832,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.31244395735777625,
|
|
"grad_norm": 0.3785322567375632,
|
|
"learning_rate": 3.148591908190178e-05,
|
|
"loss": 0.8393,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.31284248281358973,
|
|
"grad_norm": 0.36331251784056434,
|
|
"learning_rate": 3.1465194164603135e-05,
|
|
"loss": 0.8403,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.3132410082694032,
|
|
"grad_norm": 0.35684726071521566,
|
|
"learning_rate": 3.1444450893434025e-05,
|
|
"loss": 0.8464,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.3136395337252167,
|
|
"grad_norm": 0.33346839612618157,
|
|
"learning_rate": 3.142368930160098e-05,
|
|
"loss": 0.8607,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.31403805918103017,
|
|
"grad_norm": 0.34733144268906585,
|
|
"learning_rate": 3.140290942233985e-05,
|
|
"loss": 0.858,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.31443658463684365,
|
|
"grad_norm": 0.3523769266485713,
|
|
"learning_rate": 3.138211128891578e-05,
|
|
"loss": 0.8245,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.3148351100926572,
|
|
"grad_norm": 0.3491121768861967,
|
|
"learning_rate": 3.136129493462312e-05,
|
|
"loss": 0.8394,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.31523363554847067,
|
|
"grad_norm": 0.3878058197741651,
|
|
"learning_rate": 3.134046039278539e-05,
|
|
"loss": 0.8406,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.31563216100428415,
|
|
"grad_norm": 0.3331713976353916,
|
|
"learning_rate": 3.131960769675524e-05,
|
|
"loss": 0.8205,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.31603068646009763,
|
|
"grad_norm": 0.3902176893077025,
|
|
"learning_rate": 3.1298736879914364e-05,
|
|
"loss": 0.8634,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.3164292119159111,
|
|
"grad_norm": 0.39518447785038,
|
|
"learning_rate": 3.127784797567347e-05,
|
|
"loss": 0.8298,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.3168277373717246,
|
|
"grad_norm": 0.3422487336442997,
|
|
"learning_rate": 3.125694101747222e-05,
|
|
"loss": 0.8613,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.31722626282753813,
|
|
"grad_norm": 0.33332846452402065,
|
|
"learning_rate": 3.123601603877918e-05,
|
|
"loss": 0.8502,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.3176247882833516,
|
|
"grad_norm": 0.6423101526850392,
|
|
"learning_rate": 3.121507307309178e-05,
|
|
"loss": 0.8338,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.3180233137391651,
|
|
"grad_norm": 0.38531993142674054,
|
|
"learning_rate": 3.11941121539362e-05,
|
|
"loss": 0.7963,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.3184218391949786,
|
|
"grad_norm": 0.3592316503041697,
|
|
"learning_rate": 3.1173133314867414e-05,
|
|
"loss": 0.8411,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.31882036465079205,
|
|
"grad_norm": 0.3598280004430287,
|
|
"learning_rate": 3.115213658946904e-05,
|
|
"loss": 0.8336,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.31921889010660554,
|
|
"grad_norm": 0.3496111681067253,
|
|
"learning_rate": 3.113112201135335e-05,
|
|
"loss": 0.8574,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.31961741556241907,
|
|
"grad_norm": 0.3664242703958735,
|
|
"learning_rate": 3.11100896141612e-05,
|
|
"loss": 0.8436,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.32001594101823255,
|
|
"grad_norm": 0.32787991821140705,
|
|
"learning_rate": 3.108903943156194e-05,
|
|
"loss": 0.8489,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.32041446647404603,
|
|
"grad_norm": 0.40557517482435224,
|
|
"learning_rate": 3.106797149725344e-05,
|
|
"loss": 0.8237,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.3208129919298595,
|
|
"grad_norm": 0.37518817153121636,
|
|
"learning_rate": 3.1046885844961946e-05,
|
|
"loss": 0.8274,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.321211517385673,
|
|
"grad_norm": 0.37714764259452016,
|
|
"learning_rate": 3.102578250844209e-05,
|
|
"loss": 0.8331,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.3216100428414865,
|
|
"grad_norm": 0.37798047544093105,
|
|
"learning_rate": 3.10046615214768e-05,
|
|
"loss": 0.8502,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.3220085682973,
|
|
"grad_norm": 0.4109920014418336,
|
|
"learning_rate": 3.098352291787728e-05,
|
|
"loss": 0.8227,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.3224070937531135,
|
|
"grad_norm": 0.4499775221189975,
|
|
"learning_rate": 3.09623667314829e-05,
|
|
"loss": 0.8247,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.322805619208927,
|
|
"grad_norm": 0.331922156881542,
|
|
"learning_rate": 3.0941192996161215e-05,
|
|
"loss": 0.7928,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.32320414466474046,
|
|
"grad_norm": 0.34248930965498,
|
|
"learning_rate": 3.092000174580785e-05,
|
|
"loss": 0.8432,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.32360267012055394,
|
|
"grad_norm": 0.35843509172736904,
|
|
"learning_rate": 3.089879301434648e-05,
|
|
"loss": 0.8477,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.3240011955763674,
|
|
"grad_norm": 0.3683897489622322,
|
|
"learning_rate": 3.0877566835728755e-05,
|
|
"loss": 0.8091,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.32439972103218095,
|
|
"grad_norm": 0.5313658405862416,
|
|
"learning_rate": 3.0856323243934255e-05,
|
|
"loss": 0.8279,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.32479824648799444,
|
|
"grad_norm": 0.37323472384352163,
|
|
"learning_rate": 3.083506227297045e-05,
|
|
"loss": 0.8326,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.3251967719438079,
|
|
"grad_norm": 0.39228250684825317,
|
|
"learning_rate": 3.0813783956872615e-05,
|
|
"loss": 0.8294,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.3255952973996214,
|
|
"grad_norm": 0.3652945541655549,
|
|
"learning_rate": 3.07924883297038e-05,
|
|
"loss": 0.846,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.3259938228554349,
|
|
"grad_norm": 0.3659044128890069,
|
|
"learning_rate": 3.0771175425554766e-05,
|
|
"loss": 0.8204,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.32639234831124836,
|
|
"grad_norm": 0.4707331446693342,
|
|
"learning_rate": 3.074984527854392e-05,
|
|
"loss": 0.8163,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.3267908737670619,
|
|
"grad_norm": 0.3606528922605574,
|
|
"learning_rate": 3.072849792281731e-05,
|
|
"loss": 0.8334,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.3271893992228754,
|
|
"grad_norm": 0.3770070622615337,
|
|
"learning_rate": 3.0707133392548474e-05,
|
|
"loss": 0.8224,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.32758792467868886,
|
|
"grad_norm": 0.359650139273174,
|
|
"learning_rate": 3.068575172193849e-05,
|
|
"loss": 0.8534,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.32798645013450234,
|
|
"grad_norm": 0.3456572438444792,
|
|
"learning_rate": 3.066435294521584e-05,
|
|
"loss": 0.889,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.3283849755903158,
|
|
"grad_norm": 0.3918222247018766,
|
|
"learning_rate": 3.064293709663645e-05,
|
|
"loss": 0.7898,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.3287835010461293,
|
|
"grad_norm": 0.4247237481434523,
|
|
"learning_rate": 3.0621504210483495e-05,
|
|
"loss": 0.8535,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.32918202650194284,
|
|
"grad_norm": 0.36874426839954455,
|
|
"learning_rate": 3.0600054321067486e-05,
|
|
"loss": 0.8336,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.3295805519577563,
|
|
"grad_norm": 0.4207632539441216,
|
|
"learning_rate": 3.057858746272611e-05,
|
|
"loss": 0.841,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.3299790774135698,
|
|
"grad_norm": 0.38496904071215293,
|
|
"learning_rate": 3.055710366982427e-05,
|
|
"loss": 0.8195,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.3303776028693833,
|
|
"grad_norm": 0.4663868777863652,
|
|
"learning_rate": 3.053560297675392e-05,
|
|
"loss": 0.8419,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.33077612832519676,
|
|
"grad_norm": 0.5264881698443798,
|
|
"learning_rate": 3.0514085417934112e-05,
|
|
"loss": 0.8017,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.33117465378101024,
|
|
"grad_norm": 0.4647249062040843,
|
|
"learning_rate": 3.0492551027810876e-05,
|
|
"loss": 0.8468,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.3315731792368238,
|
|
"grad_norm": 0.305099119380529,
|
|
"learning_rate": 3.04709998408572e-05,
|
|
"loss": 0.7996,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.33197170469263726,
|
|
"grad_norm": 0.46977408947791516,
|
|
"learning_rate": 3.0449431891572936e-05,
|
|
"loss": 0.8474,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.33237023014845074,
|
|
"grad_norm": 0.44745094401575514,
|
|
"learning_rate": 3.0427847214484804e-05,
|
|
"loss": 0.8349,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.3327687556042642,
|
|
"grad_norm": 0.3543202737692515,
|
|
"learning_rate": 3.0406245844146273e-05,
|
|
"loss": 0.8253,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.3331672810600777,
|
|
"grad_norm": 0.3933697240001331,
|
|
"learning_rate": 3.0384627815137553e-05,
|
|
"loss": 0.8125,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.3335658065158912,
|
|
"grad_norm": 0.5148260560348337,
|
|
"learning_rate": 3.0362993162065516e-05,
|
|
"loss": 0.8627,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.3339643319717047,
|
|
"grad_norm": 0.48857380870627215,
|
|
"learning_rate": 3.034134191956364e-05,
|
|
"loss": 0.8236,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.3343628574275182,
|
|
"grad_norm": 0.4056146666480351,
|
|
"learning_rate": 3.0319674122291977e-05,
|
|
"loss": 0.8302,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.3347613828833317,
|
|
"grad_norm": 0.3904977617394034,
|
|
"learning_rate": 3.0297989804937057e-05,
|
|
"loss": 0.8167,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.33515990833914516,
|
|
"grad_norm": 0.5044251985190126,
|
|
"learning_rate": 3.027628900221187e-05,
|
|
"loss": 0.8233,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.33555843379495864,
|
|
"grad_norm": 0.4250841928547596,
|
|
"learning_rate": 3.025457174885581e-05,
|
|
"loss": 0.8281,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.3359569592507721,
|
|
"grad_norm": 0.3823907908471619,
|
|
"learning_rate": 3.0232838079634575e-05,
|
|
"loss": 0.8242,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.3363554847065856,
|
|
"grad_norm": 0.42945934078552406,
|
|
"learning_rate": 3.0211088029340154e-05,
|
|
"loss": 0.8354,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.33675401016239914,
|
|
"grad_norm": 0.42902292521578395,
|
|
"learning_rate": 3.018932163279078e-05,
|
|
"loss": 0.833,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.3371525356182126,
|
|
"grad_norm": 0.3937451062114422,
|
|
"learning_rate": 3.016753892483083e-05,
|
|
"loss": 0.7891,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.3375510610740261,
|
|
"grad_norm": 0.3540399272237491,
|
|
"learning_rate": 3.0145739940330786e-05,
|
|
"loss": 0.8573,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.3379495865298396,
|
|
"grad_norm": 0.4084630243877346,
|
|
"learning_rate": 3.0123924714187214e-05,
|
|
"loss": 0.8234,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.33834811198565307,
|
|
"grad_norm": 0.42274333879010845,
|
|
"learning_rate": 3.0102093281322666e-05,
|
|
"loss": 0.8212,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.33874663744146655,
|
|
"grad_norm": 0.3321533474722135,
|
|
"learning_rate": 3.008024567668563e-05,
|
|
"loss": 0.8173,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.3391451628972801,
|
|
"grad_norm": 0.3692564529574208,
|
|
"learning_rate": 3.0058381935250495e-05,
|
|
"loss": 0.8557,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.33954368835309356,
|
|
"grad_norm": 0.39610202569549047,
|
|
"learning_rate": 3.0036502092017473e-05,
|
|
"loss": 0.8654,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.33994221380890705,
|
|
"grad_norm": 0.3661238023568551,
|
|
"learning_rate": 3.0014606182012566e-05,
|
|
"loss": 0.8727,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.3403407392647205,
|
|
"grad_norm": 0.3872040100330332,
|
|
"learning_rate": 2.9992694240287474e-05,
|
|
"loss": 0.8291,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.340739264720534,
|
|
"grad_norm": 0.3974606504195108,
|
|
"learning_rate": 2.9970766301919583e-05,
|
|
"loss": 0.8679,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.3411377901763475,
|
|
"grad_norm": 0.3938746707369231,
|
|
"learning_rate": 2.994882240201188e-05,
|
|
"loss": 0.8433,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.341536315632161,
|
|
"grad_norm": 0.3691659772037152,
|
|
"learning_rate": 2.99268625756929e-05,
|
|
"loss": 0.8393,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.3419348410879745,
|
|
"grad_norm": 0.3780103920503278,
|
|
"learning_rate": 2.990488685811667e-05,
|
|
"loss": 0.8346,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.342333366543788,
|
|
"grad_norm": 0.4073582614267046,
|
|
"learning_rate": 2.9882895284462664e-05,
|
|
"loss": 0.8476,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.34273189199960147,
|
|
"grad_norm": 0.34365964699391127,
|
|
"learning_rate": 2.9860887889935744e-05,
|
|
"loss": 0.8282,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.34313041745541495,
|
|
"grad_norm": 0.40120857716998304,
|
|
"learning_rate": 2.983886470976608e-05,
|
|
"loss": 0.8275,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.34352894291122843,
|
|
"grad_norm": 0.3959132704688456,
|
|
"learning_rate": 2.9816825779209133e-05,
|
|
"loss": 0.8251,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.34392746836704197,
|
|
"grad_norm": 0.4334298136162478,
|
|
"learning_rate": 2.9794771133545565e-05,
|
|
"loss": 0.822,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.34432599382285545,
|
|
"grad_norm": 0.3870945760786885,
|
|
"learning_rate": 2.977270080808119e-05,
|
|
"loss": 0.8251,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.34472451927866893,
|
|
"grad_norm": 0.37106301614057785,
|
|
"learning_rate": 2.975061483814694e-05,
|
|
"loss": 0.8545,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.3451230447344824,
|
|
"grad_norm": 0.38427213586073594,
|
|
"learning_rate": 2.9728513259098784e-05,
|
|
"loss": 0.8161,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.3455215701902959,
|
|
"grad_norm": 0.3916565010304088,
|
|
"learning_rate": 2.9706396106317675e-05,
|
|
"loss": 0.8419,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.34592009564610937,
|
|
"grad_norm": 0.3709069418845533,
|
|
"learning_rate": 2.96842634152095e-05,
|
|
"loss": 0.846,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.3463186211019229,
|
|
"grad_norm": 0.3703215359984664,
|
|
"learning_rate": 2.9662115221205015e-05,
|
|
"loss": 0.8222,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.3467171465577364,
|
|
"grad_norm": 0.3464063836842463,
|
|
"learning_rate": 2.9639951559759802e-05,
|
|
"loss": 0.8036,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.34711567201354987,
|
|
"grad_norm": 0.35048924216820243,
|
|
"learning_rate": 2.9617772466354192e-05,
|
|
"loss": 0.818,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.34751419746936335,
|
|
"grad_norm": 0.374548515628163,
|
|
"learning_rate": 2.9595577976493238e-05,
|
|
"loss": 0.8199,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.34791272292517683,
|
|
"grad_norm": 0.37643952630682037,
|
|
"learning_rate": 2.9573368125706624e-05,
|
|
"loss": 0.825,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.3483112483809903,
|
|
"grad_norm": 0.3873605831737666,
|
|
"learning_rate": 2.9551142949548634e-05,
|
|
"loss": 0.8183,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.34870977383680385,
|
|
"grad_norm": 0.3639676705380599,
|
|
"learning_rate": 2.9528902483598076e-05,
|
|
"loss": 0.8536,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.34910829929261733,
|
|
"grad_norm": 0.3239254348822666,
|
|
"learning_rate": 2.950664676345824e-05,
|
|
"loss": 0.7855,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.3495068247484308,
|
|
"grad_norm": 0.4333203945657134,
|
|
"learning_rate": 2.9484375824756845e-05,
|
|
"loss": 0.8377,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.3499053502042443,
|
|
"grad_norm": 0.366000478962248,
|
|
"learning_rate": 2.946208970314595e-05,
|
|
"loss": 0.841,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.3503038756600578,
|
|
"grad_norm": 0.36232443933919917,
|
|
"learning_rate": 2.943978843430194e-05,
|
|
"loss": 0.8415,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.35070240111587125,
|
|
"grad_norm": 0.3632587538915808,
|
|
"learning_rate": 2.9417472053925435e-05,
|
|
"loss": 0.833,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.3511009265716848,
|
|
"grad_norm": 0.34528067844688565,
|
|
"learning_rate": 2.939514059774126e-05,
|
|
"loss": 0.8089,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.35149945202749827,
|
|
"grad_norm": 0.3186838935536136,
|
|
"learning_rate": 2.9372794101498353e-05,
|
|
"loss": 0.8112,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.35189797748331175,
|
|
"grad_norm": 0.33496956021034613,
|
|
"learning_rate": 2.935043260096975e-05,
|
|
"loss": 0.8421,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.35229650293912523,
|
|
"grad_norm": 0.33411225546854484,
|
|
"learning_rate": 2.932805613195249e-05,
|
|
"loss": 0.8113,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.3526950283949387,
|
|
"grad_norm": 0.32478642663480967,
|
|
"learning_rate": 2.9305664730267586e-05,
|
|
"loss": 0.8046,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.3530935538507522,
|
|
"grad_norm": 0.3631121635365864,
|
|
"learning_rate": 2.9283258431759954e-05,
|
|
"loss": 0.8173,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.35349207930656573,
|
|
"grad_norm": 0.3429622024570721,
|
|
"learning_rate": 2.926083727229835e-05,
|
|
"loss": 0.8583,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.3538906047623792,
|
|
"grad_norm": 0.345044521347691,
|
|
"learning_rate": 2.923840128777532e-05,
|
|
"loss": 0.813,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.3542891302181927,
|
|
"grad_norm": 0.3694760550020032,
|
|
"learning_rate": 2.9215950514107155e-05,
|
|
"loss": 0.8315,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.3546876556740062,
|
|
"grad_norm": 0.34900971672785386,
|
|
"learning_rate": 2.9193484987233804e-05,
|
|
"loss": 0.8251,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.35508618112981966,
|
|
"grad_norm": 0.36620900329612915,
|
|
"learning_rate": 2.917100474311885e-05,
|
|
"loss": 0.8243,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.35548470658563314,
|
|
"grad_norm": 0.3732972879676541,
|
|
"learning_rate": 2.9148509817749424e-05,
|
|
"loss": 0.8263,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.3558832320414467,
|
|
"grad_norm": 0.3754066448612361,
|
|
"learning_rate": 2.9126000247136162e-05,
|
|
"loss": 0.8549,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.35628175749726015,
|
|
"grad_norm": 0.37766294343524515,
|
|
"learning_rate": 2.910347606731315e-05,
|
|
"loss": 0.8642,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.35668028295307364,
|
|
"grad_norm": 0.3335713482308801,
|
|
"learning_rate": 2.9080937314337853e-05,
|
|
"loss": 0.8261,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.3570788084088871,
|
|
"grad_norm": 0.3586058859524884,
|
|
"learning_rate": 2.9058384024291064e-05,
|
|
"loss": 0.8299,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.3574773338647006,
|
|
"grad_norm": 0.35518778170798426,
|
|
"learning_rate": 2.9035816233276866e-05,
|
|
"loss": 0.8664,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.3578758593205141,
|
|
"grad_norm": 0.3226292379642851,
|
|
"learning_rate": 2.901323397742253e-05,
|
|
"loss": 0.8176,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.3582743847763276,
|
|
"grad_norm": 0.2963818087079733,
|
|
"learning_rate": 2.8990637292878495e-05,
|
|
"loss": 0.8379,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.3586729102321411,
|
|
"grad_norm": 0.330128684962309,
|
|
"learning_rate": 2.896802621581831e-05,
|
|
"loss": 0.8069,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.3590714356879546,
|
|
"grad_norm": 0.30550512523931456,
|
|
"learning_rate": 2.8945400782438536e-05,
|
|
"loss": 0.8098,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.35946996114376806,
|
|
"grad_norm": 0.3225722537828969,
|
|
"learning_rate": 2.8922761028958735e-05,
|
|
"loss": 0.8256,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.35986848659958154,
|
|
"grad_norm": 0.32436626447460576,
|
|
"learning_rate": 2.89001069916214e-05,
|
|
"loss": 0.8697,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.360267012055395,
|
|
"grad_norm": 0.3248090965744356,
|
|
"learning_rate": 2.8877438706691876e-05,
|
|
"loss": 0.7905,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.36066553751120856,
|
|
"grad_norm": 0.3423557906931257,
|
|
"learning_rate": 2.8854756210458305e-05,
|
|
"loss": 0.808,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.36106406296702204,
|
|
"grad_norm": 0.3533066672835484,
|
|
"learning_rate": 2.8832059539231612e-05,
|
|
"loss": 0.8158,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.3614625884228355,
|
|
"grad_norm": 0.3274286434791991,
|
|
"learning_rate": 2.88093487293454e-05,
|
|
"loss": 0.7964,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.361861113878649,
|
|
"grad_norm": 0.3549517407326649,
|
|
"learning_rate": 2.8786623817155875e-05,
|
|
"loss": 0.8459,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.3622596393344625,
|
|
"grad_norm": 0.3179414770046732,
|
|
"learning_rate": 2.8763884839041876e-05,
|
|
"loss": 0.8141,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.36265816479027596,
|
|
"grad_norm": 0.34921190558386694,
|
|
"learning_rate": 2.87411318314047e-05,
|
|
"loss": 0.8319,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.36305669024608944,
|
|
"grad_norm": 0.46547909862633313,
|
|
"learning_rate": 2.8718364830668153e-05,
|
|
"loss": 0.8386,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.363455215701903,
|
|
"grad_norm": 0.3362430896899564,
|
|
"learning_rate": 2.8695583873278402e-05,
|
|
"loss": 0.8087,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.36385374115771646,
|
|
"grad_norm": 0.3421880254638392,
|
|
"learning_rate": 2.8672788995703985e-05,
|
|
"loss": 0.8288,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.36425226661352994,
|
|
"grad_norm": 0.33774819740594564,
|
|
"learning_rate": 2.864998023443571e-05,
|
|
"loss": 0.8284,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.3646507920693434,
|
|
"grad_norm": 0.32177729327477683,
|
|
"learning_rate": 2.862715762598662e-05,
|
|
"loss": 0.8086,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.3650493175251569,
|
|
"grad_norm": 0.31718396437386565,
|
|
"learning_rate": 2.8604321206891904e-05,
|
|
"loss": 0.8077,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.3654478429809704,
|
|
"grad_norm": 0.3078535072758799,
|
|
"learning_rate": 2.858147101370888e-05,
|
|
"loss": 0.815,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.3658463684367839,
|
|
"grad_norm": 0.3251261011534896,
|
|
"learning_rate": 2.855860708301692e-05,
|
|
"loss": 0.8154,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.3662448938925974,
|
|
"grad_norm": 0.32646080328089405,
|
|
"learning_rate": 2.8535729451417354e-05,
|
|
"loss": 0.8495,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.3666434193484109,
|
|
"grad_norm": 0.32013473579432894,
|
|
"learning_rate": 2.851283815553349e-05,
|
|
"loss": 0.8257,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.36704194480422436,
|
|
"grad_norm": 0.3404460262778686,
|
|
"learning_rate": 2.8489933232010486e-05,
|
|
"loss": 0.8274,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.36744047026003784,
|
|
"grad_norm": 0.3179214806128248,
|
|
"learning_rate": 2.8467014717515303e-05,
|
|
"loss": 0.8221,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.3678389957158513,
|
|
"grad_norm": 0.3686956431219607,
|
|
"learning_rate": 2.8444082648736695e-05,
|
|
"loss": 0.8577,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.36823752117166486,
|
|
"grad_norm": 0.3319571070853765,
|
|
"learning_rate": 2.8421137062385077e-05,
|
|
"loss": 0.8472,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.36863604662747834,
|
|
"grad_norm": 0.33391728985772273,
|
|
"learning_rate": 2.839817799519252e-05,
|
|
"loss": 0.8407,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.3690345720832918,
|
|
"grad_norm": 0.36377333064615536,
|
|
"learning_rate": 2.8375205483912683e-05,
|
|
"loss": 0.8062,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.3694330975391053,
|
|
"grad_norm": 0.3192797421529141,
|
|
"learning_rate": 2.8352219565320734e-05,
|
|
"loss": 0.8198,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.3698316229949188,
|
|
"grad_norm": 0.34072810185050395,
|
|
"learning_rate": 2.8329220276213312e-05,
|
|
"loss": 0.8553,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.37023014845073227,
|
|
"grad_norm": 0.3510179405385589,
|
|
"learning_rate": 2.8306207653408452e-05,
|
|
"loss": 0.803,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.3706286739065458,
|
|
"grad_norm": 0.33046352991412514,
|
|
"learning_rate": 2.8283181733745545e-05,
|
|
"loss": 0.8196,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.3710271993623593,
|
|
"grad_norm": 0.3296330314721836,
|
|
"learning_rate": 2.826014255408525e-05,
|
|
"loss": 0.8113,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.37142572481817276,
|
|
"grad_norm": 0.32819051407453925,
|
|
"learning_rate": 2.823709015130948e-05,
|
|
"loss": 0.8363,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.37182425027398625,
|
|
"grad_norm": 0.32244270165621963,
|
|
"learning_rate": 2.8214024562321288e-05,
|
|
"loss": 0.8159,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.3722227757297997,
|
|
"grad_norm": 0.33554287954574435,
|
|
"learning_rate": 2.8190945824044854e-05,
|
|
"loss": 0.8275,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.3726213011856132,
|
|
"grad_norm": 0.31619676372667777,
|
|
"learning_rate": 2.8167853973425408e-05,
|
|
"loss": 0.8237,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.37301982664142674,
|
|
"grad_norm": 0.3145096541701049,
|
|
"learning_rate": 2.8144749047429155e-05,
|
|
"loss": 0.8112,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.3734183520972402,
|
|
"grad_norm": 0.3733084988221381,
|
|
"learning_rate": 2.812163108304325e-05,
|
|
"loss": 0.8492,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.3738168775530537,
|
|
"grad_norm": 0.3271910427372345,
|
|
"learning_rate": 2.8098500117275708e-05,
|
|
"loss": 0.8409,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.3742154030088672,
|
|
"grad_norm": 0.3506373095855538,
|
|
"learning_rate": 2.8075356187155357e-05,
|
|
"loss": 0.8255,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.37461392846468067,
|
|
"grad_norm": 0.3523796388032185,
|
|
"learning_rate": 2.805219932973179e-05,
|
|
"loss": 0.8198,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.37501245392049415,
|
|
"grad_norm": 0.31630826125781786,
|
|
"learning_rate": 2.8029029582075286e-05,
|
|
"loss": 0.8279,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.3754109793763077,
|
|
"grad_norm": 0.31383140189055664,
|
|
"learning_rate": 2.8005846981276758e-05,
|
|
"loss": 0.84,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.37580950483212117,
|
|
"grad_norm": 0.3308152244077927,
|
|
"learning_rate": 2.79826515644477e-05,
|
|
"loss": 0.8551,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.37620803028793465,
|
|
"grad_norm": 0.3183707047927005,
|
|
"learning_rate": 2.795944336872012e-05,
|
|
"loss": 0.835,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.37660655574374813,
|
|
"grad_norm": 0.34065129082815276,
|
|
"learning_rate": 2.7936222431246478e-05,
|
|
"loss": 0.8194,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.3770050811995616,
|
|
"grad_norm": 0.33055758193564483,
|
|
"learning_rate": 2.791298878919964e-05,
|
|
"loss": 0.8295,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.3774036066553751,
|
|
"grad_norm": 0.3178548706287361,
|
|
"learning_rate": 2.7889742479772793e-05,
|
|
"loss": 0.8487,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.3778021321111886,
|
|
"grad_norm": 0.34056866287653254,
|
|
"learning_rate": 2.7866483540179438e-05,
|
|
"loss": 0.822,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.3782006575670021,
|
|
"grad_norm": 0.3530872392015572,
|
|
"learning_rate": 2.784321200765326e-05,
|
|
"loss": 0.7945,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.3785991830228156,
|
|
"grad_norm": 0.34823844388780467,
|
|
"learning_rate": 2.781992791944811e-05,
|
|
"loss": 0.8343,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.37899770847862907,
|
|
"grad_norm": 0.32473433019889203,
|
|
"learning_rate": 2.779663131283795e-05,
|
|
"loss": 0.7889,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.37939623393444255,
|
|
"grad_norm": 0.3440773152101907,
|
|
"learning_rate": 2.7773322225116774e-05,
|
|
"loss": 0.8085,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.37979475939025603,
|
|
"grad_norm": 0.3136356275301238,
|
|
"learning_rate": 2.7750000693598557e-05,
|
|
"loss": 0.7984,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.38019328484606957,
|
|
"grad_norm": 0.36010994273938446,
|
|
"learning_rate": 2.7726666755617198e-05,
|
|
"loss": 0.8176,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.38059181030188305,
|
|
"grad_norm": 0.4042048335792527,
|
|
"learning_rate": 2.770332044852645e-05,
|
|
"loss": 0.8298,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.38099033575769653,
|
|
"grad_norm": 0.33696767739158523,
|
|
"learning_rate": 2.7679961809699878e-05,
|
|
"loss": 0.7998,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.38138886121351,
|
|
"grad_norm": 0.32263411827838845,
|
|
"learning_rate": 2.765659087653077e-05,
|
|
"loss": 0.8234,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.3817873866693235,
|
|
"grad_norm": 0.3199567939883172,
|
|
"learning_rate": 2.7633207686432113e-05,
|
|
"loss": 0.8108,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.382185912125137,
|
|
"grad_norm": 0.33168910588991024,
|
|
"learning_rate": 2.760981227683651e-05,
|
|
"loss": 0.8313,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.3825844375809505,
|
|
"grad_norm": 0.3238687202666879,
|
|
"learning_rate": 2.758640468519611e-05,
|
|
"loss": 0.8321,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.382982963036764,
|
|
"grad_norm": 0.3478685120540082,
|
|
"learning_rate": 2.7562984948982595e-05,
|
|
"loss": 0.824,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.38338148849257747,
|
|
"grad_norm": 0.4127997530905888,
|
|
"learning_rate": 2.7539553105687063e-05,
|
|
"loss": 0.8061,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.38378001394839095,
|
|
"grad_norm": 0.3571852104724218,
|
|
"learning_rate": 2.7516109192820003e-05,
|
|
"loss": 0.8401,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.38417853940420443,
|
|
"grad_norm": 0.33227253978050236,
|
|
"learning_rate": 2.749265324791122e-05,
|
|
"loss": 0.8522,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.3845770648600179,
|
|
"grad_norm": 0.5247271121688866,
|
|
"learning_rate": 2.7469185308509786e-05,
|
|
"loss": 0.8134,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.38497559031583145,
|
|
"grad_norm": 0.3470222523911159,
|
|
"learning_rate": 2.744570541218397e-05,
|
|
"loss": 0.7991,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.38537411577164493,
|
|
"grad_norm": 0.34151142631527753,
|
|
"learning_rate": 2.7422213596521183e-05,
|
|
"loss": 0.8467,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.3857726412274584,
|
|
"grad_norm": 0.519889333298418,
|
|
"learning_rate": 2.7398709899127927e-05,
|
|
"loss": 0.8306,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.3861711666832719,
|
|
"grad_norm": 0.3258609895102337,
|
|
"learning_rate": 2.7375194357629696e-05,
|
|
"loss": 0.7873,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.3865696921390854,
|
|
"grad_norm": 0.4295037852575729,
|
|
"learning_rate": 2.7351667009670993e-05,
|
|
"loss": 0.8403,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.38696821759489886,
|
|
"grad_norm": 0.36998924298526037,
|
|
"learning_rate": 2.732812789291516e-05,
|
|
"loss": 0.8075,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.38736674305071234,
|
|
"grad_norm": 0.32705437276780996,
|
|
"learning_rate": 2.7304577045044433e-05,
|
|
"loss": 0.8282,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.3877652685065259,
|
|
"grad_norm": 0.3340699092845928,
|
|
"learning_rate": 2.72810145037598e-05,
|
|
"loss": 0.7963,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.38816379396233935,
|
|
"grad_norm": 0.3503260696592739,
|
|
"learning_rate": 2.7257440306780968e-05,
|
|
"loss": 0.8606,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.38856231941815284,
|
|
"grad_norm": 0.3459980112053063,
|
|
"learning_rate": 2.7233854491846314e-05,
|
|
"loss": 0.7951,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.3889608448739663,
|
|
"grad_norm": 0.319254119951506,
|
|
"learning_rate": 2.721025709671281e-05,
|
|
"loss": 0.8032,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.3893593703297798,
|
|
"grad_norm": 0.4897236117125459,
|
|
"learning_rate": 2.7186648159155962e-05,
|
|
"loss": 0.8315,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.3897578957855933,
|
|
"grad_norm": 0.3087529107037527,
|
|
"learning_rate": 2.7163027716969755e-05,
|
|
"loss": 0.8117,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.3901564212414068,
|
|
"grad_norm": 0.3275439817021243,
|
|
"learning_rate": 2.7139395807966588e-05,
|
|
"loss": 0.8346,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.3905549466972203,
|
|
"grad_norm": 0.3083375926780146,
|
|
"learning_rate": 2.7115752469977224e-05,
|
|
"loss": 0.8136,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.3909534721530338,
|
|
"grad_norm": 0.3069416211569783,
|
|
"learning_rate": 2.7092097740850712e-05,
|
|
"loss": 0.8213,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.39135199760884726,
|
|
"grad_norm": 0.3138396694972504,
|
|
"learning_rate": 2.7068431658454355e-05,
|
|
"loss": 0.8405,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.39175052306466074,
|
|
"grad_norm": 0.3236054977163557,
|
|
"learning_rate": 2.7044754260673607e-05,
|
|
"loss": 0.8085,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.3921490485204742,
|
|
"grad_norm": 0.31483246013918365,
|
|
"learning_rate": 2.702106558541205e-05,
|
|
"loss": 0.8244,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.39254757397628776,
|
|
"grad_norm": 0.3541307522351268,
|
|
"learning_rate": 2.699736567059132e-05,
|
|
"loss": 0.8002,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.39294609943210124,
|
|
"grad_norm": 0.32364536612849215,
|
|
"learning_rate": 2.6973654554151028e-05,
|
|
"loss": 0.8198,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.3933446248879147,
|
|
"grad_norm": 0.33398363230386113,
|
|
"learning_rate": 2.694993227404875e-05,
|
|
"loss": 0.8393,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.3937431503437282,
|
|
"grad_norm": 0.349530991319565,
|
|
"learning_rate": 2.69261988682599e-05,
|
|
"loss": 0.821,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.3941416757995417,
|
|
"grad_norm": 0.3679139832318692,
|
|
"learning_rate": 2.690245437477772e-05,
|
|
"loss": 0.815,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.39454020125535516,
|
|
"grad_norm": 0.31671482584430505,
|
|
"learning_rate": 2.6878698831613202e-05,
|
|
"loss": 0.8636,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.3949387267111687,
|
|
"grad_norm": 0.3452241320073205,
|
|
"learning_rate": 2.6854932276795026e-05,
|
|
"loss": 0.8111,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.3953372521669822,
|
|
"grad_norm": 0.3018394208024079,
|
|
"learning_rate": 2.6831154748369485e-05,
|
|
"loss": 0.8273,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.39573577762279566,
|
|
"grad_norm": 0.322632592726802,
|
|
"learning_rate": 2.6807366284400457e-05,
|
|
"loss": 0.8038,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.39613430307860914,
|
|
"grad_norm": 0.34241476868414766,
|
|
"learning_rate": 2.6783566922969318e-05,
|
|
"loss": 0.8158,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.3965328285344226,
|
|
"grad_norm": 0.35584217533454204,
|
|
"learning_rate": 2.675975670217489e-05,
|
|
"loss": 0.83,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.3969313539902361,
|
|
"grad_norm": 0.29169575061351766,
|
|
"learning_rate": 2.673593566013338e-05,
|
|
"loss": 0.8124,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.39732987944604964,
|
|
"grad_norm": 0.3705964533467081,
|
|
"learning_rate": 2.671210383497832e-05,
|
|
"loss": 0.8304,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.3977284049018631,
|
|
"grad_norm": 0.33331825287941125,
|
|
"learning_rate": 2.66882612648605e-05,
|
|
"loss": 0.8232,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.3981269303576766,
|
|
"grad_norm": 0.3379785793208752,
|
|
"learning_rate": 2.666440798794791e-05,
|
|
"loss": 0.8113,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.3985254558134901,
|
|
"grad_norm": 0.47824925692484593,
|
|
"learning_rate": 2.6640544042425685e-05,
|
|
"loss": 0.8411,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.39892398126930356,
|
|
"grad_norm": 0.33431552475555065,
|
|
"learning_rate": 2.6616669466496037e-05,
|
|
"loss": 0.8468,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.39932250672511704,
|
|
"grad_norm": 0.34137387833760563,
|
|
"learning_rate": 2.6592784298378188e-05,
|
|
"loss": 0.8418,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.3997210321809306,
|
|
"grad_norm": 0.39365755246331835,
|
|
"learning_rate": 2.656888857630833e-05,
|
|
"loss": 0.8224,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.40011955763674406,
|
|
"grad_norm": 0.49873669187777425,
|
|
"learning_rate": 2.654498233853954e-05,
|
|
"loss": 0.808,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.40051808309255754,
|
|
"grad_norm": 0.3248564941543554,
|
|
"learning_rate": 2.652106562334173e-05,
|
|
"loss": 0.8139,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.400916608548371,
|
|
"grad_norm": 0.37674381602697304,
|
|
"learning_rate": 2.649713846900159e-05,
|
|
"loss": 0.8295,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.4013151340041845,
|
|
"grad_norm": 0.39334509517210275,
|
|
"learning_rate": 2.6473200913822514e-05,
|
|
"loss": 0.8131,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.401713659459998,
|
|
"grad_norm": 0.3224088785864611,
|
|
"learning_rate": 2.644925299612455e-05,
|
|
"loss": 0.7975,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.4021121849158115,
|
|
"grad_norm": 0.4490995597319525,
|
|
"learning_rate": 2.642529475424433e-05,
|
|
"loss": 0.8337,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.402510710371625,
|
|
"grad_norm": 0.34775476784856935,
|
|
"learning_rate": 2.6401326226535037e-05,
|
|
"loss": 0.81,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.4029092358274385,
|
|
"grad_norm": 0.3424684175535075,
|
|
"learning_rate": 2.6377347451366278e-05,
|
|
"loss": 0.7884,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.40330776128325196,
|
|
"grad_norm": 0.32706343083018596,
|
|
"learning_rate": 2.6353358467124094e-05,
|
|
"loss": 0.8105,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.40370628673906545,
|
|
"grad_norm": 0.3531111527591312,
|
|
"learning_rate": 2.632935931221087e-05,
|
|
"loss": 0.8524,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.4041048121948789,
|
|
"grad_norm": 0.3134079131717474,
|
|
"learning_rate": 2.6305350025045257e-05,
|
|
"loss": 0.8258,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.40450333765069246,
|
|
"grad_norm": 0.3041258254708691,
|
|
"learning_rate": 2.6281330644062126e-05,
|
|
"loss": 0.8363,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.40490186310650594,
|
|
"grad_norm": 0.34271831802902314,
|
|
"learning_rate": 2.6257301207712536e-05,
|
|
"loss": 0.8045,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.4053003885623194,
|
|
"grad_norm": 0.31087347936442256,
|
|
"learning_rate": 2.6233261754463605e-05,
|
|
"loss": 0.8331,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.4056989140181329,
|
|
"grad_norm": 0.32439571159756025,
|
|
"learning_rate": 2.62092123227985e-05,
|
|
"loss": 0.839,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.4060974394739464,
|
|
"grad_norm": 0.3077347841509726,
|
|
"learning_rate": 2.6185152951216373e-05,
|
|
"loss": 0.8078,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.40649596492975987,
|
|
"grad_norm": 0.32342264487059186,
|
|
"learning_rate": 2.6161083678232277e-05,
|
|
"loss": 0.8101,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.4068944903855734,
|
|
"grad_norm": 0.4192377215503443,
|
|
"learning_rate": 2.6137004542377122e-05,
|
|
"loss": 0.8333,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.4072930158413869,
|
|
"grad_norm": 0.3193163344884458,
|
|
"learning_rate": 2.611291558219759e-05,
|
|
"loss": 0.8177,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.40769154129720037,
|
|
"grad_norm": 0.36366689035128674,
|
|
"learning_rate": 2.608881683625612e-05,
|
|
"loss": 0.8339,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.40809006675301385,
|
|
"grad_norm": 0.2956336562200817,
|
|
"learning_rate": 2.6064708343130787e-05,
|
|
"loss": 0.8344,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.40848859220882733,
|
|
"grad_norm": 0.35391087494148843,
|
|
"learning_rate": 2.604059014141529e-05,
|
|
"loss": 0.8243,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.4088871176646408,
|
|
"grad_norm": 0.3753489106825966,
|
|
"learning_rate": 2.601646226971885e-05,
|
|
"loss": 0.816,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.40928564312045435,
|
|
"grad_norm": 0.33358896662610243,
|
|
"learning_rate": 2.5992324766666194e-05,
|
|
"loss": 0.8168,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.4096841685762678,
|
|
"grad_norm": 0.3494626801983563,
|
|
"learning_rate": 2.5968177670897447e-05,
|
|
"loss": 0.8158,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.4100826940320813,
|
|
"grad_norm": 0.3311602416729186,
|
|
"learning_rate": 2.5944021021068086e-05,
|
|
"loss": 0.8289,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.4104812194878948,
|
|
"grad_norm": 0.32920620411123275,
|
|
"learning_rate": 2.591985485584891e-05,
|
|
"loss": 0.8462,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.41087974494370827,
|
|
"grad_norm": 0.33365179190960775,
|
|
"learning_rate": 2.589567921392593e-05,
|
|
"loss": 0.8316,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.41127827039952175,
|
|
"grad_norm": 0.2840477218269186,
|
|
"learning_rate": 2.587149413400032e-05,
|
|
"loss": 0.8243,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.4116767958553353,
|
|
"grad_norm": 0.3094870981520638,
|
|
"learning_rate": 2.5847299654788384e-05,
|
|
"loss": 0.8302,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.41207532131114877,
|
|
"grad_norm": 0.33160788932455293,
|
|
"learning_rate": 2.5823095815021458e-05,
|
|
"loss": 0.8047,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.41247384676696225,
|
|
"grad_norm": 0.3296215696895382,
|
|
"learning_rate": 2.579888265344586e-05,
|
|
"loss": 0.8408,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.41287237222277573,
|
|
"grad_norm": 0.31027823213043904,
|
|
"learning_rate": 2.5774660208822854e-05,
|
|
"loss": 0.797,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.4132708976785892,
|
|
"grad_norm": 0.3233755627458931,
|
|
"learning_rate": 2.5750428519928542e-05,
|
|
"loss": 0.8437,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.4136694231344027,
|
|
"grad_norm": 0.3618514564925971,
|
|
"learning_rate": 2.572618762555382e-05,
|
|
"loss": 0.8202,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.4140679485902162,
|
|
"grad_norm": 0.33907339886292404,
|
|
"learning_rate": 2.5701937564504345e-05,
|
|
"loss": 0.8199,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.4144664740460297,
|
|
"grad_norm": 0.3068383167662696,
|
|
"learning_rate": 2.5677678375600436e-05,
|
|
"loss": 0.8301,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.4148649995018432,
|
|
"grad_norm": 0.3790622200712186,
|
|
"learning_rate": 2.565341009767701e-05,
|
|
"loss": 0.8171,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.41526352495765667,
|
|
"grad_norm": 0.3433456715007725,
|
|
"learning_rate": 2.562913276958355e-05,
|
|
"loss": 0.8431,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.41566205041347015,
|
|
"grad_norm": 0.31629971388025424,
|
|
"learning_rate": 2.5604846430184034e-05,
|
|
"loss": 0.8188,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.41606057586928363,
|
|
"grad_norm": 0.36903895294398353,
|
|
"learning_rate": 2.5580551118356842e-05,
|
|
"loss": 0.7884,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.4164591013250971,
|
|
"grad_norm": 0.3504976082604236,
|
|
"learning_rate": 2.5556246872994744e-05,
|
|
"loss": 0.8139,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.41685762678091065,
|
|
"grad_norm": 0.32035221174765094,
|
|
"learning_rate": 2.5531933733004785e-05,
|
|
"loss": 0.8017,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.41725615223672413,
|
|
"grad_norm": 0.3394515589088212,
|
|
"learning_rate": 2.550761173730827e-05,
|
|
"loss": 0.8029,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.4176546776925376,
|
|
"grad_norm": 0.35804587588727005,
|
|
"learning_rate": 2.548328092484067e-05,
|
|
"loss": 0.8015,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.4180532031483511,
|
|
"grad_norm": 0.33858523464707274,
|
|
"learning_rate": 2.5458941334551566e-05,
|
|
"loss": 0.801,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.4184517286041646,
|
|
"grad_norm": 0.3288133650068113,
|
|
"learning_rate": 2.5434593005404605e-05,
|
|
"loss": 0.8036,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.41885025405997806,
|
|
"grad_norm": 0.3424539726833037,
|
|
"learning_rate": 2.5410235976377418e-05,
|
|
"loss": 0.8028,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.4192487795157916,
|
|
"grad_norm": 0.3023013418013977,
|
|
"learning_rate": 2.5385870286461547e-05,
|
|
"loss": 0.8513,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.4196473049716051,
|
|
"grad_norm": 0.34500936623066886,
|
|
"learning_rate": 2.536149597466243e-05,
|
|
"loss": 0.8254,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.42004583042741855,
|
|
"grad_norm": 0.31922631055010225,
|
|
"learning_rate": 2.5337113079999278e-05,
|
|
"loss": 0.8363,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.42044435588323203,
|
|
"grad_norm": 1.3109348539871832,
|
|
"learning_rate": 2.5312721641505054e-05,
|
|
"loss": 0.8507,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.4208428813390455,
|
|
"grad_norm": 0.32480191303613704,
|
|
"learning_rate": 2.5288321698226393e-05,
|
|
"loss": 0.8271,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.421241406794859,
|
|
"grad_norm": 0.37122122754776027,
|
|
"learning_rate": 2.5263913289223567e-05,
|
|
"loss": 0.8461,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.42163993225067253,
|
|
"grad_norm": 0.3268123530148818,
|
|
"learning_rate": 2.523949645357036e-05,
|
|
"loss": 0.8081,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.422038457706486,
|
|
"grad_norm": 0.3751401095220027,
|
|
"learning_rate": 2.5215071230354085e-05,
|
|
"loss": 0.7995,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.4224369831622995,
|
|
"grad_norm": 0.3784425259279124,
|
|
"learning_rate": 2.519063765867546e-05,
|
|
"loss": 0.8189,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.422835508618113,
|
|
"grad_norm": 0.3433963567838051,
|
|
"learning_rate": 2.5166195777648565e-05,
|
|
"loss": 0.8306,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.42323403407392646,
|
|
"grad_norm": 0.3566697956385714,
|
|
"learning_rate": 2.5141745626400804e-05,
|
|
"loss": 0.8073,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.42363255952973994,
|
|
"grad_norm": 0.3406773772854413,
|
|
"learning_rate": 2.511728724407279e-05,
|
|
"loss": 0.8126,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.4240310849855535,
|
|
"grad_norm": 0.3227569499796658,
|
|
"learning_rate": 2.509282066981834e-05,
|
|
"loss": 0.8547,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.42442961044136696,
|
|
"grad_norm": 0.43322555481131175,
|
|
"learning_rate": 2.5068345942804372e-05,
|
|
"loss": 0.8056,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.42482813589718044,
|
|
"grad_norm": 0.3401664677873486,
|
|
"learning_rate": 2.5043863102210854e-05,
|
|
"loss": 0.8301,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.4252266613529939,
|
|
"grad_norm": 0.3308251533254951,
|
|
"learning_rate": 2.5019372187230734e-05,
|
|
"loss": 0.8109,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.4256251868088074,
|
|
"grad_norm": 0.3369938034523319,
|
|
"learning_rate": 2.4994873237069922e-05,
|
|
"loss": 0.8198,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.4260237122646209,
|
|
"grad_norm": 0.3280216989154936,
|
|
"learning_rate": 2.4970366290947145e-05,
|
|
"loss": 0.8119,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.4264222377204344,
|
|
"grad_norm": 0.335124086686642,
|
|
"learning_rate": 2.4945851388093953e-05,
|
|
"loss": 0.8111,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.4268207631762479,
|
|
"grad_norm": 0.32998466833884404,
|
|
"learning_rate": 2.4921328567754643e-05,
|
|
"loss": 0.7979,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.4272192886320614,
|
|
"grad_norm": 0.3245876352671091,
|
|
"learning_rate": 2.489679786918617e-05,
|
|
"loss": 0.8341,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.42761781408787486,
|
|
"grad_norm": 0.339066366216921,
|
|
"learning_rate": 2.4872259331658092e-05,
|
|
"loss": 0.8412,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.42801633954368834,
|
|
"grad_norm": 0.3632142337136183,
|
|
"learning_rate": 2.4847712994452552e-05,
|
|
"loss": 0.8287,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.4284148649995018,
|
|
"grad_norm": 0.31666688457965547,
|
|
"learning_rate": 2.4823158896864138e-05,
|
|
"loss": 0.8108,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.42881339045531536,
|
|
"grad_norm": 0.33156115315753226,
|
|
"learning_rate": 2.479859707819989e-05,
|
|
"loss": 0.8115,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.42921191591112884,
|
|
"grad_norm": 0.3411054033949336,
|
|
"learning_rate": 2.47740275777792e-05,
|
|
"loss": 0.8132,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.4296104413669423,
|
|
"grad_norm": 0.3402952332409344,
|
|
"learning_rate": 2.4749450434933743e-05,
|
|
"loss": 0.8076,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.4300089668227558,
|
|
"grad_norm": 0.3223187993665448,
|
|
"learning_rate": 2.472486568900745e-05,
|
|
"loss": 0.8426,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.4304074922785693,
|
|
"grad_norm": 0.31498257951573805,
|
|
"learning_rate": 2.470027337935641e-05,
|
|
"loss": 0.8166,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.43080601773438276,
|
|
"grad_norm": 0.31243598520804755,
|
|
"learning_rate": 2.4675673545348825e-05,
|
|
"loss": 0.8295,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.4312045431901963,
|
|
"grad_norm": 0.3538419858260921,
|
|
"learning_rate": 2.4651066226364943e-05,
|
|
"loss": 0.8293,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.4316030686460098,
|
|
"grad_norm": 0.31794472665083506,
|
|
"learning_rate": 2.462645146179698e-05,
|
|
"loss": 0.8099,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.43200159410182326,
|
|
"grad_norm": 0.31784933887583533,
|
|
"learning_rate": 2.4601829291049098e-05,
|
|
"loss": 0.7962,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.43240011955763674,
|
|
"grad_norm": 0.42026330060809836,
|
|
"learning_rate": 2.45771997535373e-05,
|
|
"loss": 0.816,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.4327986450134502,
|
|
"grad_norm": 0.33452336084693307,
|
|
"learning_rate": 2.4552562888689376e-05,
|
|
"loss": 0.8075,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.4331971704692637,
|
|
"grad_norm": 0.322413780248328,
|
|
"learning_rate": 2.4527918735944853e-05,
|
|
"loss": 0.7956,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.43359569592507724,
|
|
"grad_norm": 0.32866939422553315,
|
|
"learning_rate": 2.4503267334754925e-05,
|
|
"loss": 0.8368,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.4339942213808907,
|
|
"grad_norm": 0.31934456546936785,
|
|
"learning_rate": 2.447860872458239e-05,
|
|
"loss": 0.8438,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.4343927468367042,
|
|
"grad_norm": 0.32490029875471044,
|
|
"learning_rate": 2.4453942944901575e-05,
|
|
"loss": 0.8056,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.4347912722925177,
|
|
"grad_norm": 0.32929458543358014,
|
|
"learning_rate": 2.4429270035198313e-05,
|
|
"loss": 0.8037,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.43518979774833116,
|
|
"grad_norm": 0.32506473231877164,
|
|
"learning_rate": 2.4404590034969822e-05,
|
|
"loss": 0.8113,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.43558832320414465,
|
|
"grad_norm": 0.29212134247678295,
|
|
"learning_rate": 2.437990298372467e-05,
|
|
"loss": 0.8005,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.4359868486599582,
|
|
"grad_norm": 0.3455754520750264,
|
|
"learning_rate": 2.4355208920982744e-05,
|
|
"loss": 0.7994,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.43638537411577166,
|
|
"grad_norm": 0.30065376764152013,
|
|
"learning_rate": 2.4330507886275122e-05,
|
|
"loss": 0.8164,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.43678389957158514,
|
|
"grad_norm": 0.32131061866768784,
|
|
"learning_rate": 2.4305799919144055e-05,
|
|
"loss": 0.8316,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.4371824250273986,
|
|
"grad_norm": 0.3311309667775356,
|
|
"learning_rate": 2.4281085059142892e-05,
|
|
"loss": 0.8194,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.4375809504832121,
|
|
"grad_norm": 0.32898839768451466,
|
|
"learning_rate": 2.4256363345836026e-05,
|
|
"loss": 0.8321,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.4379794759390256,
|
|
"grad_norm": 0.3065918498699849,
|
|
"learning_rate": 2.4231634818798798e-05,
|
|
"loss": 0.7826,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.4383780013948391,
|
|
"grad_norm": 0.35465547671253245,
|
|
"learning_rate": 2.4206899517617485e-05,
|
|
"loss": 0.8267,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.4387765268506526,
|
|
"grad_norm": 0.3301349903148197,
|
|
"learning_rate": 2.4182157481889183e-05,
|
|
"loss": 0.8022,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.4391750523064661,
|
|
"grad_norm": 0.316437289769763,
|
|
"learning_rate": 2.415740875122178e-05,
|
|
"loss": 0.8036,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.43957357776227957,
|
|
"grad_norm": 0.332243171121802,
|
|
"learning_rate": 2.413265336523389e-05,
|
|
"loss": 0.8352,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.43997210321809305,
|
|
"grad_norm": 0.5376924415941126,
|
|
"learning_rate": 2.4107891363554753e-05,
|
|
"loss": 0.8306,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.44037062867390653,
|
|
"grad_norm": 0.303147057063706,
|
|
"learning_rate": 2.4083122785824236e-05,
|
|
"loss": 0.7916,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.44076915412972,
|
|
"grad_norm": 0.34716257230796316,
|
|
"learning_rate": 2.405834767169271e-05,
|
|
"loss": 0.7974,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.44116767958553355,
|
|
"grad_norm": 0.3205567864972624,
|
|
"learning_rate": 2.403356606082101e-05,
|
|
"loss": 0.8002,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.441566205041347,
|
|
"grad_norm": 0.29598982127864676,
|
|
"learning_rate": 2.400877799288039e-05,
|
|
"loss": 0.8077,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.4419647304971605,
|
|
"grad_norm": 0.3707790401289273,
|
|
"learning_rate": 2.398398350755242e-05,
|
|
"loss": 0.8119,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.442363255952974,
|
|
"grad_norm": 0.35724626182329483,
|
|
"learning_rate": 2.3959182644528945e-05,
|
|
"loss": 0.8117,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.44276178140878747,
|
|
"grad_norm": 0.3194532912667194,
|
|
"learning_rate": 2.3934375443512025e-05,
|
|
"loss": 0.8052,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.44316030686460095,
|
|
"grad_norm": 0.3897881316911469,
|
|
"learning_rate": 2.3909561944213876e-05,
|
|
"loss": 0.8188,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.4435588323204145,
|
|
"grad_norm": 0.31474565450210384,
|
|
"learning_rate": 2.3884742186356783e-05,
|
|
"loss": 0.8301,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.44395735777622797,
|
|
"grad_norm": 0.34893912043486475,
|
|
"learning_rate": 2.385991620967305e-05,
|
|
"loss": 0.7822,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.44435588323204145,
|
|
"grad_norm": 0.34444018169025264,
|
|
"learning_rate": 2.383508405390494e-05,
|
|
"loss": 0.8036,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.44475440868785493,
|
|
"grad_norm": 0.3209220544042362,
|
|
"learning_rate": 2.3810245758804614e-05,
|
|
"loss": 0.7959,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.4451529341436684,
|
|
"grad_norm": 0.3597044151663452,
|
|
"learning_rate": 2.378540136413405e-05,
|
|
"loss": 0.8029,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.4455514595994819,
|
|
"grad_norm": 0.5678063532761977,
|
|
"learning_rate": 2.3760550909664987e-05,
|
|
"loss": 0.7966,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.44594998505529543,
|
|
"grad_norm": 0.3399480220411935,
|
|
"learning_rate": 2.373569443517888e-05,
|
|
"loss": 0.8075,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.4463485105111089,
|
|
"grad_norm": 0.30860916880522943,
|
|
"learning_rate": 2.3710831980466825e-05,
|
|
"loss": 0.816,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.4467470359669224,
|
|
"grad_norm": 0.30451406346046384,
|
|
"learning_rate": 2.368596358532947e-05,
|
|
"loss": 0.7821,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.44714556142273587,
|
|
"grad_norm": 0.3274342257348003,
|
|
"learning_rate": 2.3661089289576973e-05,
|
|
"loss": 0.8099,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.44754408687854935,
|
|
"grad_norm": 0.2990103230908009,
|
|
"learning_rate": 2.3636209133028957e-05,
|
|
"loss": 0.8438,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.44794261233436283,
|
|
"grad_norm": 0.33085965104050497,
|
|
"learning_rate": 2.361132315551442e-05,
|
|
"loss": 0.8148,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.44834113779017637,
|
|
"grad_norm": 0.3235378935161311,
|
|
"learning_rate": 2.3586431396871677e-05,
|
|
"loss": 0.816,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.44873966324598985,
|
|
"grad_norm": 0.30982112537132234,
|
|
"learning_rate": 2.3561533896948296e-05,
|
|
"loss": 0.8205,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.44913818870180333,
|
|
"grad_norm": 0.3148765787287355,
|
|
"learning_rate": 2.3536630695601027e-05,
|
|
"loss": 0.7902,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.4495367141576168,
|
|
"grad_norm": 0.3794802774217404,
|
|
"learning_rate": 2.3511721832695767e-05,
|
|
"loss": 0.8269,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.4499352396134303,
|
|
"grad_norm": 0.3284627503131426,
|
|
"learning_rate": 2.3486807348107464e-05,
|
|
"loss": 0.8597,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.4503337650692438,
|
|
"grad_norm": 0.31901034421618163,
|
|
"learning_rate": 2.3461887281720066e-05,
|
|
"loss": 0.8024,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.4507322905250573,
|
|
"grad_norm": 0.35755058361337694,
|
|
"learning_rate": 2.3436961673426456e-05,
|
|
"loss": 0.8201,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.4511308159808708,
|
|
"grad_norm": 0.37055788579790766,
|
|
"learning_rate": 2.3412030563128402e-05,
|
|
"loss": 0.8043,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.4515293414366843,
|
|
"grad_norm": 0.29135675861869104,
|
|
"learning_rate": 2.338709399073645e-05,
|
|
"loss": 0.8151,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.45192786689249775,
|
|
"grad_norm": 0.3342416376182507,
|
|
"learning_rate": 2.336215199616992e-05,
|
|
"loss": 0.8368,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.45232639234831123,
|
|
"grad_norm": 0.33393406000623976,
|
|
"learning_rate": 2.33372046193568e-05,
|
|
"loss": 0.8156,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.4527249178041247,
|
|
"grad_norm": 0.2962123245077335,
|
|
"learning_rate": 2.3312251900233687e-05,
|
|
"loss": 0.8133,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.45312344325993825,
|
|
"grad_norm": 0.3252453832873177,
|
|
"learning_rate": 2.3287293878745746e-05,
|
|
"loss": 0.8104,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.45352196871575173,
|
|
"grad_norm": 0.31101543033789,
|
|
"learning_rate": 2.3262330594846615e-05,
|
|
"loss": 0.8116,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.4539204941715652,
|
|
"grad_norm": 0.3142215269516538,
|
|
"learning_rate": 2.3237362088498366e-05,
|
|
"loss": 0.8312,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.4543190196273787,
|
|
"grad_norm": 0.3156466217062423,
|
|
"learning_rate": 2.3212388399671434e-05,
|
|
"loss": 0.8026,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.4547175450831922,
|
|
"grad_norm": 0.29130193805422705,
|
|
"learning_rate": 2.318740956834453e-05,
|
|
"loss": 0.8208,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.45511607053900566,
|
|
"grad_norm": 0.31609767343436057,
|
|
"learning_rate": 2.3162425634504624e-05,
|
|
"loss": 0.8048,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.4555145959948192,
|
|
"grad_norm": 0.30627780545918254,
|
|
"learning_rate": 2.3137436638146838e-05,
|
|
"loss": 0.8256,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.4559131214506327,
|
|
"grad_norm": 0.3942343869320896,
|
|
"learning_rate": 2.3112442619274408e-05,
|
|
"loss": 0.8231,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.45631164690644616,
|
|
"grad_norm": 0.30922816387497437,
|
|
"learning_rate": 2.3087443617898585e-05,
|
|
"loss": 0.8128,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.45671017236225964,
|
|
"grad_norm": 0.31257709643441933,
|
|
"learning_rate": 2.3062439674038643e-05,
|
|
"loss": 0.7816,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.4571086978180731,
|
|
"grad_norm": 0.3125099111968418,
|
|
"learning_rate": 2.3037430827721724e-05,
|
|
"loss": 0.8511,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.4575072232738866,
|
|
"grad_norm": 0.3259270287494568,
|
|
"learning_rate": 2.3012417118982833e-05,
|
|
"loss": 0.8078,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.45790574872970013,
|
|
"grad_norm": 0.4841424847659405,
|
|
"learning_rate": 2.298739858786477e-05,
|
|
"loss": 0.846,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.4583042741855136,
|
|
"grad_norm": 0.30651971893302865,
|
|
"learning_rate": 2.2962375274418042e-05,
|
|
"loss": 0.7836,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.4587027996413271,
|
|
"grad_norm": 0.29130109838002205,
|
|
"learning_rate": 2.2937347218700814e-05,
|
|
"loss": 0.8251,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.4591013250971406,
|
|
"grad_norm": 0.29216772346283687,
|
|
"learning_rate": 2.2912314460778838e-05,
|
|
"loss": 0.7934,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.45949985055295406,
|
|
"grad_norm": 0.28659925320048857,
|
|
"learning_rate": 2.2887277040725416e-05,
|
|
"loss": 0.8132,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.45989837600876754,
|
|
"grad_norm": 0.2821978280610863,
|
|
"learning_rate": 2.2862234998621276e-05,
|
|
"loss": 0.8018,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.4602969014645811,
|
|
"grad_norm": 0.3022683438134659,
|
|
"learning_rate": 2.2837188374554584e-05,
|
|
"loss": 0.8011,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.46069542692039456,
|
|
"grad_norm": 0.29620670062698495,
|
|
"learning_rate": 2.281213720862081e-05,
|
|
"loss": 0.7884,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.46109395237620804,
|
|
"grad_norm": 0.2804223684367047,
|
|
"learning_rate": 2.2787081540922716e-05,
|
|
"loss": 0.8016,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.4614924778320215,
|
|
"grad_norm": 0.30149704387252646,
|
|
"learning_rate": 2.2762021411570254e-05,
|
|
"loss": 0.8044,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.461891003287835,
|
|
"grad_norm": 0.28566950350769055,
|
|
"learning_rate": 2.273695686068053e-05,
|
|
"loss": 0.8113,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.4622895287436485,
|
|
"grad_norm": 0.27932263683794883,
|
|
"learning_rate": 2.2711887928377725e-05,
|
|
"loss": 0.8178,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.462688054199462,
|
|
"grad_norm": 0.3504836230780002,
|
|
"learning_rate": 2.2686814654793036e-05,
|
|
"loss": 0.8276,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.4630865796552755,
|
|
"grad_norm": 0.31710148422205037,
|
|
"learning_rate": 2.26617370800646e-05,
|
|
"loss": 0.8075,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.463485105111089,
|
|
"grad_norm": 0.288322551014853,
|
|
"learning_rate": 2.2636655244337455e-05,
|
|
"loss": 0.8099,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.46388363056690246,
|
|
"grad_norm": 0.30696335215944015,
|
|
"learning_rate": 2.2611569187763448e-05,
|
|
"loss": 0.8167,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.46428215602271594,
|
|
"grad_norm": 0.2740251270995111,
|
|
"learning_rate": 2.258647895050118e-05,
|
|
"loss": 0.8122,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.4646806814785294,
|
|
"grad_norm": 0.30100618811204716,
|
|
"learning_rate": 2.2561384572715957e-05,
|
|
"loss": 0.8124,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.4650792069343429,
|
|
"grad_norm": 0.28921422085766796,
|
|
"learning_rate": 2.2536286094579717e-05,
|
|
"loss": 0.8344,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.46547773239015644,
|
|
"grad_norm": 0.30173959947735146,
|
|
"learning_rate": 2.2511183556270937e-05,
|
|
"loss": 0.8326,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.4658762578459699,
|
|
"grad_norm": 0.5060784189623851,
|
|
"learning_rate": 2.2486076997974617e-05,
|
|
"loss": 0.7857,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.4662747833017834,
|
|
"grad_norm": 0.29228478601288754,
|
|
"learning_rate": 2.2460966459882184e-05,
|
|
"loss": 0.7995,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.4666733087575969,
|
|
"grad_norm": 0.31868507689912057,
|
|
"learning_rate": 2.2435851982191426e-05,
|
|
"loss": 0.8323,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.46707183421341036,
|
|
"grad_norm": 0.27865315868245927,
|
|
"learning_rate": 2.2410733605106462e-05,
|
|
"loss": 0.7983,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.46747035966922384,
|
|
"grad_norm": 0.29759002153633596,
|
|
"learning_rate": 2.238561136883764e-05,
|
|
"loss": 0.8044,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.4678688851250374,
|
|
"grad_norm": 0.2846486337810441,
|
|
"learning_rate": 2.236048531360147e-05,
|
|
"loss": 0.8111,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.46826741058085086,
|
|
"grad_norm": 0.3118599392906745,
|
|
"learning_rate": 2.2335355479620605e-05,
|
|
"loss": 0.802,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.46866593603666434,
|
|
"grad_norm": 0.30270097977856236,
|
|
"learning_rate": 2.231022190712373e-05,
|
|
"loss": 0.802,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.4690644614924778,
|
|
"grad_norm": 0.2817261828834847,
|
|
"learning_rate": 2.228508463634551e-05,
|
|
"loss": 0.8007,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.4694629869482913,
|
|
"grad_norm": 0.3274731513059302,
|
|
"learning_rate": 2.225994370752655e-05,
|
|
"loss": 0.8138,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.4698615124041048,
|
|
"grad_norm": 0.2968053602546118,
|
|
"learning_rate": 2.2234799160913285e-05,
|
|
"loss": 0.8239,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.4702600378599183,
|
|
"grad_norm": 0.9004493930737405,
|
|
"learning_rate": 2.2209651036757965e-05,
|
|
"loss": 0.8121,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.4706585633157318,
|
|
"grad_norm": 0.29343035187513045,
|
|
"learning_rate": 2.218449937531856e-05,
|
|
"loss": 0.8062,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.4710570887715453,
|
|
"grad_norm": 0.3251626790620503,
|
|
"learning_rate": 2.2159344216858693e-05,
|
|
"loss": 0.8171,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.47145561422735877,
|
|
"grad_norm": 0.3008660196180082,
|
|
"learning_rate": 2.2134185601647595e-05,
|
|
"loss": 0.8233,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.47185413968317225,
|
|
"grad_norm": 0.31587152291948645,
|
|
"learning_rate": 2.2109023569960028e-05,
|
|
"loss": 0.7893,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.4722526651389857,
|
|
"grad_norm": 0.3109368684781642,
|
|
"learning_rate": 2.208385816207622e-05,
|
|
"loss": 0.8351,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.47265119059479926,
|
|
"grad_norm": 0.3585332576145692,
|
|
"learning_rate": 2.2058689418281806e-05,
|
|
"loss": 0.8235,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.47304971605061275,
|
|
"grad_norm": 0.36347361575702536,
|
|
"learning_rate": 2.2033517378867773e-05,
|
|
"loss": 0.8333,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.4734482415064262,
|
|
"grad_norm": 0.3104981737491085,
|
|
"learning_rate": 2.2008342084130357e-05,
|
|
"loss": 0.7985,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.4738467669622397,
|
|
"grad_norm": 0.29070707839217663,
|
|
"learning_rate": 2.1983163574371038e-05,
|
|
"loss": 0.8135,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.4742452924180532,
|
|
"grad_norm": 0.3019633554231252,
|
|
"learning_rate": 2.1957981889896413e-05,
|
|
"loss": 0.8042,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.47464381787386667,
|
|
"grad_norm": 0.28671960218113185,
|
|
"learning_rate": 2.1932797071018176e-05,
|
|
"loss": 0.7833,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.4750423433296802,
|
|
"grad_norm": 0.30296654651092136,
|
|
"learning_rate": 2.1907609158053043e-05,
|
|
"loss": 0.802,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.4754408687854937,
|
|
"grad_norm": 0.30792479960608926,
|
|
"learning_rate": 2.1882418191322667e-05,
|
|
"loss": 0.7874,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.47583939424130717,
|
|
"grad_norm": 0.39407347199239423,
|
|
"learning_rate": 2.18572242111536e-05,
|
|
"loss": 0.8171,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.47623791969712065,
|
|
"grad_norm": 0.2981154461238015,
|
|
"learning_rate": 2.183202725787723e-05,
|
|
"loss": 0.8202,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.47663644515293413,
|
|
"grad_norm": 0.2883120319508124,
|
|
"learning_rate": 2.1806827371829686e-05,
|
|
"loss": 0.8354,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.4770349706087476,
|
|
"grad_norm": 0.29569950551843616,
|
|
"learning_rate": 2.1781624593351788e-05,
|
|
"loss": 0.8034,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.47743349606456115,
|
|
"grad_norm": 0.2942079747064485,
|
|
"learning_rate": 2.175641896278901e-05,
|
|
"loss": 0.8423,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.47783202152037463,
|
|
"grad_norm": 0.31504833020024914,
|
|
"learning_rate": 2.1731210520491365e-05,
|
|
"loss": 0.7956,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.4782305469761881,
|
|
"grad_norm": 0.27602156434261366,
|
|
"learning_rate": 2.1705999306813378e-05,
|
|
"loss": 0.7789,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.4786290724320016,
|
|
"grad_norm": 0.3159340649254405,
|
|
"learning_rate": 2.168078536211403e-05,
|
|
"loss": 0.8196,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.47902759788781507,
|
|
"grad_norm": 0.30368372482852835,
|
|
"learning_rate": 2.1655568726756643e-05,
|
|
"loss": 0.8199,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.47942612334362855,
|
|
"grad_norm": 0.3082856381822439,
|
|
"learning_rate": 2.163034944110886e-05,
|
|
"loss": 0.8217,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.4798246487994421,
|
|
"grad_norm": 0.30444993184134234,
|
|
"learning_rate": 2.1605127545542572e-05,
|
|
"loss": 0.81,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.48022317425525557,
|
|
"grad_norm": 0.3053503071698002,
|
|
"learning_rate": 2.1579903080433837e-05,
|
|
"loss": 0.7724,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.48062169971106905,
|
|
"grad_norm": 0.2907609764564475,
|
|
"learning_rate": 2.1554676086162827e-05,
|
|
"loss": 0.7939,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.48102022516688253,
|
|
"grad_norm": 0.30438913548426777,
|
|
"learning_rate": 2.152944660311378e-05,
|
|
"loss": 0.8124,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.481418750622696,
|
|
"grad_norm": 0.2916803784401073,
|
|
"learning_rate": 2.1504214671674903e-05,
|
|
"loss": 0.8002,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.4818172760785095,
|
|
"grad_norm": 0.3118580484823128,
|
|
"learning_rate": 2.147898033223831e-05,
|
|
"loss": 0.8152,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.48221580153432303,
|
|
"grad_norm": 0.30562499279688954,
|
|
"learning_rate": 2.1453743625200004e-05,
|
|
"loss": 0.7978,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.4826143269901365,
|
|
"grad_norm": 0.29452400424891173,
|
|
"learning_rate": 2.142850459095975e-05,
|
|
"loss": 0.8083,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.48301285244595,
|
|
"grad_norm": 0.29629314495355424,
|
|
"learning_rate": 2.1403263269921046e-05,
|
|
"loss": 0.8073,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.4834113779017635,
|
|
"grad_norm": 0.32650392294542924,
|
|
"learning_rate": 2.1378019702491054e-05,
|
|
"loss": 0.7924,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.48380990335757695,
|
|
"grad_norm": 0.30150320495591154,
|
|
"learning_rate": 2.135277392908053e-05,
|
|
"loss": 0.8531,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.48420842881339043,
|
|
"grad_norm": 0.310619189184776,
|
|
"learning_rate": 2.132752599010376e-05,
|
|
"loss": 0.834,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.48460695426920397,
|
|
"grad_norm": 0.32562782034606635,
|
|
"learning_rate": 2.1302275925978508e-05,
|
|
"loss": 0.7904,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.48500547972501745,
|
|
"grad_norm": 0.3017176154191894,
|
|
"learning_rate": 2.1277023777125915e-05,
|
|
"loss": 0.8194,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.48540400518083093,
|
|
"grad_norm": 0.32023476312765164,
|
|
"learning_rate": 2.1251769583970484e-05,
|
|
"loss": 0.7893,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.4858025306366444,
|
|
"grad_norm": 0.28781956877783055,
|
|
"learning_rate": 2.122651338693998e-05,
|
|
"loss": 0.8156,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.4862010560924579,
|
|
"grad_norm": 1.2149233879740187,
|
|
"learning_rate": 2.1201255226465375e-05,
|
|
"loss": 0.8266,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.4865995815482714,
|
|
"grad_norm": 0.2992816242260791,
|
|
"learning_rate": 2.1175995142980793e-05,
|
|
"loss": 0.8263,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.4869981070040849,
|
|
"grad_norm": 0.3235204400431873,
|
|
"learning_rate": 2.115073317692342e-05,
|
|
"loss": 0.8074,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.4873966324598984,
|
|
"grad_norm": 0.2995871348511909,
|
|
"learning_rate": 2.112546936873347e-05,
|
|
"loss": 0.8347,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.4877951579157119,
|
|
"grad_norm": 0.3268455050694444,
|
|
"learning_rate": 2.110020375885411e-05,
|
|
"loss": 0.8104,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.48819368337152536,
|
|
"grad_norm": 0.31345643601355155,
|
|
"learning_rate": 2.1074936387731367e-05,
|
|
"loss": 0.8271,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.48859220882733884,
|
|
"grad_norm": 0.37781746616538014,
|
|
"learning_rate": 2.1049667295814113e-05,
|
|
"loss": 0.8276,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.4889907342831523,
|
|
"grad_norm": 0.30667467990270375,
|
|
"learning_rate": 2.1024396523553955e-05,
|
|
"loss": 0.7966,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.48938925973896585,
|
|
"grad_norm": 0.3116435731085305,
|
|
"learning_rate": 2.099912411140521e-05,
|
|
"loss": 0.801,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.48978778519477933,
|
|
"grad_norm": 0.3045824871287522,
|
|
"learning_rate": 2.0973850099824807e-05,
|
|
"loss": 0.76,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.4901863106505928,
|
|
"grad_norm": 0.32180564748889195,
|
|
"learning_rate": 2.094857452927224e-05,
|
|
"loss": 0.8158,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.4905848361064063,
|
|
"grad_norm": 0.9714532194362665,
|
|
"learning_rate": 2.09232974402095e-05,
|
|
"loss": 0.7917,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.4909833615622198,
|
|
"grad_norm": 0.4083517397563029,
|
|
"learning_rate": 2.089801887310099e-05,
|
|
"loss": 0.7759,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.49138188701803326,
|
|
"grad_norm": 0.32375580190481257,
|
|
"learning_rate": 2.087273886841351e-05,
|
|
"loss": 0.8225,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.49178041247384674,
|
|
"grad_norm": 0.29897291559360073,
|
|
"learning_rate": 2.0847457466616135e-05,
|
|
"loss": 0.8223,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.4921789379296603,
|
|
"grad_norm": 0.6264426925966912,
|
|
"learning_rate": 2.08221747081802e-05,
|
|
"loss": 0.806,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.49257746338547376,
|
|
"grad_norm": 0.3393552807659732,
|
|
"learning_rate": 2.079689063357919e-05,
|
|
"loss": 0.808,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.49297598884128724,
|
|
"grad_norm": 0.48064261120943,
|
|
"learning_rate": 2.0771605283288716e-05,
|
|
"loss": 0.8028,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.4933745142971007,
|
|
"grad_norm": 0.30581132700814045,
|
|
"learning_rate": 2.074631869778641e-05,
|
|
"loss": 0.8067,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.4937730397529142,
|
|
"grad_norm": 0.29530312754650695,
|
|
"learning_rate": 2.0721030917551905e-05,
|
|
"loss": 0.8212,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.4941715652087277,
|
|
"grad_norm": 0.29055485043935136,
|
|
"learning_rate": 2.0695741983066724e-05,
|
|
"loss": 0.8193,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.4945700906645412,
|
|
"grad_norm": 0.31170603570838856,
|
|
"learning_rate": 2.0670451934814252e-05,
|
|
"loss": 0.7959,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.4949686161203547,
|
|
"grad_norm": 0.28393384738922395,
|
|
"learning_rate": 2.0645160813279657e-05,
|
|
"loss": 0.8113,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.4953671415761682,
|
|
"grad_norm": 0.31099237786422546,
|
|
"learning_rate": 2.0619868658949818e-05,
|
|
"loss": 0.8277,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.49576566703198166,
|
|
"grad_norm": 0.4543341488542098,
|
|
"learning_rate": 2.059457551231327e-05,
|
|
"loss": 0.8053,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.49616419248779514,
|
|
"grad_norm": 0.3934508739825585,
|
|
"learning_rate": 2.0569281413860147e-05,
|
|
"loss": 0.821,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.4965627179436086,
|
|
"grad_norm": 0.3041220289880547,
|
|
"learning_rate": 2.054398640408208e-05,
|
|
"loss": 0.7835,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.49696124339942216,
|
|
"grad_norm": 0.3121481686636135,
|
|
"learning_rate": 2.0518690523472182e-05,
|
|
"loss": 0.8196,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.49735976885523564,
|
|
"grad_norm": 0.29339385739102847,
|
|
"learning_rate": 2.0493393812524967e-05,
|
|
"loss": 0.812,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.4977582943110491,
|
|
"grad_norm": 0.6381668064023208,
|
|
"learning_rate": 2.0468096311736247e-05,
|
|
"loss": 0.8051,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.4981568197668626,
|
|
"grad_norm": 0.30166068852688105,
|
|
"learning_rate": 2.044279806160313e-05,
|
|
"loss": 0.787,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.4985553452226761,
|
|
"grad_norm": 0.28274022253823955,
|
|
"learning_rate": 2.0417499102623903e-05,
|
|
"loss": 0.8003,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.49895387067848956,
|
|
"grad_norm": 0.3796924292206021,
|
|
"learning_rate": 2.0392199475297995e-05,
|
|
"loss": 0.7982,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.4993523961343031,
|
|
"grad_norm": 0.2853722232096178,
|
|
"learning_rate": 2.0366899220125903e-05,
|
|
"loss": 0.8013,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.4997509215901166,
|
|
"grad_norm": 0.31573490109402036,
|
|
"learning_rate": 2.034159837760914e-05,
|
|
"loss": 0.8147,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.50014944704593,
|
|
"grad_norm": 0.2765481712079679,
|
|
"learning_rate": 2.0316296988250138e-05,
|
|
"loss": 0.7995,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.5005479725017435,
|
|
"grad_norm": 0.2994449499838975,
|
|
"learning_rate": 2.029099509255223e-05,
|
|
"loss": 0.7946,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.5009464979575571,
|
|
"grad_norm": 0.3207532131664091,
|
|
"learning_rate": 2.026569273101954e-05,
|
|
"loss": 0.8038,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.5013450234133705,
|
|
"grad_norm": 0.2829753955420768,
|
|
"learning_rate": 2.0240389944156937e-05,
|
|
"loss": 0.8001,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.501743548869184,
|
|
"grad_norm": 0.27998354424049926,
|
|
"learning_rate": 2.021508677246999e-05,
|
|
"loss": 0.791,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.5021420743249975,
|
|
"grad_norm": 0.2913911881200998,
|
|
"learning_rate": 2.018978325646486e-05,
|
|
"loss": 0.7914,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.502540599780811,
|
|
"grad_norm": 0.26963096722494334,
|
|
"learning_rate": 2.0164479436648272e-05,
|
|
"loss": 0.8406,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.5029391252366245,
|
|
"grad_norm": 0.3010795830435557,
|
|
"learning_rate": 2.0139175353527446e-05,
|
|
"loss": 0.8078,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.503337650692438,
|
|
"grad_norm": 0.30960536952730017,
|
|
"learning_rate": 2.0113871047610016e-05,
|
|
"loss": 0.8074,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.5037361761482515,
|
|
"grad_norm": 0.26906634414413455,
|
|
"learning_rate": 2.0088566559403953e-05,
|
|
"loss": 0.7935,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.5041347016040649,
|
|
"grad_norm": 0.34646731409844644,
|
|
"learning_rate": 2.006326192941755e-05,
|
|
"loss": 0.8442,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.5045332270598785,
|
|
"grad_norm": 0.2726972871873017,
|
|
"learning_rate": 2.003795719815931e-05,
|
|
"loss": 0.7859,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.5049317525156919,
|
|
"grad_norm": 0.3143394544398179,
|
|
"learning_rate": 2.0012652406137903e-05,
|
|
"loss": 0.8307,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.5053302779715054,
|
|
"grad_norm": 0.2631801881501474,
|
|
"learning_rate": 1.99873475938621e-05,
|
|
"loss": 0.7999,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.505728803427319,
|
|
"grad_norm": 0.34508087706819923,
|
|
"learning_rate": 1.9962042801840698e-05,
|
|
"loss": 0.8091,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.5061273288831324,
|
|
"grad_norm": 0.27438242812890384,
|
|
"learning_rate": 1.9936738070582455e-05,
|
|
"loss": 0.798,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.5065258543389459,
|
|
"grad_norm": 0.3025634657688614,
|
|
"learning_rate": 1.991143344059605e-05,
|
|
"loss": 0.7952,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.5069243797947593,
|
|
"grad_norm": 0.2845789431308592,
|
|
"learning_rate": 1.988612895238999e-05,
|
|
"loss": 0.8374,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.5073229052505729,
|
|
"grad_norm": 0.30248035578518695,
|
|
"learning_rate": 1.986082464647255e-05,
|
|
"loss": 0.7864,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.5077214307063864,
|
|
"grad_norm": 0.2950710488906475,
|
|
"learning_rate": 1.9835520563351735e-05,
|
|
"loss": 0.8288,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.5081199561621998,
|
|
"grad_norm": 0.26824757799025784,
|
|
"learning_rate": 1.9810216743535146e-05,
|
|
"loss": 0.8364,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.5085184816180134,
|
|
"grad_norm": 0.2849419128102798,
|
|
"learning_rate": 1.9784913227530024e-05,
|
|
"loss": 0.8236,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.5089170070738268,
|
|
"grad_norm": 0.3103889603819969,
|
|
"learning_rate": 1.975961005584307e-05,
|
|
"loss": 0.8136,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.5093155325296403,
|
|
"grad_norm": 0.6303290018451543,
|
|
"learning_rate": 1.9734307268980467e-05,
|
|
"loss": 0.8311,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.5097140579854538,
|
|
"grad_norm": 0.32732619537234586,
|
|
"learning_rate": 1.9709004907447774e-05,
|
|
"loss": 0.8221,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.5101125834412673,
|
|
"grad_norm": 0.28339108969670607,
|
|
"learning_rate": 1.9683703011749862e-05,
|
|
"loss": 0.7966,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.5105111088970808,
|
|
"grad_norm": 0.3203578395612973,
|
|
"learning_rate": 1.965840162239087e-05,
|
|
"loss": 0.8137,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.5109096343528943,
|
|
"grad_norm": 0.27176747745707136,
|
|
"learning_rate": 1.96331007798741e-05,
|
|
"loss": 0.8078,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.5113081598087078,
|
|
"grad_norm": 0.29516820299549673,
|
|
"learning_rate": 1.9607800524702015e-05,
|
|
"loss": 0.8209,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.5117066852645212,
|
|
"grad_norm": 0.26212656038325677,
|
|
"learning_rate": 1.9582500897376104e-05,
|
|
"loss": 0.8141,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.5121052107203348,
|
|
"grad_norm": 0.28250007105261504,
|
|
"learning_rate": 1.955720193839687e-05,
|
|
"loss": 0.8278,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.5125037361761483,
|
|
"grad_norm": 0.26685759222958566,
|
|
"learning_rate": 1.953190368826376e-05,
|
|
"loss": 0.8339,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.5129022616319617,
|
|
"grad_norm": 0.29022333673533535,
|
|
"learning_rate": 1.9506606187475036e-05,
|
|
"loss": 0.8315,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.5133007870877753,
|
|
"grad_norm": 0.2670289567076886,
|
|
"learning_rate": 1.9481309476527825e-05,
|
|
"loss": 0.801,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.5136993125435887,
|
|
"grad_norm": 0.289510280019879,
|
|
"learning_rate": 1.9456013595917928e-05,
|
|
"loss": 0.812,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.5140978379994022,
|
|
"grad_norm": 0.3128114319953551,
|
|
"learning_rate": 1.9430718586139863e-05,
|
|
"loss": 0.8095,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.5144963634552157,
|
|
"grad_norm": 0.2888978962753298,
|
|
"learning_rate": 1.9405424487686732e-05,
|
|
"loss": 0.79,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.5148948889110292,
|
|
"grad_norm": 0.30521651593807825,
|
|
"learning_rate": 1.9380131341050185e-05,
|
|
"loss": 0.8137,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.5152934143668427,
|
|
"grad_norm": 0.2722787387877988,
|
|
"learning_rate": 1.935483918672035e-05,
|
|
"loss": 0.8291,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.5156919398226562,
|
|
"grad_norm": 0.2863692337341115,
|
|
"learning_rate": 1.932954806518575e-05,
|
|
"loss": 0.7981,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.5160904652784697,
|
|
"grad_norm": 0.2759777323624655,
|
|
"learning_rate": 1.9304258016933282e-05,
|
|
"loss": 0.8272,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.5164889907342831,
|
|
"grad_norm": 0.27713843658608434,
|
|
"learning_rate": 1.92789690824481e-05,
|
|
"loss": 0.8079,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.5168875161900967,
|
|
"grad_norm": 0.2877077017647955,
|
|
"learning_rate": 1.92536813022136e-05,
|
|
"loss": 0.7918,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.5172860416459101,
|
|
"grad_norm": 0.28949094300241585,
|
|
"learning_rate": 1.9228394716711288e-05,
|
|
"loss": 0.7969,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.5176845671017236,
|
|
"grad_norm": 0.29697989743375497,
|
|
"learning_rate": 1.9203109366420812e-05,
|
|
"loss": 0.7928,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.5180830925575372,
|
|
"grad_norm": 0.27889648874882045,
|
|
"learning_rate": 1.917782529181981e-05,
|
|
"loss": 0.8233,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.5184816180133506,
|
|
"grad_norm": 0.3023364181088352,
|
|
"learning_rate": 1.9152542533383872e-05,
|
|
"loss": 0.8312,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.5188801434691641,
|
|
"grad_norm": 0.28357607259449,
|
|
"learning_rate": 1.9127261131586503e-05,
|
|
"loss": 0.7801,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.5192786689249775,
|
|
"grad_norm": 0.2869887242640123,
|
|
"learning_rate": 1.910198112689902e-05,
|
|
"loss": 0.7965,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.5196771943807911,
|
|
"grad_norm": 0.28743333022015244,
|
|
"learning_rate": 1.9076702559790514e-05,
|
|
"loss": 0.8146,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.5200757198366046,
|
|
"grad_norm": 0.284017183782701,
|
|
"learning_rate": 1.9051425470727766e-05,
|
|
"loss": 0.7865,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.520474245292418,
|
|
"grad_norm": 0.29268712998816515,
|
|
"learning_rate": 1.9026149900175193e-05,
|
|
"loss": 0.7996,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.5208727707482316,
|
|
"grad_norm": 0.28999045805168566,
|
|
"learning_rate": 1.9000875888594792e-05,
|
|
"loss": 0.849,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.521271296204045,
|
|
"grad_norm": 0.30459398540455407,
|
|
"learning_rate": 1.8975603476446048e-05,
|
|
"loss": 0.7935,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.5216698216598585,
|
|
"grad_norm": 0.2838650093705641,
|
|
"learning_rate": 1.89503327041859e-05,
|
|
"loss": 0.8034,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.522068347115672,
|
|
"grad_norm": 0.304766254772995,
|
|
"learning_rate": 1.8925063612268637e-05,
|
|
"loss": 0.846,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.5224668725714855,
|
|
"grad_norm": 0.27645008842126473,
|
|
"learning_rate": 1.8899796241145903e-05,
|
|
"loss": 0.8269,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.522865398027299,
|
|
"grad_norm": 0.2952376158549396,
|
|
"learning_rate": 1.8874530631266536e-05,
|
|
"loss": 0.8369,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.5232639234831125,
|
|
"grad_norm": 0.33296861229967156,
|
|
"learning_rate": 1.8849266823076578e-05,
|
|
"loss": 0.8134,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.523662448938926,
|
|
"grad_norm": 0.2866595965213398,
|
|
"learning_rate": 1.8824004857019217e-05,
|
|
"loss": 0.8192,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.5240609743947394,
|
|
"grad_norm": 0.4924611590945922,
|
|
"learning_rate": 1.879874477353463e-05,
|
|
"loss": 0.7903,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.524459499850553,
|
|
"grad_norm": 0.27677116299415827,
|
|
"learning_rate": 1.877348661306003e-05,
|
|
"loss": 0.8102,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.5248580253063665,
|
|
"grad_norm": 0.28883962158261584,
|
|
"learning_rate": 1.8748230416029522e-05,
|
|
"loss": 0.7984,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.5252565507621799,
|
|
"grad_norm": 0.281009978014599,
|
|
"learning_rate": 1.8722976222874095e-05,
|
|
"loss": 0.8045,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.5256550762179935,
|
|
"grad_norm": 0.3095342467124618,
|
|
"learning_rate": 1.8697724074021502e-05,
|
|
"loss": 0.767,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.5260536016738069,
|
|
"grad_norm": 0.29319346123143347,
|
|
"learning_rate": 1.8672474009896242e-05,
|
|
"loss": 0.8372,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.5264521271296204,
|
|
"grad_norm": 0.47782909290265757,
|
|
"learning_rate": 1.8647226070919474e-05,
|
|
"loss": 0.8488,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.5268506525854338,
|
|
"grad_norm": 0.3110245262948928,
|
|
"learning_rate": 1.862198029750895e-05,
|
|
"loss": 0.7963,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.5272491780412474,
|
|
"grad_norm": 0.2917881624752996,
|
|
"learning_rate": 1.8596736730078967e-05,
|
|
"loss": 0.7952,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.5276477034970609,
|
|
"grad_norm": 0.33165379448294435,
|
|
"learning_rate": 1.857149540904026e-05,
|
|
"loss": 0.8076,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.5280462289528743,
|
|
"grad_norm": 0.4239553010821896,
|
|
"learning_rate": 1.8546256374800006e-05,
|
|
"loss": 0.8028,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.5284447544086879,
|
|
"grad_norm": 0.2734465341467207,
|
|
"learning_rate": 1.8521019667761697e-05,
|
|
"loss": 0.794,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.5288432798645013,
|
|
"grad_norm": 0.2629858746393782,
|
|
"learning_rate": 1.8495785328325104e-05,
|
|
"loss": 0.8112,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.5292418053203148,
|
|
"grad_norm": 0.28632746629019823,
|
|
"learning_rate": 1.8470553396886222e-05,
|
|
"loss": 0.8052,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.5296403307761284,
|
|
"grad_norm": 0.2693728963637755,
|
|
"learning_rate": 1.8445323913837173e-05,
|
|
"loss": 0.797,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.5300388562319418,
|
|
"grad_norm": 0.29114792078325186,
|
|
"learning_rate": 1.8420096919566173e-05,
|
|
"loss": 0.8199,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.5304373816877553,
|
|
"grad_norm": 0.2806667770430771,
|
|
"learning_rate": 1.8394872454457434e-05,
|
|
"loss": 0.7832,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.5308359071435688,
|
|
"grad_norm": 0.28182635320788874,
|
|
"learning_rate": 1.836965055889115e-05,
|
|
"loss": 0.7998,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.5312344325993823,
|
|
"grad_norm": 0.3254325490129574,
|
|
"learning_rate": 1.8344431273243364e-05,
|
|
"loss": 0.8112,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.5316329580551957,
|
|
"grad_norm": 0.29483982391186925,
|
|
"learning_rate": 1.8319214637885975e-05,
|
|
"loss": 0.8025,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.5320314835110093,
|
|
"grad_norm": 0.2552432370606682,
|
|
"learning_rate": 1.829400069318663e-05,
|
|
"loss": 0.7978,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.5324300089668228,
|
|
"grad_norm": 0.2923821069068519,
|
|
"learning_rate": 1.826878947950864e-05,
|
|
"loss": 0.7833,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.5328285344226362,
|
|
"grad_norm": 0.26602672952480433,
|
|
"learning_rate": 1.8243581037211005e-05,
|
|
"loss": 0.7893,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.5332270598784498,
|
|
"grad_norm": 0.26880063097474627,
|
|
"learning_rate": 1.821837540664822e-05,
|
|
"loss": 0.7862,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.5336255853342632,
|
|
"grad_norm": 0.2708329335402036,
|
|
"learning_rate": 1.8193172628170324e-05,
|
|
"loss": 0.8108,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.5340241107900767,
|
|
"grad_norm": 0.28229072758383317,
|
|
"learning_rate": 1.8167972742122773e-05,
|
|
"loss": 0.8675,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.5344226362458903,
|
|
"grad_norm": 0.2741069117172231,
|
|
"learning_rate": 1.81427757888464e-05,
|
|
"loss": 0.8261,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.5348211617017037,
|
|
"grad_norm": 0.27606049985568326,
|
|
"learning_rate": 1.811758180867734e-05,
|
|
"loss": 0.8128,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.5352196871575172,
|
|
"grad_norm": 0.27575883416758074,
|
|
"learning_rate": 1.8092390841946964e-05,
|
|
"loss": 0.7975,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.5356182126133306,
|
|
"grad_norm": 0.27470419217590547,
|
|
"learning_rate": 1.8067202928981827e-05,
|
|
"loss": 0.801,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.5360167380691442,
|
|
"grad_norm": 0.2682028369114076,
|
|
"learning_rate": 1.804201811010359e-05,
|
|
"loss": 0.7992,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.5364152635249576,
|
|
"grad_norm": 0.29741163933246206,
|
|
"learning_rate": 1.8016836425628972e-05,
|
|
"loss": 0.7863,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.5368137889807711,
|
|
"grad_norm": 0.2879307582320043,
|
|
"learning_rate": 1.7991657915869646e-05,
|
|
"loss": 0.7912,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.5372123144365847,
|
|
"grad_norm": 0.26970303031329906,
|
|
"learning_rate": 1.7966482621132227e-05,
|
|
"loss": 0.83,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.5376108398923981,
|
|
"grad_norm": 0.2899878970961642,
|
|
"learning_rate": 1.7941310581718197e-05,
|
|
"loss": 0.8143,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.5380093653482116,
|
|
"grad_norm": 0.2749787514839584,
|
|
"learning_rate": 1.7916141837923787e-05,
|
|
"loss": 0.7954,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.5384078908040251,
|
|
"grad_norm": 0.27467702468985844,
|
|
"learning_rate": 1.7890976430039982e-05,
|
|
"loss": 0.7982,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.5388064162598386,
|
|
"grad_norm": 0.2618705303695261,
|
|
"learning_rate": 1.786581439835241e-05,
|
|
"loss": 0.8195,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.5392049417156521,
|
|
"grad_norm": 0.2714594323337975,
|
|
"learning_rate": 1.7840655783141313e-05,
|
|
"loss": 0.796,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.5396034671714656,
|
|
"grad_norm": 0.28811188495556306,
|
|
"learning_rate": 1.7815500624681444e-05,
|
|
"loss": 0.7994,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.5400019926272791,
|
|
"grad_norm": 0.2720623478220906,
|
|
"learning_rate": 1.779034896324204e-05,
|
|
"loss": 0.8153,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.5404005180830925,
|
|
"grad_norm": 0.26375062989547793,
|
|
"learning_rate": 1.7765200839086722e-05,
|
|
"loss": 0.8091,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.5407990435389061,
|
|
"grad_norm": 0.2692041660964484,
|
|
"learning_rate": 1.774005629247346e-05,
|
|
"loss": 0.8079,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.5411975689947195,
|
|
"grad_norm": 0.26724517612106163,
|
|
"learning_rate": 1.77149153636545e-05,
|
|
"loss": 0.8255,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.541596094450533,
|
|
"grad_norm": 0.2767757640601006,
|
|
"learning_rate": 1.7689778092876276e-05,
|
|
"loss": 0.7899,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.5419946199063466,
|
|
"grad_norm": 0.4018120080677502,
|
|
"learning_rate": 1.7664644520379398e-05,
|
|
"loss": 0.8113,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.54239314536216,
|
|
"grad_norm": 0.31258004159467684,
|
|
"learning_rate": 1.7639514686398537e-05,
|
|
"loss": 0.8172,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.5427916708179735,
|
|
"grad_norm": 0.3470011840822337,
|
|
"learning_rate": 1.7614388631162365e-05,
|
|
"loss": 0.7933,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.543190196273787,
|
|
"grad_norm": 0.3043763377673315,
|
|
"learning_rate": 1.758926639489354e-05,
|
|
"loss": 0.8135,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.5435887217296005,
|
|
"grad_norm": 0.2923964849291302,
|
|
"learning_rate": 1.7564148017808578e-05,
|
|
"loss": 0.7818,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.5439872471854139,
|
|
"grad_norm": 0.3065609901064694,
|
|
"learning_rate": 1.753903354011783e-05,
|
|
"loss": 0.8423,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.5443857726412275,
|
|
"grad_norm": 0.2985623055209066,
|
|
"learning_rate": 1.751392300202539e-05,
|
|
"loss": 0.8157,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.544784298097041,
|
|
"grad_norm": 0.2786406179918027,
|
|
"learning_rate": 1.7488816443729066e-05,
|
|
"loss": 0.8133,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.5451828235528544,
|
|
"grad_norm": 0.30926673491457163,
|
|
"learning_rate": 1.746371390542029e-05,
|
|
"loss": 0.8133,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.545581349008668,
|
|
"grad_norm": 0.2641540209794052,
|
|
"learning_rate": 1.743861542728404e-05,
|
|
"loss": 0.7962,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.5459798744644814,
|
|
"grad_norm": 0.29034836879196485,
|
|
"learning_rate": 1.7413521049498823e-05,
|
|
"loss": 0.8176,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.5463783999202949,
|
|
"grad_norm": 0.2768072644524204,
|
|
"learning_rate": 1.7388430812236556e-05,
|
|
"loss": 0.7693,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.5467769253761084,
|
|
"grad_norm": 0.2769206801693697,
|
|
"learning_rate": 1.7363344755662555e-05,
|
|
"loss": 0.8047,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.5471754508319219,
|
|
"grad_norm": 0.36766327627843176,
|
|
"learning_rate": 1.733826291993541e-05,
|
|
"loss": 0.8223,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.5475739762877354,
|
|
"grad_norm": 0.40226420420015246,
|
|
"learning_rate": 1.7313185345206968e-05,
|
|
"loss": 0.7996,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.5479725017435488,
|
|
"grad_norm": 0.2964909563746245,
|
|
"learning_rate": 1.728811207162228e-05,
|
|
"loss": 0.809,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.5483710271993624,
|
|
"grad_norm": 0.28906435974471956,
|
|
"learning_rate": 1.7263043139319476e-05,
|
|
"loss": 0.755,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.5487695526551758,
|
|
"grad_norm": 0.292890255157397,
|
|
"learning_rate": 1.7237978588429753e-05,
|
|
"loss": 0.8009,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.5491680781109893,
|
|
"grad_norm": 0.2781433781639577,
|
|
"learning_rate": 1.721291845907729e-05,
|
|
"loss": 0.7944,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.5495666035668029,
|
|
"grad_norm": 0.2937009571551766,
|
|
"learning_rate": 1.7187862791379198e-05,
|
|
"loss": 0.8135,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.5499651290226163,
|
|
"grad_norm": 0.2912565732468286,
|
|
"learning_rate": 1.7162811625445423e-05,
|
|
"loss": 0.8388,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.5503636544784298,
|
|
"grad_norm": 0.2748876016189558,
|
|
"learning_rate": 1.7137765001378724e-05,
|
|
"loss": 0.836,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.5507621799342433,
|
|
"grad_norm": 0.3163822046309509,
|
|
"learning_rate": 1.711272295927459e-05,
|
|
"loss": 0.8288,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.5511607053900568,
|
|
"grad_norm": 0.27254752708037466,
|
|
"learning_rate": 1.7087685539221162e-05,
|
|
"loss": 0.8161,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.5515592308458703,
|
|
"grad_norm": 0.3125729789680171,
|
|
"learning_rate": 1.70626527812992e-05,
|
|
"loss": 0.8181,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.5519577563016838,
|
|
"grad_norm": 0.29916353607545526,
|
|
"learning_rate": 1.703762472558196e-05,
|
|
"loss": 0.776,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.5523562817574973,
|
|
"grad_norm": 0.333298444535358,
|
|
"learning_rate": 1.7012601412135237e-05,
|
|
"loss": 0.8271,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.5527548072133107,
|
|
"grad_norm": 0.26574557176935226,
|
|
"learning_rate": 1.6987582881017173e-05,
|
|
"loss": 0.7903,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.5531533326691243,
|
|
"grad_norm": 0.30640181668201066,
|
|
"learning_rate": 1.6962569172278283e-05,
|
|
"loss": 0.8029,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.5535518581249377,
|
|
"grad_norm": 0.27259308701491025,
|
|
"learning_rate": 1.6937560325961364e-05,
|
|
"loss": 0.8145,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.5539503835807512,
|
|
"grad_norm": 0.29936679527497784,
|
|
"learning_rate": 1.6912556382101415e-05,
|
|
"loss": 0.791,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.5543489090365648,
|
|
"grad_norm": 0.2708401911735976,
|
|
"learning_rate": 1.6887557380725602e-05,
|
|
"loss": 0.8067,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.5547474344923782,
|
|
"grad_norm": 0.2744964958311244,
|
|
"learning_rate": 1.6862563361853165e-05,
|
|
"loss": 0.8082,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.5551459599481917,
|
|
"grad_norm": 0.27774556322816,
|
|
"learning_rate": 1.6837574365495383e-05,
|
|
"loss": 0.8201,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.5555444854040051,
|
|
"grad_norm": 0.2860333592628782,
|
|
"learning_rate": 1.6812590431655473e-05,
|
|
"loss": 0.8132,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.5559430108598187,
|
|
"grad_norm": 0.2874026887492097,
|
|
"learning_rate": 1.678761160032857e-05,
|
|
"loss": 0.8031,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.5563415363156322,
|
|
"grad_norm": 0.28106720251341816,
|
|
"learning_rate": 1.676263791150164e-05,
|
|
"loss": 0.8094,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.5567400617714456,
|
|
"grad_norm": 0.29522074096111917,
|
|
"learning_rate": 1.6737669405153388e-05,
|
|
"loss": 0.794,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.5571385872272592,
|
|
"grad_norm": 0.273137049734289,
|
|
"learning_rate": 1.6712706121254264e-05,
|
|
"loss": 0.7904,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.5575371126830726,
|
|
"grad_norm": 0.2938729039193004,
|
|
"learning_rate": 1.668774809976632e-05,
|
|
"loss": 0.8211,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.5579356381388861,
|
|
"grad_norm": 0.27893542802339405,
|
|
"learning_rate": 1.6662795380643212e-05,
|
|
"loss": 0.7831,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.5583341635946996,
|
|
"grad_norm": 0.31771721535476655,
|
|
"learning_rate": 1.6637848003830086e-05,
|
|
"loss": 0.78,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.5587326890505131,
|
|
"grad_norm": 0.27599058172210705,
|
|
"learning_rate": 1.6612906009263553e-05,
|
|
"loss": 0.7996,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.5591312145063266,
|
|
"grad_norm": 0.26309299304248956,
|
|
"learning_rate": 1.6587969436871608e-05,
|
|
"loss": 0.8273,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.5595297399621401,
|
|
"grad_norm": 0.2658663776464135,
|
|
"learning_rate": 1.6563038326573544e-05,
|
|
"loss": 0.7803,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.5599282654179536,
|
|
"grad_norm": 0.27453871016555076,
|
|
"learning_rate": 1.6538112718279937e-05,
|
|
"loss": 0.8192,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.560326790873767,
|
|
"grad_norm": 0.30380713206643706,
|
|
"learning_rate": 1.651319265189254e-05,
|
|
"loss": 0.7841,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.5607253163295806,
|
|
"grad_norm": 0.2745314071899381,
|
|
"learning_rate": 1.6488278167304243e-05,
|
|
"loss": 0.7966,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.5611238417853941,
|
|
"grad_norm": 0.27106784806374307,
|
|
"learning_rate": 1.6463369304398976e-05,
|
|
"loss": 0.782,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.5615223672412075,
|
|
"grad_norm": 0.26824801623885447,
|
|
"learning_rate": 1.6438466103051708e-05,
|
|
"loss": 0.7975,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.5619208926970211,
|
|
"grad_norm": 0.315466445265476,
|
|
"learning_rate": 1.641356860312833e-05,
|
|
"loss": 0.8375,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.5623194181528345,
|
|
"grad_norm": 0.26586433303215745,
|
|
"learning_rate": 1.6388676844485583e-05,
|
|
"loss": 0.7963,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.562717943608648,
|
|
"grad_norm": 0.26384331857538773,
|
|
"learning_rate": 1.636379086697105e-05,
|
|
"loss": 0.811,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.5631164690644614,
|
|
"grad_norm": 0.2743841871460786,
|
|
"learning_rate": 1.6338910710423034e-05,
|
|
"loss": 0.7687,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.563514994520275,
|
|
"grad_norm": 0.2598827208531272,
|
|
"learning_rate": 1.6314036414670544e-05,
|
|
"loss": 0.7926,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.5639135199760885,
|
|
"grad_norm": 0.2631333168836199,
|
|
"learning_rate": 1.6289168019533182e-05,
|
|
"loss": 0.8233,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.5643120454319019,
|
|
"grad_norm": 0.274009439927925,
|
|
"learning_rate": 1.626430556482112e-05,
|
|
"loss": 0.8093,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.5647105708877155,
|
|
"grad_norm": 0.2815241084799363,
|
|
"learning_rate": 1.623944909033502e-05,
|
|
"loss": 0.8386,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.5651090963435289,
|
|
"grad_norm": 0.2693426340478129,
|
|
"learning_rate": 1.621459863586596e-05,
|
|
"loss": 0.7934,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.5655076217993424,
|
|
"grad_norm": 0.28640728418548206,
|
|
"learning_rate": 1.61897542411954e-05,
|
|
"loss": 0.7605,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.565906147255156,
|
|
"grad_norm": 0.28566808429395685,
|
|
"learning_rate": 1.6164915946095063e-05,
|
|
"loss": 0.7836,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.5663046727109694,
|
|
"grad_norm": 0.2703972532532415,
|
|
"learning_rate": 1.6140083790326963e-05,
|
|
"loss": 0.8089,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.5667031981667829,
|
|
"grad_norm": 0.2792579130299739,
|
|
"learning_rate": 1.6115257813643227e-05,
|
|
"loss": 0.8133,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.5671017236225964,
|
|
"grad_norm": 0.2729454606681309,
|
|
"learning_rate": 1.6090438055786123e-05,
|
|
"loss": 0.8097,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.5675002490784099,
|
|
"grad_norm": 0.2915157005944316,
|
|
"learning_rate": 1.606562455648798e-05,
|
|
"loss": 0.8078,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.5678987745342233,
|
|
"grad_norm": 0.29032778472704807,
|
|
"learning_rate": 1.6040817355471065e-05,
|
|
"loss": 0.7931,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.5682972999900369,
|
|
"grad_norm": 0.2636401468661431,
|
|
"learning_rate": 1.601601649244759e-05,
|
|
"loss": 0.8162,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.5686958254458504,
|
|
"grad_norm": 0.288342129461046,
|
|
"learning_rate": 1.5991222007119614e-05,
|
|
"loss": 0.831,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.5690943509016638,
|
|
"grad_norm": 0.25892278113322154,
|
|
"learning_rate": 1.5966433939178992e-05,
|
|
"loss": 0.7956,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.5694928763574774,
|
|
"grad_norm": 0.30072057342912867,
|
|
"learning_rate": 1.5941652328307296e-05,
|
|
"loss": 0.777,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.5698914018132908,
|
|
"grad_norm": 0.26806489233741043,
|
|
"learning_rate": 1.5916877214175768e-05,
|
|
"loss": 0.8291,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.5702899272691043,
|
|
"grad_norm": 0.2905448743699399,
|
|
"learning_rate": 1.589210863644525e-05,
|
|
"loss": 0.8472,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.5706884527249177,
|
|
"grad_norm": 0.2982764650867147,
|
|
"learning_rate": 1.586734663476612e-05,
|
|
"loss": 0.8144,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.5710869781807313,
|
|
"grad_norm": 0.2872873533319639,
|
|
"learning_rate": 1.584259124877823e-05,
|
|
"loss": 0.8113,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.5714855036365448,
|
|
"grad_norm": 0.29449735325312454,
|
|
"learning_rate": 1.5817842518110827e-05,
|
|
"loss": 0.8214,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.5718840290923582,
|
|
"grad_norm": 0.39051963343272733,
|
|
"learning_rate": 1.5793100482382525e-05,
|
|
"loss": 0.7799,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.5722825545481718,
|
|
"grad_norm": 0.2616459809836497,
|
|
"learning_rate": 1.5768365181201205e-05,
|
|
"loss": 0.7777,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.5726810800039852,
|
|
"grad_norm": 0.28842653622157877,
|
|
"learning_rate": 1.574363665416398e-05,
|
|
"loss": 0.7962,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.5730796054597987,
|
|
"grad_norm": 0.2641950748942506,
|
|
"learning_rate": 1.5718914940857114e-05,
|
|
"loss": 0.7991,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.5734781309156123,
|
|
"grad_norm": 0.27488209941925706,
|
|
"learning_rate": 1.5694200080855952e-05,
|
|
"loss": 0.7883,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.5738766563714257,
|
|
"grad_norm": 0.26045131988579345,
|
|
"learning_rate": 1.5669492113724888e-05,
|
|
"loss": 0.7938,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.5742751818272392,
|
|
"grad_norm": 0.2974260811653572,
|
|
"learning_rate": 1.5644791079017263e-05,
|
|
"loss": 0.8168,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.5746737072830527,
|
|
"grad_norm": 0.28973731321680374,
|
|
"learning_rate": 1.562009701627533e-05,
|
|
"loss": 0.7946,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.5750722327388662,
|
|
"grad_norm": 0.28100822605068104,
|
|
"learning_rate": 1.5595409965030188e-05,
|
|
"loss": 0.8041,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.5754707581946796,
|
|
"grad_norm": 0.2836905042084171,
|
|
"learning_rate": 1.557072996480169e-05,
|
|
"loss": 0.7906,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.5758692836504932,
|
|
"grad_norm": 0.265117167660616,
|
|
"learning_rate": 1.554605705509843e-05,
|
|
"loss": 0.8415,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.5762678091063067,
|
|
"grad_norm": 0.26306772688466995,
|
|
"learning_rate": 1.5521391275417613e-05,
|
|
"loss": 0.8292,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.5766663345621201,
|
|
"grad_norm": 0.2710950213877723,
|
|
"learning_rate": 1.5496732665245085e-05,
|
|
"loss": 0.8231,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.5770648600179337,
|
|
"grad_norm": 0.2788906456071625,
|
|
"learning_rate": 1.5472081264055154e-05,
|
|
"loss": 0.8116,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.5774633854737471,
|
|
"grad_norm": 0.27310715767259724,
|
|
"learning_rate": 1.5447437111310624e-05,
|
|
"loss": 0.8271,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.5778619109295606,
|
|
"grad_norm": 0.2785035809739301,
|
|
"learning_rate": 1.5422800246462706e-05,
|
|
"loss": 0.7981,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.5782604363853742,
|
|
"grad_norm": 0.27219975804237134,
|
|
"learning_rate": 1.5398170708950902e-05,
|
|
"loss": 0.7965,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.5786589618411876,
|
|
"grad_norm": 0.27506447504088605,
|
|
"learning_rate": 1.5373548538203026e-05,
|
|
"loss": 0.8201,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.5790574872970011,
|
|
"grad_norm": 0.2946170401264071,
|
|
"learning_rate": 1.5348933773635067e-05,
|
|
"loss": 0.8128,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.5794560127528146,
|
|
"grad_norm": 0.3826815086737385,
|
|
"learning_rate": 1.532432645465118e-05,
|
|
"loss": 0.8173,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.5798545382086281,
|
|
"grad_norm": 0.2924952233528226,
|
|
"learning_rate": 1.5299726620643595e-05,
|
|
"loss": 0.7775,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.5802530636644415,
|
|
"grad_norm": 0.2642260239005724,
|
|
"learning_rate": 1.5275134310992553e-05,
|
|
"loss": 0.8191,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.580651589120255,
|
|
"grad_norm": 0.3149422419473645,
|
|
"learning_rate": 1.5250549565066262e-05,
|
|
"loss": 0.7974,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.5810501145760686,
|
|
"grad_norm": 0.27490534215380524,
|
|
"learning_rate": 1.5225972422220804e-05,
|
|
"loss": 0.804,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.581448640031882,
|
|
"grad_norm": 0.2755621466065312,
|
|
"learning_rate": 1.5201402921800114e-05,
|
|
"loss": 0.8127,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.5818471654876956,
|
|
"grad_norm": 0.3037669691142441,
|
|
"learning_rate": 1.5176841103135867e-05,
|
|
"loss": 0.7912,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.582245690943509,
|
|
"grad_norm": 0.25177796617384035,
|
|
"learning_rate": 1.5152287005547458e-05,
|
|
"loss": 0.8329,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.5826442163993225,
|
|
"grad_norm": 0.27341817612876335,
|
|
"learning_rate": 1.512774066834191e-05,
|
|
"loss": 0.7794,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.583042741855136,
|
|
"grad_norm": 0.2406762714221454,
|
|
"learning_rate": 1.5103202130813839e-05,
|
|
"loss": 0.7918,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.5834412673109495,
|
|
"grad_norm": 0.28482104897292554,
|
|
"learning_rate": 1.5078671432245362e-05,
|
|
"loss": 0.7675,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.583839792766763,
|
|
"grad_norm": 0.25741699835096044,
|
|
"learning_rate": 1.5054148611906047e-05,
|
|
"loss": 0.7924,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.5842383182225764,
|
|
"grad_norm": 0.2920808223289217,
|
|
"learning_rate": 1.5029633709052864e-05,
|
|
"loss": 0.8141,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.58463684367839,
|
|
"grad_norm": 0.2807331727085593,
|
|
"learning_rate": 1.5005126762930085e-05,
|
|
"loss": 0.7992,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.5850353691342034,
|
|
"grad_norm": 0.2785002462676359,
|
|
"learning_rate": 1.4980627812769273e-05,
|
|
"loss": 0.8283,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.5854338945900169,
|
|
"grad_norm": 0.48934793357042067,
|
|
"learning_rate": 1.4956136897789155e-05,
|
|
"loss": 0.8011,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.5858324200458305,
|
|
"grad_norm": 0.2541832978215571,
|
|
"learning_rate": 1.4931654057195633e-05,
|
|
"loss": 0.7957,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.5862309455016439,
|
|
"grad_norm": 0.28333216989436416,
|
|
"learning_rate": 1.4907179330181667e-05,
|
|
"loss": 0.7933,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.5866294709574574,
|
|
"grad_norm": 0.25893200895383417,
|
|
"learning_rate": 1.4882712755927208e-05,
|
|
"loss": 0.8324,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.5870279964132709,
|
|
"grad_norm": 0.2964360831302451,
|
|
"learning_rate": 1.4858254373599206e-05,
|
|
"loss": 0.8116,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.5874265218690844,
|
|
"grad_norm": 0.2520201190243798,
|
|
"learning_rate": 1.4833804222351437e-05,
|
|
"loss": 0.7728,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.5878250473248979,
|
|
"grad_norm": 0.28965585570658003,
|
|
"learning_rate": 1.4809362341324549e-05,
|
|
"loss": 0.8301,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.5882235727807114,
|
|
"grad_norm": 0.2680016094991912,
|
|
"learning_rate": 1.478492876964592e-05,
|
|
"loss": 0.8104,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.5886220982365249,
|
|
"grad_norm": 0.29138008709625307,
|
|
"learning_rate": 1.4760503546429642e-05,
|
|
"loss": 0.7939,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.5890206236923383,
|
|
"grad_norm": 0.27301356294256424,
|
|
"learning_rate": 1.473608671077644e-05,
|
|
"loss": 0.8017,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.5894191491481519,
|
|
"grad_norm": 0.27632908308241927,
|
|
"learning_rate": 1.4711678301773607e-05,
|
|
"loss": 0.7876,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.5898176746039653,
|
|
"grad_norm": 0.29739284619714174,
|
|
"learning_rate": 1.4687278358494954e-05,
|
|
"loss": 0.8396,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.5902162000597788,
|
|
"grad_norm": 0.26373275038816285,
|
|
"learning_rate": 1.4662886920000727e-05,
|
|
"loss": 0.7893,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.5906147255155924,
|
|
"grad_norm": 0.28819618380315065,
|
|
"learning_rate": 1.463850402533758e-05,
|
|
"loss": 0.8096,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.5910132509714058,
|
|
"grad_norm": 0.26086188725806075,
|
|
"learning_rate": 1.4614129713538456e-05,
|
|
"loss": 0.8272,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.5914117764272193,
|
|
"grad_norm": 0.2998087493750338,
|
|
"learning_rate": 1.4589764023622585e-05,
|
|
"loss": 0.811,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.5918103018830327,
|
|
"grad_norm": 0.28423477916709305,
|
|
"learning_rate": 1.4565406994595402e-05,
|
|
"loss": 0.8314,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.5922088273388463,
|
|
"grad_norm": 0.4714680189752818,
|
|
"learning_rate": 1.4541058665448437e-05,
|
|
"loss": 0.8132,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.5926073527946598,
|
|
"grad_norm": 0.2832956819184063,
|
|
"learning_rate": 1.4516719075159342e-05,
|
|
"loss": 0.8201,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.5930058782504732,
|
|
"grad_norm": 0.280931582487737,
|
|
"learning_rate": 1.4492388262691737e-05,
|
|
"loss": 0.8104,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.5934044037062868,
|
|
"grad_norm": 0.4352514915841819,
|
|
"learning_rate": 1.4468066266995222e-05,
|
|
"loss": 0.7969,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.5938029291621002,
|
|
"grad_norm": 0.28686409934998564,
|
|
"learning_rate": 1.4443753127005264e-05,
|
|
"loss": 0.7842,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.5942014546179137,
|
|
"grad_norm": 0.2569294965760903,
|
|
"learning_rate": 1.4419448881643158e-05,
|
|
"loss": 0.8154,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.5945999800737272,
|
|
"grad_norm": 0.28382287666623324,
|
|
"learning_rate": 1.4395153569815974e-05,
|
|
"loss": 0.8105,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.5949985055295407,
|
|
"grad_norm": 0.2572203424982894,
|
|
"learning_rate": 1.4370867230416451e-05,
|
|
"loss": 0.7826,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.5953970309853542,
|
|
"grad_norm": 6.465506917099715,
|
|
"learning_rate": 1.4346589902323003e-05,
|
|
"loss": 0.783,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.5957955564411677,
|
|
"grad_norm": 0.39706235846696825,
|
|
"learning_rate": 1.432232162439957e-05,
|
|
"loss": 0.8166,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.5961940818969812,
|
|
"grad_norm": 0.26404445452409736,
|
|
"learning_rate": 1.4298062435495661e-05,
|
|
"loss": 0.7826,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.5965926073527946,
|
|
"grad_norm": 0.3308104505575439,
|
|
"learning_rate": 1.4273812374446183e-05,
|
|
"loss": 0.795,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.5969911328086082,
|
|
"grad_norm": 0.3026458263801191,
|
|
"learning_rate": 1.4249571480071467e-05,
|
|
"loss": 0.7715,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.5973896582644216,
|
|
"grad_norm": 0.28588534412959155,
|
|
"learning_rate": 1.4225339791177151e-05,
|
|
"loss": 0.7987,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.5977881837202351,
|
|
"grad_norm": 0.32101230875160675,
|
|
"learning_rate": 1.4201117346554144e-05,
|
|
"loss": 0.8046,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.5981867091760487,
|
|
"grad_norm": 0.290897264466864,
|
|
"learning_rate": 1.4176904184978552e-05,
|
|
"loss": 0.8004,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.5985852346318621,
|
|
"grad_norm": 0.3026009841483658,
|
|
"learning_rate": 1.4152700345211626e-05,
|
|
"loss": 0.8065,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.5989837600876756,
|
|
"grad_norm": 0.44263950851966477,
|
|
"learning_rate": 1.412850586599969e-05,
|
|
"loss": 0.8096,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.599382285543489,
|
|
"grad_norm": 0.3248532926102643,
|
|
"learning_rate": 1.4104320786074078e-05,
|
|
"loss": 0.8377,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.5997808109993026,
|
|
"grad_norm": 0.28575595840318735,
|
|
"learning_rate": 1.408014514415109e-05,
|
|
"loss": 0.78,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.6001793364551161,
|
|
"grad_norm": 0.2794084216593132,
|
|
"learning_rate": 1.4055978978931919e-05,
|
|
"loss": 0.784,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.6005778619109295,
|
|
"grad_norm": 0.2796315632479643,
|
|
"learning_rate": 1.4031822329102558e-05,
|
|
"loss": 0.7991,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.6009763873667431,
|
|
"grad_norm": 0.29082183486321656,
|
|
"learning_rate": 1.4007675233333812e-05,
|
|
"loss": 0.7593,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.6013749128225565,
|
|
"grad_norm": 0.27442890679937104,
|
|
"learning_rate": 1.3983537730281153e-05,
|
|
"loss": 0.82,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.60177343827837,
|
|
"grad_norm": 0.28240777195387234,
|
|
"learning_rate": 1.3959409858584718e-05,
|
|
"loss": 0.7895,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.6021719637341835,
|
|
"grad_norm": 0.28640189626735446,
|
|
"learning_rate": 1.3935291656869216e-05,
|
|
"loss": 0.8065,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.602570489189997,
|
|
"grad_norm": 0.27042843088562313,
|
|
"learning_rate": 1.3911183163743883e-05,
|
|
"loss": 0.7875,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.6029690146458105,
|
|
"grad_norm": 0.3230930753709,
|
|
"learning_rate": 1.3887084417802412e-05,
|
|
"loss": 0.7854,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.603367540101624,
|
|
"grad_norm": 0.26957683695591095,
|
|
"learning_rate": 1.3862995457622883e-05,
|
|
"loss": 0.8231,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.6037660655574375,
|
|
"grad_norm": 0.2814390906832594,
|
|
"learning_rate": 1.3838916321767726e-05,
|
|
"loss": 0.8048,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.6041645910132509,
|
|
"grad_norm": 0.2654808310179734,
|
|
"learning_rate": 1.381484704878363e-05,
|
|
"loss": 0.8074,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.6045631164690645,
|
|
"grad_norm": 0.26170541781453055,
|
|
"learning_rate": 1.379078767720151e-05,
|
|
"loss": 0.7921,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.604961641924878,
|
|
"grad_norm": 0.26340697807382485,
|
|
"learning_rate": 1.3766738245536403e-05,
|
|
"loss": 0.7894,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.6053601673806914,
|
|
"grad_norm": 0.34917912033176396,
|
|
"learning_rate": 1.3742698792287467e-05,
|
|
"loss": 0.7979,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.605758692836505,
|
|
"grad_norm": 0.2698143223745579,
|
|
"learning_rate": 1.371866935593788e-05,
|
|
"loss": 0.7705,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.6061572182923184,
|
|
"grad_norm": 0.25293807015990133,
|
|
"learning_rate": 1.369464997495475e-05,
|
|
"loss": 0.7881,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.6065557437481319,
|
|
"grad_norm": 0.2713270396836266,
|
|
"learning_rate": 1.3670640687789139e-05,
|
|
"loss": 0.7931,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.6069542692039454,
|
|
"grad_norm": 0.27034783836116744,
|
|
"learning_rate": 1.3646641532875911e-05,
|
|
"loss": 0.7961,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.6073527946597589,
|
|
"grad_norm": 0.27490482613460554,
|
|
"learning_rate": 1.362265254863373e-05,
|
|
"loss": 0.8147,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.6077513201155724,
|
|
"grad_norm": 0.2676216739525722,
|
|
"learning_rate": 1.3598673773464972e-05,
|
|
"loss": 0.7853,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.6081498455713858,
|
|
"grad_norm": 1.272473309536001,
|
|
"learning_rate": 1.3574705245755669e-05,
|
|
"loss": 0.8089,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.6085483710271994,
|
|
"grad_norm": 0.6571572306931123,
|
|
"learning_rate": 1.3550747003875458e-05,
|
|
"loss": 0.8261,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.6089468964830128,
|
|
"grad_norm": 0.2596088192309901,
|
|
"learning_rate": 1.3526799086177494e-05,
|
|
"loss": 0.8193,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.6093454219388263,
|
|
"grad_norm": 0.26444135616895786,
|
|
"learning_rate": 1.350286153099842e-05,
|
|
"loss": 0.7892,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.6097439473946399,
|
|
"grad_norm": 0.25937034971149103,
|
|
"learning_rate": 1.3478934376658273e-05,
|
|
"loss": 0.8026,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.6101424728504533,
|
|
"grad_norm": 0.7431115276392141,
|
|
"learning_rate": 1.3455017661460464e-05,
|
|
"loss": 0.7932,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.6105409983062668,
|
|
"grad_norm": 0.27995622010695853,
|
|
"learning_rate": 1.3431111423691677e-05,
|
|
"loss": 0.7833,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.6109395237620803,
|
|
"grad_norm": 0.2464262348021282,
|
|
"learning_rate": 1.3407215701621812e-05,
|
|
"loss": 0.796,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.6113380492178938,
|
|
"grad_norm": 0.26689725888573773,
|
|
"learning_rate": 1.3383330533503971e-05,
|
|
"loss": 0.7984,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.6117365746737072,
|
|
"grad_norm": 0.281243345103868,
|
|
"learning_rate": 1.335945595757432e-05,
|
|
"loss": 0.8119,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.6121351001295208,
|
|
"grad_norm": 0.2807299105795548,
|
|
"learning_rate": 1.3335592012052096e-05,
|
|
"loss": 0.8208,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.6125336255853343,
|
|
"grad_norm": 0.2822355271519365,
|
|
"learning_rate": 1.3311738735139502e-05,
|
|
"loss": 0.7958,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.6129321510411477,
|
|
"grad_norm": 0.2570136422498892,
|
|
"learning_rate": 1.328789616502168e-05,
|
|
"loss": 0.7798,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.6133306764969613,
|
|
"grad_norm": 0.2602381753045998,
|
|
"learning_rate": 1.3264064339866622e-05,
|
|
"loss": 0.7952,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.6137292019527747,
|
|
"grad_norm": 0.27124645437474926,
|
|
"learning_rate": 1.3240243297825112e-05,
|
|
"loss": 0.8447,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.6141277274085882,
|
|
"grad_norm": 0.2614506972170479,
|
|
"learning_rate": 1.3216433077030689e-05,
|
|
"loss": 0.8067,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.6145262528644018,
|
|
"grad_norm": 0.273112140897487,
|
|
"learning_rate": 1.3192633715599548e-05,
|
|
"loss": 0.8041,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.6149247783202152,
|
|
"grad_norm": 0.24587524256890503,
|
|
"learning_rate": 1.3168845251630527e-05,
|
|
"loss": 0.7969,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.6153233037760287,
|
|
"grad_norm": 0.2931074811806814,
|
|
"learning_rate": 1.3145067723204979e-05,
|
|
"loss": 0.7919,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.6157218292318422,
|
|
"grad_norm": 0.23408431837644428,
|
|
"learning_rate": 1.3121301168386796e-05,
|
|
"loss": 0.7974,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.6161203546876557,
|
|
"grad_norm": 0.2885214636424266,
|
|
"learning_rate": 1.3097545625222284e-05,
|
|
"loss": 0.8183,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.6165188801434691,
|
|
"grad_norm": 0.2565866864664869,
|
|
"learning_rate": 1.3073801131740104e-05,
|
|
"loss": 0.8187,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.6169174055992827,
|
|
"grad_norm": 0.3070425063241222,
|
|
"learning_rate": 1.3050067725951258e-05,
|
|
"loss": 0.8084,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.6173159310550962,
|
|
"grad_norm": 0.3551888980070755,
|
|
"learning_rate": 1.3026345445848976e-05,
|
|
"loss": 0.7969,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.6177144565109096,
|
|
"grad_norm": 0.3309087361846915,
|
|
"learning_rate": 1.3002634329408692e-05,
|
|
"loss": 0.7573,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.6181129819667232,
|
|
"grad_norm": 0.2685150964208705,
|
|
"learning_rate": 1.2978934414587955e-05,
|
|
"loss": 0.8077,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.6185115074225366,
|
|
"grad_norm": 0.28733052685665156,
|
|
"learning_rate": 1.2955245739326397e-05,
|
|
"loss": 0.807,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.6189100328783501,
|
|
"grad_norm": 0.25727837605034215,
|
|
"learning_rate": 1.2931568341545649e-05,
|
|
"loss": 0.8055,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.6193085583341637,
|
|
"grad_norm": 0.28129842927276943,
|
|
"learning_rate": 1.2907902259149287e-05,
|
|
"loss": 0.8003,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.6197070837899771,
|
|
"grad_norm": 0.2650304078824774,
|
|
"learning_rate": 1.2884247530022786e-05,
|
|
"loss": 0.7906,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.6201056092457906,
|
|
"grad_norm": 0.36649121713601185,
|
|
"learning_rate": 1.2860604192033414e-05,
|
|
"loss": 0.7765,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.620504134701604,
|
|
"grad_norm": 0.25867200718505207,
|
|
"learning_rate": 1.2836972283030256e-05,
|
|
"loss": 0.8186,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.6209026601574176,
|
|
"grad_norm": 0.2720817068824379,
|
|
"learning_rate": 1.2813351840844046e-05,
|
|
"loss": 0.7753,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.621301185613231,
|
|
"grad_norm": 0.27600718946732516,
|
|
"learning_rate": 1.2789742903287187e-05,
|
|
"loss": 0.8002,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.6216997110690445,
|
|
"grad_norm": 0.26210695686216645,
|
|
"learning_rate": 1.2766145508153689e-05,
|
|
"loss": 0.7726,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.6220982365248581,
|
|
"grad_norm": 0.27148047901992983,
|
|
"learning_rate": 1.2742559693219035e-05,
|
|
"loss": 0.8221,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.6224967619806715,
|
|
"grad_norm": 0.2506440715577259,
|
|
"learning_rate": 1.2718985496240209e-05,
|
|
"loss": 0.8161,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.622895287436485,
|
|
"grad_norm": 0.2562550466452998,
|
|
"learning_rate": 1.2695422954955569e-05,
|
|
"loss": 0.812,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.6232938128922985,
|
|
"grad_norm": 0.273331861541004,
|
|
"learning_rate": 1.2671872107084844e-05,
|
|
"loss": 0.7746,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.623692338348112,
|
|
"grad_norm": 0.24027870818880687,
|
|
"learning_rate": 1.2648332990329016e-05,
|
|
"loss": 0.783,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.6240908638039254,
|
|
"grad_norm": 0.2751061681477381,
|
|
"learning_rate": 1.2624805642370302e-05,
|
|
"loss": 0.8006,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.624489389259739,
|
|
"grad_norm": 0.2603821217505175,
|
|
"learning_rate": 1.2601290100872081e-05,
|
|
"loss": 0.8093,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.6248879147155525,
|
|
"grad_norm": 0.3093537763083936,
|
|
"learning_rate": 1.2577786403478815e-05,
|
|
"loss": 0.8071,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.6252864401713659,
|
|
"grad_norm": 0.25834846435694175,
|
|
"learning_rate": 1.2554294587816039e-05,
|
|
"loss": 0.8046,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.6256849656271795,
|
|
"grad_norm": 0.2614225968860621,
|
|
"learning_rate": 1.253081469149022e-05,
|
|
"loss": 0.809,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.6260834910829929,
|
|
"grad_norm": 0.2641571048713672,
|
|
"learning_rate": 1.2507346752088788e-05,
|
|
"loss": 0.8151,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 0.6264820165388064,
|
|
"grad_norm": 0.2570556300174585,
|
|
"learning_rate": 1.2483890807180003e-05,
|
|
"loss": 0.7807,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.62688054199462,
|
|
"grad_norm": 0.25821601421943596,
|
|
"learning_rate": 1.2460446894312938e-05,
|
|
"loss": 0.8099,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 0.6272790674504334,
|
|
"grad_norm": 0.2631395054682711,
|
|
"learning_rate": 1.243701505101741e-05,
|
|
"loss": 0.8161,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.6276775929062469,
|
|
"grad_norm": 0.24766766238334142,
|
|
"learning_rate": 1.2413595314803892e-05,
|
|
"loss": 0.7707,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.6280761183620603,
|
|
"grad_norm": 0.24707466931883929,
|
|
"learning_rate": 1.2390187723163503e-05,
|
|
"loss": 0.804,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.6284746438178739,
|
|
"grad_norm": 0.2621376069815184,
|
|
"learning_rate": 1.2366792313567895e-05,
|
|
"loss": 0.8055,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 0.6288731692736873,
|
|
"grad_norm": 0.2455537279746612,
|
|
"learning_rate": 1.2343409123469244e-05,
|
|
"loss": 0.8099,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.6292716947295008,
|
|
"grad_norm": 0.27105059580537544,
|
|
"learning_rate": 1.232003819030013e-05,
|
|
"loss": 0.7965,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 0.6296702201853144,
|
|
"grad_norm": 0.24578937265717318,
|
|
"learning_rate": 1.2296679551473551e-05,
|
|
"loss": 0.7871,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.6300687456411278,
|
|
"grad_norm": 0.24084765272449513,
|
|
"learning_rate": 1.227333324438281e-05,
|
|
"loss": 0.7965,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 0.6304672710969413,
|
|
"grad_norm": 0.23922572705746703,
|
|
"learning_rate": 1.2249999306401445e-05,
|
|
"loss": 0.7936,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.6308657965527548,
|
|
"grad_norm": 0.269202817136775,
|
|
"learning_rate": 1.2226677774883236e-05,
|
|
"loss": 0.8134,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 0.6312643220085683,
|
|
"grad_norm": 0.24194081424246755,
|
|
"learning_rate": 1.2203368687162058e-05,
|
|
"loss": 0.8036,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.6316628474643818,
|
|
"grad_norm": 0.2606593476377602,
|
|
"learning_rate": 1.2180072080551899e-05,
|
|
"loss": 0.8057,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.6320613729201953,
|
|
"grad_norm": 0.25284920681339745,
|
|
"learning_rate": 1.215678799234675e-05,
|
|
"loss": 0.7793,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.6324598983760088,
|
|
"grad_norm": 0.26507641296686857,
|
|
"learning_rate": 1.2133516459820565e-05,
|
|
"loss": 0.7942,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 0.6328584238318222,
|
|
"grad_norm": 0.25208081960776024,
|
|
"learning_rate": 1.2110257520227208e-05,
|
|
"loss": 0.8054,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.6332569492876358,
|
|
"grad_norm": 0.27064673184332666,
|
|
"learning_rate": 1.2087011210800368e-05,
|
|
"loss": 0.8022,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 0.6336554747434492,
|
|
"grad_norm": 0.2586090399717606,
|
|
"learning_rate": 1.206377756875353e-05,
|
|
"loss": 0.7962,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.6340540001992627,
|
|
"grad_norm": 0.2758486757724476,
|
|
"learning_rate": 1.2040556631279885e-05,
|
|
"loss": 0.8141,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 0.6344525256550763,
|
|
"grad_norm": 0.25007000963272646,
|
|
"learning_rate": 1.2017348435552308e-05,
|
|
"loss": 0.7876,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.6348510511108897,
|
|
"grad_norm": 0.28045825131568236,
|
|
"learning_rate": 1.1994153018723247e-05,
|
|
"loss": 0.7782,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 0.6352495765667032,
|
|
"grad_norm": 0.2559398025371776,
|
|
"learning_rate": 1.1970970417924715e-05,
|
|
"loss": 0.8016,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.6356481020225166,
|
|
"grad_norm": 0.2910472724027498,
|
|
"learning_rate": 1.1947800670268218e-05,
|
|
"loss": 0.8057,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.6360466274783302,
|
|
"grad_norm": 0.26090925545251104,
|
|
"learning_rate": 1.1924643812844648e-05,
|
|
"loss": 0.8074,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.6364451529341437,
|
|
"grad_norm": 0.26077758902957177,
|
|
"learning_rate": 1.1901499882724302e-05,
|
|
"loss": 0.8125,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 0.6368436783899571,
|
|
"grad_norm": 0.27193415193529746,
|
|
"learning_rate": 1.1878368916956758e-05,
|
|
"loss": 0.8205,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.6372422038457707,
|
|
"grad_norm": 0.24868413662213312,
|
|
"learning_rate": 1.1855250952570852e-05,
|
|
"loss": 0.8046,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 0.6376407293015841,
|
|
"grad_norm": 0.25516205225914074,
|
|
"learning_rate": 1.1832146026574597e-05,
|
|
"loss": 0.7823,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.6380392547573976,
|
|
"grad_norm": 0.2444397059280007,
|
|
"learning_rate": 1.1809054175955148e-05,
|
|
"loss": 0.8074,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 0.6384377802132111,
|
|
"grad_norm": 0.2406561292975351,
|
|
"learning_rate": 1.1785975437678716e-05,
|
|
"loss": 0.7995,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.6388363056690246,
|
|
"grad_norm": 0.25213243022945864,
|
|
"learning_rate": 1.1762909848690525e-05,
|
|
"loss": 0.794,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 0.6392348311248381,
|
|
"grad_norm": 0.250582196145571,
|
|
"learning_rate": 1.1739857445914757e-05,
|
|
"loss": 0.8081,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.6396333565806516,
|
|
"grad_norm": 0.24639126507572728,
|
|
"learning_rate": 1.1716818266254462e-05,
|
|
"loss": 0.8223,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.6400318820364651,
|
|
"grad_norm": 0.2341044085916874,
|
|
"learning_rate": 1.169379234659156e-05,
|
|
"loss": 0.8122,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.6404304074922785,
|
|
"grad_norm": 0.34128549774390465,
|
|
"learning_rate": 1.1670779723786697e-05,
|
|
"loss": 0.8032,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 0.6408289329480921,
|
|
"grad_norm": 0.33588417532052334,
|
|
"learning_rate": 1.1647780434679273e-05,
|
|
"loss": 0.7921,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.6412274584039056,
|
|
"grad_norm": 0.25140600726539664,
|
|
"learning_rate": 1.1624794516087322e-05,
|
|
"loss": 0.7937,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 0.641625983859719,
|
|
"grad_norm": 0.23449581497433394,
|
|
"learning_rate": 1.160182200480748e-05,
|
|
"loss": 0.7835,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.6420245093155326,
|
|
"grad_norm": 0.24952525378723442,
|
|
"learning_rate": 1.1578862937614935e-05,
|
|
"loss": 0.7802,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 0.642423034771346,
|
|
"grad_norm": 0.24961214587481048,
|
|
"learning_rate": 1.1555917351263313e-05,
|
|
"loss": 0.7823,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.6428215602271595,
|
|
"grad_norm": 0.243896806000912,
|
|
"learning_rate": 1.1532985282484694e-05,
|
|
"loss": 0.7699,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 0.643220085682973,
|
|
"grad_norm": 0.2704485294167498,
|
|
"learning_rate": 1.1510066767989522e-05,
|
|
"loss": 0.7942,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.6436186111387865,
|
|
"grad_norm": 0.24876368726137116,
|
|
"learning_rate": 1.1487161844466513e-05,
|
|
"loss": 0.8,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.6440171365946,
|
|
"grad_norm": 0.24032636669948387,
|
|
"learning_rate": 1.1464270548582648e-05,
|
|
"loss": 0.7968,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.6444156620504135,
|
|
"grad_norm": 0.26676359276330697,
|
|
"learning_rate": 1.1441392916983088e-05,
|
|
"loss": 0.8146,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 0.644814187506227,
|
|
"grad_norm": 0.24000233827708323,
|
|
"learning_rate": 1.1418528986291126e-05,
|
|
"loss": 0.813,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.6452127129620404,
|
|
"grad_norm": 0.2384982360045188,
|
|
"learning_rate": 1.1395678793108106e-05,
|
|
"loss": 0.7664,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 0.645611238417854,
|
|
"grad_norm": 0.25233152858510866,
|
|
"learning_rate": 1.1372842374013389e-05,
|
|
"loss": 0.791,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.6460097638736674,
|
|
"grad_norm": 0.23424267270162125,
|
|
"learning_rate": 1.135001976556429e-05,
|
|
"loss": 0.7872,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 0.6464082893294809,
|
|
"grad_norm": 0.3476922887656111,
|
|
"learning_rate": 1.1327211004296013e-05,
|
|
"loss": 0.8117,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.6468068147852944,
|
|
"grad_norm": 0.25999768296030096,
|
|
"learning_rate": 1.1304416126721604e-05,
|
|
"loss": 0.8016,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 0.6472053402411079,
|
|
"grad_norm": 0.2386900544989497,
|
|
"learning_rate": 1.1281635169331855e-05,
|
|
"loss": 0.816,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.6476038656969214,
|
|
"grad_norm": 0.23919854850884364,
|
|
"learning_rate": 1.1258868168595309e-05,
|
|
"loss": 0.7672,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.6480023911527348,
|
|
"grad_norm": 0.24292253081996207,
|
|
"learning_rate": 1.1236115160958137e-05,
|
|
"loss": 0.7876,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.6484009166085484,
|
|
"grad_norm": 0.2573324955094864,
|
|
"learning_rate": 1.1213376182844118e-05,
|
|
"loss": 0.8105,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 0.6487994420643619,
|
|
"grad_norm": 0.2374878968994724,
|
|
"learning_rate": 1.1190651270654608e-05,
|
|
"loss": 0.7956,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.6491979675201753,
|
|
"grad_norm": 0.22808064108307496,
|
|
"learning_rate": 1.1167940460768384e-05,
|
|
"loss": 0.778,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 0.6495964929759889,
|
|
"grad_norm": 0.24975995163182776,
|
|
"learning_rate": 1.11452437895417e-05,
|
|
"loss": 0.7927,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.6499950184318023,
|
|
"grad_norm": 0.2496302640812307,
|
|
"learning_rate": 1.1122561293308134e-05,
|
|
"loss": 0.8093,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 0.6503935438876158,
|
|
"grad_norm": 0.2382600490081852,
|
|
"learning_rate": 1.1099893008378602e-05,
|
|
"loss": 0.7989,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.6507920693434293,
|
|
"grad_norm": 0.23966141846275127,
|
|
"learning_rate": 1.1077238971041265e-05,
|
|
"loss": 0.7737,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 0.6511905947992428,
|
|
"grad_norm": 0.2658481127884238,
|
|
"learning_rate": 1.1054599217561466e-05,
|
|
"loss": 0.8161,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.6515891202550563,
|
|
"grad_norm": 0.23310988570227098,
|
|
"learning_rate": 1.10319737841817e-05,
|
|
"loss": 0.7965,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.6519876457108698,
|
|
"grad_norm": 0.2593756062996178,
|
|
"learning_rate": 1.1009362707121506e-05,
|
|
"loss": 0.8034,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.6523861711666833,
|
|
"grad_norm": 0.25538154058327805,
|
|
"learning_rate": 1.098676602257748e-05,
|
|
"loss": 0.8041,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 0.6527846966224967,
|
|
"grad_norm": 0.253312859294886,
|
|
"learning_rate": 1.0964183766723142e-05,
|
|
"loss": 0.8418,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.6531832220783103,
|
|
"grad_norm": 0.2492955855138997,
|
|
"learning_rate": 1.0941615975708939e-05,
|
|
"loss": 0.7821,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 0.6535817475341238,
|
|
"grad_norm": 0.24807305513899183,
|
|
"learning_rate": 1.0919062685662154e-05,
|
|
"loss": 0.8218,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.6539802729899372,
|
|
"grad_norm": 0.24157259403786543,
|
|
"learning_rate": 1.0896523932686853e-05,
|
|
"loss": 0.8093,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 0.6543787984457508,
|
|
"grad_norm": 0.2887027342486142,
|
|
"learning_rate": 1.0873999752863846e-05,
|
|
"loss": 0.7708,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.6547773239015642,
|
|
"grad_norm": 0.2516367839521763,
|
|
"learning_rate": 1.085149018225058e-05,
|
|
"loss": 0.8102,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 0.6551758493573777,
|
|
"grad_norm": 0.24924932650750312,
|
|
"learning_rate": 1.0828995256881151e-05,
|
|
"loss": 0.8155,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.6555743748131911,
|
|
"grad_norm": 0.2794672477405356,
|
|
"learning_rate": 1.0806515012766196e-05,
|
|
"loss": 0.7793,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.6559729002690047,
|
|
"grad_norm": 0.2573710085448088,
|
|
"learning_rate": 1.0784049485892853e-05,
|
|
"loss": 0.7823,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 0.6563714257248182,
|
|
"grad_norm": 0.22754413318247524,
|
|
"learning_rate": 1.0761598712224686e-05,
|
|
"loss": 0.8244,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 0.6567699511806316,
|
|
"grad_norm": 0.2529075220091104,
|
|
"learning_rate": 1.0739162727701655e-05,
|
|
"loss": 0.8248,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 0.6571684766364452,
|
|
"grad_norm": 0.23442166283314864,
|
|
"learning_rate": 1.0716741568240056e-05,
|
|
"loss": 0.7863,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 0.6575670020922586,
|
|
"grad_norm": 0.2310467368157676,
|
|
"learning_rate": 1.0694335269732412e-05,
|
|
"loss": 0.7935,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.6579655275480721,
|
|
"grad_norm": 0.2519609841775046,
|
|
"learning_rate": 1.0671943868047514e-05,
|
|
"loss": 0.8174,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 0.6583640530038857,
|
|
"grad_norm": 0.23381769850197567,
|
|
"learning_rate": 1.0649567399030256e-05,
|
|
"loss": 0.8125,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 0.6587625784596991,
|
|
"grad_norm": 0.2248688496445257,
|
|
"learning_rate": 1.0627205898501658e-05,
|
|
"loss": 0.7631,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 0.6591611039155126,
|
|
"grad_norm": 0.24042601112993525,
|
|
"learning_rate": 1.0604859402258749e-05,
|
|
"loss": 0.8093,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 0.6595596293713261,
|
|
"grad_norm": 0.23829888619576395,
|
|
"learning_rate": 1.0582527946074568e-05,
|
|
"loss": 0.757,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.6599581548271396,
|
|
"grad_norm": 0.24849887674234067,
|
|
"learning_rate": 1.0560211565698065e-05,
|
|
"loss": 0.7925,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 0.660356680282953,
|
|
"grad_norm": 0.23966740664443098,
|
|
"learning_rate": 1.053791029685405e-05,
|
|
"loss": 0.7956,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 0.6607552057387666,
|
|
"grad_norm": 0.2326370782463841,
|
|
"learning_rate": 1.0515624175243162e-05,
|
|
"loss": 0.7662,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 0.6611537311945801,
|
|
"grad_norm": 0.31722454033580055,
|
|
"learning_rate": 1.0493353236541762e-05,
|
|
"loss": 0.7802,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 0.6615522566503935,
|
|
"grad_norm": 0.25707007749842065,
|
|
"learning_rate": 1.0471097516401936e-05,
|
|
"loss": 0.8621,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.6619507821062071,
|
|
"grad_norm": 0.24902572963184474,
|
|
"learning_rate": 1.0448857050451378e-05,
|
|
"loss": 0.7842,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 0.6623493075620205,
|
|
"grad_norm": 0.24955167998517547,
|
|
"learning_rate": 1.0426631874293375e-05,
|
|
"loss": 0.8294,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 0.662747833017834,
|
|
"grad_norm": 0.23384165302801938,
|
|
"learning_rate": 1.0404422023506769e-05,
|
|
"loss": 0.79,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 0.6631463584736476,
|
|
"grad_norm": 0.2392972325732434,
|
|
"learning_rate": 1.038222753364581e-05,
|
|
"loss": 0.8006,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 0.663544883929461,
|
|
"grad_norm": 0.24206783576164856,
|
|
"learning_rate": 1.0360048440240211e-05,
|
|
"loss": 0.8027,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.6639434093852745,
|
|
"grad_norm": 0.22839516664163145,
|
|
"learning_rate": 1.0337884778794993e-05,
|
|
"loss": 0.7948,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 0.6643419348410879,
|
|
"grad_norm": 0.2402973963775374,
|
|
"learning_rate": 1.0315736584790507e-05,
|
|
"loss": 0.8151,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 0.6647404602969015,
|
|
"grad_norm": 0.2343262068157496,
|
|
"learning_rate": 1.0293603893682327e-05,
|
|
"loss": 0.7982,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 0.6651389857527149,
|
|
"grad_norm": 0.23763455582566587,
|
|
"learning_rate": 1.0271486740901215e-05,
|
|
"loss": 0.8202,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 0.6655375112085284,
|
|
"grad_norm": 0.22857617190624355,
|
|
"learning_rate": 1.0249385161853064e-05,
|
|
"loss": 0.8043,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.665936036664342,
|
|
"grad_norm": 0.23554787626388524,
|
|
"learning_rate": 1.0227299191918818e-05,
|
|
"loss": 0.7754,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 0.6663345621201554,
|
|
"grad_norm": 0.24185097085110915,
|
|
"learning_rate": 1.0205228866454452e-05,
|
|
"loss": 0.8149,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 0.6667330875759689,
|
|
"grad_norm": 0.24371976817956506,
|
|
"learning_rate": 1.018317422079087e-05,
|
|
"loss": 0.7953,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 0.6671316130317824,
|
|
"grad_norm": 0.22651548749239922,
|
|
"learning_rate": 1.0161135290233928e-05,
|
|
"loss": 0.7856,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 0.6675301384875959,
|
|
"grad_norm": 0.23694878438384515,
|
|
"learning_rate": 1.0139112110064265e-05,
|
|
"loss": 0.7917,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.6679286639434094,
|
|
"grad_norm": 0.23479229889643258,
|
|
"learning_rate": 1.0117104715537338e-05,
|
|
"loss": 0.7941,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 0.6683271893992229,
|
|
"grad_norm": 0.24124394146663952,
|
|
"learning_rate": 1.009511314188334e-05,
|
|
"loss": 0.8183,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 0.6687257148550364,
|
|
"grad_norm": 0.22678268771998955,
|
|
"learning_rate": 1.0073137424307109e-05,
|
|
"loss": 0.785,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 0.6691242403108498,
|
|
"grad_norm": 0.2477335220816568,
|
|
"learning_rate": 1.0051177597988122e-05,
|
|
"loss": 0.8033,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 0.6695227657666634,
|
|
"grad_norm": 0.23625778900717528,
|
|
"learning_rate": 1.0029233698080415e-05,
|
|
"loss": 0.8033,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.6699212912224768,
|
|
"grad_norm": 0.23825465079514177,
|
|
"learning_rate": 1.0007305759712533e-05,
|
|
"loss": 0.7735,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 0.6703198166782903,
|
|
"grad_norm": 0.22035417715886807,
|
|
"learning_rate": 9.985393817987444e-06,
|
|
"loss": 0.8073,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 0.6707183421341039,
|
|
"grad_norm": 0.23849505686477043,
|
|
"learning_rate": 9.963497907982532e-06,
|
|
"loss": 0.8026,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 0.6711168675899173,
|
|
"grad_norm": 0.2337573641381328,
|
|
"learning_rate": 9.94161806474951e-06,
|
|
"loss": 0.7889,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 0.6715153930457308,
|
|
"grad_norm": 0.9103894523595338,
|
|
"learning_rate": 9.919754323314372e-06,
|
|
"loss": 0.792,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.6719139185015442,
|
|
"grad_norm": 0.22893455291621617,
|
|
"learning_rate": 9.897906718677344e-06,
|
|
"loss": 0.782,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 0.6723124439573578,
|
|
"grad_norm": 0.2372100351252991,
|
|
"learning_rate": 9.87607528581279e-06,
|
|
"loss": 0.8011,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 0.6727109694131712,
|
|
"grad_norm": 0.23111628536958412,
|
|
"learning_rate": 9.854260059669225e-06,
|
|
"loss": 0.8025,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 0.6731094948689847,
|
|
"grad_norm": 0.2368927356235449,
|
|
"learning_rate": 9.832461075169184e-06,
|
|
"loss": 0.8033,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 0.6735080203247983,
|
|
"grad_norm": 0.22855325082673575,
|
|
"learning_rate": 9.810678367209227e-06,
|
|
"loss": 0.7911,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.6739065457806117,
|
|
"grad_norm": 0.27522701488615475,
|
|
"learning_rate": 9.788911970659848e-06,
|
|
"loss": 0.7916,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 0.6743050712364252,
|
|
"grad_norm": 0.24022760398565116,
|
|
"learning_rate": 9.767161920365431e-06,
|
|
"loss": 0.8037,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 0.6747035966922387,
|
|
"grad_norm": 0.22778160452010449,
|
|
"learning_rate": 9.7454282511442e-06,
|
|
"loss": 0.8169,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 0.6751021221480522,
|
|
"grad_norm": 0.2307169634206417,
|
|
"learning_rate": 9.723710997788134e-06,
|
|
"loss": 0.7951,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 0.6755006476038657,
|
|
"grad_norm": 0.2278130241658777,
|
|
"learning_rate": 9.702010195062957e-06,
|
|
"loss": 0.804,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.6758991730596792,
|
|
"grad_norm": 0.23860918505971207,
|
|
"learning_rate": 9.68032587770803e-06,
|
|
"loss": 0.7775,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 0.6762976985154927,
|
|
"grad_norm": 0.23206722403706048,
|
|
"learning_rate": 9.65865808043636e-06,
|
|
"loss": 0.7717,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 0.6766962239713061,
|
|
"grad_norm": 0.2424939487602499,
|
|
"learning_rate": 9.637006837934491e-06,
|
|
"loss": 0.8284,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 0.6770947494271197,
|
|
"grad_norm": 0.2422935170368267,
|
|
"learning_rate": 9.61537218486245e-06,
|
|
"loss": 0.7982,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 0.6774932748829331,
|
|
"grad_norm": 0.268912315082055,
|
|
"learning_rate": 9.593754155853736e-06,
|
|
"loss": 0.8025,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.6778918003387466,
|
|
"grad_norm": 0.24641465322988168,
|
|
"learning_rate": 9.572152785515206e-06,
|
|
"loss": 0.796,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 0.6782903257945602,
|
|
"grad_norm": 0.23523832181072415,
|
|
"learning_rate": 9.550568108427067e-06,
|
|
"loss": 0.7945,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 0.6786888512503736,
|
|
"grad_norm": 0.23985080041043766,
|
|
"learning_rate": 9.529000159142806e-06,
|
|
"loss": 0.7967,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 0.6790873767061871,
|
|
"grad_norm": 0.24109034221158648,
|
|
"learning_rate": 9.507448972189124e-06,
|
|
"loss": 0.809,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 0.6794859021620006,
|
|
"grad_norm": 0.3279682419994762,
|
|
"learning_rate": 9.485914582065893e-06,
|
|
"loss": 0.7976,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.6798844276178141,
|
|
"grad_norm": 0.24600000203117356,
|
|
"learning_rate": 9.464397023246086e-06,
|
|
"loss": 0.798,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 0.6802829530736276,
|
|
"grad_norm": 0.25198127703741363,
|
|
"learning_rate": 9.442896330175736e-06,
|
|
"loss": 0.7666,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 0.680681478529441,
|
|
"grad_norm": 0.24602873394094937,
|
|
"learning_rate": 9.421412537273888e-06,
|
|
"loss": 0.8296,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 0.6810800039852546,
|
|
"grad_norm": 0.2462861171716341,
|
|
"learning_rate": 9.399945678932518e-06,
|
|
"loss": 0.7671,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 0.681478529441068,
|
|
"grad_norm": 0.2356910774374406,
|
|
"learning_rate": 9.378495789516511e-06,
|
|
"loss": 0.8005,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.6818770548968816,
|
|
"grad_norm": 0.26676136395934497,
|
|
"learning_rate": 9.357062903363559e-06,
|
|
"loss": 0.7966,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 0.682275580352695,
|
|
"grad_norm": 0.22412533500879198,
|
|
"learning_rate": 9.335647054784163e-06,
|
|
"loss": 0.7837,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 0.6826741058085085,
|
|
"grad_norm": 0.24899863246739254,
|
|
"learning_rate": 9.314248278061524e-06,
|
|
"loss": 0.8113,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 0.683072631264322,
|
|
"grad_norm": 0.2518131395877076,
|
|
"learning_rate": 9.292866607451534e-06,
|
|
"loss": 0.7868,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 0.6834711567201355,
|
|
"grad_norm": 0.2384173486107651,
|
|
"learning_rate": 9.271502077182697e-06,
|
|
"loss": 0.7748,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.683869682175949,
|
|
"grad_norm": 0.5967497241397911,
|
|
"learning_rate": 9.250154721456075e-06,
|
|
"loss": 0.7962,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 0.6842682076317624,
|
|
"grad_norm": 0.24269806832216176,
|
|
"learning_rate": 9.22882457444524e-06,
|
|
"loss": 0.8026,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 0.684666733087576,
|
|
"grad_norm": 0.23438959649008212,
|
|
"learning_rate": 9.207511670296204e-06,
|
|
"loss": 0.795,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 0.6850652585433895,
|
|
"grad_norm": 0.24041761239392234,
|
|
"learning_rate": 9.186216043127388e-06,
|
|
"loss": 0.8214,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 0.6854637839992029,
|
|
"grad_norm": 0.2415192222064715,
|
|
"learning_rate": 9.16493772702955e-06,
|
|
"loss": 0.7907,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.6858623094550165,
|
|
"grad_norm": 0.25457580261405643,
|
|
"learning_rate": 9.143676756065752e-06,
|
|
"loss": 0.7912,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 0.6862608349108299,
|
|
"grad_norm": 0.24138741526314378,
|
|
"learning_rate": 9.122433164271252e-06,
|
|
"loss": 0.7952,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 0.6866593603666434,
|
|
"grad_norm": 0.23982959026182568,
|
|
"learning_rate": 9.101206985653523e-06,
|
|
"loss": 0.8109,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 0.6870578858224569,
|
|
"grad_norm": 0.23128247905861088,
|
|
"learning_rate": 9.079998254192157e-06,
|
|
"loss": 0.7996,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 0.6874564112782704,
|
|
"grad_norm": 0.23257176458111745,
|
|
"learning_rate": 9.058807003838792e-06,
|
|
"loss": 0.7959,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.6878549367340839,
|
|
"grad_norm": 0.2514299885659865,
|
|
"learning_rate": 9.037633268517105e-06,
|
|
"loss": 0.8007,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 0.6882534621898974,
|
|
"grad_norm": 0.2296427095516536,
|
|
"learning_rate": 9.016477082122727e-06,
|
|
"loss": 0.7671,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 0.6886519876457109,
|
|
"grad_norm": 0.24370730489409603,
|
|
"learning_rate": 8.995338478523206e-06,
|
|
"loss": 0.8123,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 0.6890505131015243,
|
|
"grad_norm": 0.23578511930028617,
|
|
"learning_rate": 8.974217491557916e-06,
|
|
"loss": 0.7964,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 0.6894490385573379,
|
|
"grad_norm": 0.23684202240770086,
|
|
"learning_rate": 8.953114155038059e-06,
|
|
"loss": 0.7808,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.6898475640131514,
|
|
"grad_norm": 0.22699784086777558,
|
|
"learning_rate": 8.932028502746563e-06,
|
|
"loss": 0.7959,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 0.6902460894689648,
|
|
"grad_norm": 0.24063862708544978,
|
|
"learning_rate": 8.910960568438058e-06,
|
|
"loss": 0.789,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 0.6906446149247784,
|
|
"grad_norm": 0.22874206732454588,
|
|
"learning_rate": 8.889910385838813e-06,
|
|
"loss": 0.7826,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 0.6910431403805918,
|
|
"grad_norm": 0.2250049276809127,
|
|
"learning_rate": 8.868877988646656e-06,
|
|
"loss": 0.7941,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 0.6914416658364053,
|
|
"grad_norm": 0.22799809229676088,
|
|
"learning_rate": 8.847863410530973e-06,
|
|
"loss": 0.8039,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.6918401912922187,
|
|
"grad_norm": 0.22068818384437014,
|
|
"learning_rate": 8.826866685132597e-06,
|
|
"loss": 0.764,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 0.6922387167480323,
|
|
"grad_norm": 0.23302636532036256,
|
|
"learning_rate": 8.805887846063793e-06,
|
|
"loss": 0.7814,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 0.6926372422038458,
|
|
"grad_norm": 0.2235081586612528,
|
|
"learning_rate": 8.784926926908228e-06,
|
|
"loss": 0.7906,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 0.6930357676596592,
|
|
"grad_norm": 0.23695689079275012,
|
|
"learning_rate": 8.763983961220818e-06,
|
|
"loss": 0.7948,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 0.6934342931154728,
|
|
"grad_norm": 0.24343892771165315,
|
|
"learning_rate": 8.74305898252779e-06,
|
|
"loss": 0.777,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.6938328185712862,
|
|
"grad_norm": 0.2403895498767754,
|
|
"learning_rate": 8.72215202432654e-06,
|
|
"loss": 0.8093,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 0.6942313440270997,
|
|
"grad_norm": 0.23104547501067635,
|
|
"learning_rate": 8.701263120085643e-06,
|
|
"loss": 0.7747,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 0.6946298694829133,
|
|
"grad_norm": 0.2399257360677753,
|
|
"learning_rate": 8.680392303244762e-06,
|
|
"loss": 0.7887,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 0.6950283949387267,
|
|
"grad_norm": 0.2298960897757004,
|
|
"learning_rate": 8.659539607214609e-06,
|
|
"loss": 0.805,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 0.6954269203945402,
|
|
"grad_norm": 0.22209674980320604,
|
|
"learning_rate": 8.638705065376887e-06,
|
|
"loss": 0.7882,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.6958254458503537,
|
|
"grad_norm": 0.22996129591563572,
|
|
"learning_rate": 8.617888711084225e-06,
|
|
"loss": 0.7907,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 0.6962239713061672,
|
|
"grad_norm": 0.23756147299275276,
|
|
"learning_rate": 8.597090577660158e-06,
|
|
"loss": 0.8248,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 0.6966224967619806,
|
|
"grad_norm": 0.23089712940348142,
|
|
"learning_rate": 8.576310698399031e-06,
|
|
"loss": 0.7827,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 0.6970210222177942,
|
|
"grad_norm": 0.22154445039007642,
|
|
"learning_rate": 8.555549106565981e-06,
|
|
"loss": 0.7987,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 0.6974195476736077,
|
|
"grad_norm": 0.2331241726825461,
|
|
"learning_rate": 8.534805835396866e-06,
|
|
"loss": 0.8262,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.6978180731294211,
|
|
"grad_norm": 0.22789526498273438,
|
|
"learning_rate": 8.514080918098218e-06,
|
|
"loss": 0.7886,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 0.6982165985852347,
|
|
"grad_norm": 0.21821146925663867,
|
|
"learning_rate": 8.49337438784719e-06,
|
|
"loss": 0.801,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 0.6986151240410481,
|
|
"grad_norm": 0.23508205049301503,
|
|
"learning_rate": 8.472686277791485e-06,
|
|
"loss": 0.7643,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 0.6990136494968616,
|
|
"grad_norm": 0.22461888065681415,
|
|
"learning_rate": 8.452016621049333e-06,
|
|
"loss": 0.7991,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 0.699412174952675,
|
|
"grad_norm": 0.21803368130601183,
|
|
"learning_rate": 8.431365450709419e-06,
|
|
"loss": 0.7987,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.6998107004084886,
|
|
"grad_norm": 0.23740898039198863,
|
|
"learning_rate": 8.410732799830845e-06,
|
|
"loss": 0.7915,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 0.7002092258643021,
|
|
"grad_norm": 0.261735854629893,
|
|
"learning_rate": 8.39011870144304e-06,
|
|
"loss": 0.7955,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 0.7006077513201155,
|
|
"grad_norm": 0.2180685253328265,
|
|
"learning_rate": 8.369523188545756e-06,
|
|
"loss": 0.8028,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 0.7010062767759291,
|
|
"grad_norm": 0.2301419951414697,
|
|
"learning_rate": 8.348946294108996e-06,
|
|
"loss": 0.8103,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 0.7014048022317425,
|
|
"grad_norm": 0.22024932183589127,
|
|
"learning_rate": 8.328388051072922e-06,
|
|
"loss": 0.7928,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.701803327687556,
|
|
"grad_norm": 0.4020336814790439,
|
|
"learning_rate": 8.307848492347899e-06,
|
|
"loss": 0.8011,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 0.7022018531433696,
|
|
"grad_norm": 0.22024662257821778,
|
|
"learning_rate": 8.287327650814323e-06,
|
|
"loss": 0.8119,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 0.702600378599183,
|
|
"grad_norm": 0.27996952780116363,
|
|
"learning_rate": 8.266825559322667e-06,
|
|
"loss": 0.7987,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 0.7029989040549965,
|
|
"grad_norm": 0.22630541171175222,
|
|
"learning_rate": 8.246342250693354e-06,
|
|
"loss": 0.817,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 0.70339742951081,
|
|
"grad_norm": 0.2290021039403852,
|
|
"learning_rate": 8.225877757716768e-06,
|
|
"loss": 0.7959,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.7037959549666235,
|
|
"grad_norm": 0.2216297139655694,
|
|
"learning_rate": 8.205432113153158e-06,
|
|
"loss": 0.7791,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 0.7041944804224369,
|
|
"grad_norm": 0.22804574889964005,
|
|
"learning_rate": 8.185005349732605e-06,
|
|
"loss": 0.8041,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 0.7045930058782505,
|
|
"grad_norm": 0.21915038552906846,
|
|
"learning_rate": 8.16459750015497e-06,
|
|
"loss": 0.7919,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 0.704991531334064,
|
|
"grad_norm": 0.23641715849802888,
|
|
"learning_rate": 8.144208597089814e-06,
|
|
"loss": 0.7684,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 0.7053900567898774,
|
|
"grad_norm": 0.23150971294969083,
|
|
"learning_rate": 8.123838673176396e-06,
|
|
"loss": 0.8268,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.705788582245691,
|
|
"grad_norm": 0.22487777470325962,
|
|
"learning_rate": 8.103487761023559e-06,
|
|
"loss": 0.7952,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 0.7061871077015044,
|
|
"grad_norm": 0.22359225895687845,
|
|
"learning_rate": 8.08315589320975e-06,
|
|
"loss": 0.7942,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 0.7065856331573179,
|
|
"grad_norm": 0.22728777425623412,
|
|
"learning_rate": 8.062843102282916e-06,
|
|
"loss": 0.7979,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 0.7069841586131315,
|
|
"grad_norm": 0.32242287769373923,
|
|
"learning_rate": 8.042549420760437e-06,
|
|
"loss": 0.7758,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 0.7073826840689449,
|
|
"grad_norm": 0.23211148591348726,
|
|
"learning_rate": 8.022274881129146e-06,
|
|
"loss": 0.7932,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.7077812095247584,
|
|
"grad_norm": 0.23149548150957583,
|
|
"learning_rate": 8.002019515845194e-06,
|
|
"loss": 0.781,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 0.7081797349805719,
|
|
"grad_norm": 0.23571260576059858,
|
|
"learning_rate": 7.981783357334061e-06,
|
|
"loss": 0.8099,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 0.7085782604363854,
|
|
"grad_norm": 0.23684120441719464,
|
|
"learning_rate": 7.961566437990475e-06,
|
|
"loss": 0.7925,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 0.7089767858921988,
|
|
"grad_norm": 0.23808835745048676,
|
|
"learning_rate": 7.941368790178365e-06,
|
|
"loss": 0.8035,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 0.7093753113480123,
|
|
"grad_norm": 0.24734022897944857,
|
|
"learning_rate": 7.921190446230813e-06,
|
|
"loss": 0.7797,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.7097738368038259,
|
|
"grad_norm": 0.2453484186566751,
|
|
"learning_rate": 7.901031438449982e-06,
|
|
"loss": 0.819,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 0.7101723622596393,
|
|
"grad_norm": 0.22709522154253955,
|
|
"learning_rate": 7.880891799107108e-06,
|
|
"loss": 0.8394,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 0.7105708877154528,
|
|
"grad_norm": 0.24346320063244078,
|
|
"learning_rate": 7.860771560442384e-06,
|
|
"loss": 0.8114,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 0.7109694131712663,
|
|
"grad_norm": 0.23923932846526716,
|
|
"learning_rate": 7.84067075466499e-06,
|
|
"loss": 0.7866,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 0.7113679386270798,
|
|
"grad_norm": 0.24156935661046483,
|
|
"learning_rate": 7.820589413952976e-06,
|
|
"loss": 0.7792,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.7117664640828933,
|
|
"grad_norm": 0.24507452424550918,
|
|
"learning_rate": 7.800527570453215e-06,
|
|
"loss": 0.7986,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 0.7121649895387068,
|
|
"grad_norm": 0.22251550647565904,
|
|
"learning_rate": 7.780485256281402e-06,
|
|
"loss": 0.7733,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 0.7125635149945203,
|
|
"grad_norm": 0.2426455233626753,
|
|
"learning_rate": 7.760462503521933e-06,
|
|
"loss": 0.7954,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 0.7129620404503337,
|
|
"grad_norm": 0.23577702373705983,
|
|
"learning_rate": 7.740459344227918e-06,
|
|
"loss": 0.7985,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 0.7133605659061473,
|
|
"grad_norm": 0.24472106889910925,
|
|
"learning_rate": 7.720475810421088e-06,
|
|
"loss": 0.7924,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.7137590913619607,
|
|
"grad_norm": 0.23276012167993276,
|
|
"learning_rate": 7.700511934091763e-06,
|
|
"loss": 0.8098,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 0.7141576168177742,
|
|
"grad_norm": 0.2227128937074685,
|
|
"learning_rate": 7.680567747198797e-06,
|
|
"loss": 0.8368,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 0.7145561422735878,
|
|
"grad_norm": 0.35446105784971366,
|
|
"learning_rate": 7.660643281669502e-06,
|
|
"loss": 0.7913,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 0.7149546677294012,
|
|
"grad_norm": 0.23973566003992375,
|
|
"learning_rate": 7.640738569399645e-06,
|
|
"loss": 0.8357,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 0.7153531931852147,
|
|
"grad_norm": 0.2391665089124275,
|
|
"learning_rate": 7.620853642253363e-06,
|
|
"loss": 0.8133,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.7157517186410282,
|
|
"grad_norm": 0.23522226900870816,
|
|
"learning_rate": 7.600988532063125e-06,
|
|
"loss": 0.7926,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 0.7161502440968417,
|
|
"grad_norm": 0.24554684562043907,
|
|
"learning_rate": 7.58114327062966e-06,
|
|
"loss": 0.7709,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 0.7165487695526552,
|
|
"grad_norm": 0.22714750497856911,
|
|
"learning_rate": 7.561317889721937e-06,
|
|
"loss": 0.7818,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 0.7169472950084687,
|
|
"grad_norm": 0.23827899963595306,
|
|
"learning_rate": 7.541512421077106e-06,
|
|
"loss": 0.7728,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 0.7173458204642822,
|
|
"grad_norm": 0.48457590255842975,
|
|
"learning_rate": 7.521726896400414e-06,
|
|
"loss": 0.7739,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.7177443459200956,
|
|
"grad_norm": 0.23582475280902745,
|
|
"learning_rate": 7.50196134736521e-06,
|
|
"loss": 0.8168,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 0.7181428713759092,
|
|
"grad_norm": 0.2380146361056826,
|
|
"learning_rate": 7.482215805612847e-06,
|
|
"loss": 0.7779,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 0.7185413968317226,
|
|
"grad_norm": 0.2360276005567584,
|
|
"learning_rate": 7.462490302752665e-06,
|
|
"loss": 0.7864,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 0.7189399222875361,
|
|
"grad_norm": 0.22814213757245871,
|
|
"learning_rate": 7.442784870361903e-06,
|
|
"loss": 0.8191,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 0.7193384477433497,
|
|
"grad_norm": 0.24107281393643026,
|
|
"learning_rate": 7.42309953998569e-06,
|
|
"loss": 0.7838,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.7197369731991631,
|
|
"grad_norm": 0.24232433035462758,
|
|
"learning_rate": 7.4034343431369685e-06,
|
|
"loss": 0.7977,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 0.7201354986549766,
|
|
"grad_norm": 0.38084230051806445,
|
|
"learning_rate": 7.38378931129645e-06,
|
|
"loss": 0.8043,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 0.72053402411079,
|
|
"grad_norm": 0.24397034947179694,
|
|
"learning_rate": 7.364164475912572e-06,
|
|
"loss": 0.8068,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 0.7209325495666036,
|
|
"grad_norm": 0.4613176607526505,
|
|
"learning_rate": 7.344559868401422e-06,
|
|
"loss": 0.7877,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 0.7213310750224171,
|
|
"grad_norm": 0.23005075594522995,
|
|
"learning_rate": 7.3249755201467335e-06,
|
|
"loss": 0.7722,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.7217296004782305,
|
|
"grad_norm": 0.2387695579592527,
|
|
"learning_rate": 7.305411462499776e-06,
|
|
"loss": 0.8201,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 0.7221281259340441,
|
|
"grad_norm": 0.2344269204447853,
|
|
"learning_rate": 7.2858677267793635e-06,
|
|
"loss": 0.7815,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 0.7225266513898575,
|
|
"grad_norm": 0.2280635583340256,
|
|
"learning_rate": 7.26634434427177e-06,
|
|
"loss": 0.7814,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 0.722925176845671,
|
|
"grad_norm": 0.2328509307005202,
|
|
"learning_rate": 7.246841346230684e-06,
|
|
"loss": 0.7695,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 0.7233237023014845,
|
|
"grad_norm": 0.2237984273349448,
|
|
"learning_rate": 7.227358763877172e-06,
|
|
"loss": 0.8082,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.723722227757298,
|
|
"grad_norm": 0.24293928069372236,
|
|
"learning_rate": 7.207896628399598e-06,
|
|
"loss": 0.8018,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 0.7241207532131115,
|
|
"grad_norm": 0.22708584207065824,
|
|
"learning_rate": 7.1884549709536115e-06,
|
|
"loss": 0.788,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 0.724519278668925,
|
|
"grad_norm": 0.23024391469364716,
|
|
"learning_rate": 7.169033822662077e-06,
|
|
"loss": 0.7722,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 0.7249178041247385,
|
|
"grad_norm": 0.21908469252061188,
|
|
"learning_rate": 7.149633214615022e-06,
|
|
"loss": 0.7757,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 0.7253163295805519,
|
|
"grad_norm": 0.23374912363797343,
|
|
"learning_rate": 7.130253177869606e-06,
|
|
"loss": 0.8123,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.7257148550363655,
|
|
"grad_norm": 0.23339945263366027,
|
|
"learning_rate": 7.1108937434500335e-06,
|
|
"loss": 0.8145,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 0.7261133804921789,
|
|
"grad_norm": 0.22566815004670457,
|
|
"learning_rate": 7.091554942347551e-06,
|
|
"loss": 0.7879,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 0.7265119059479924,
|
|
"grad_norm": 0.22495869682272615,
|
|
"learning_rate": 7.072236805520358e-06,
|
|
"loss": 0.7979,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 0.726910431403806,
|
|
"grad_norm": 0.2376828902036485,
|
|
"learning_rate": 7.052939363893583e-06,
|
|
"loss": 0.8208,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 0.7273089568596194,
|
|
"grad_norm": 0.23450024068687056,
|
|
"learning_rate": 7.033662648359225e-06,
|
|
"loss": 0.7824,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.7277074823154329,
|
|
"grad_norm": 0.22685374818541473,
|
|
"learning_rate": 7.014406689776101e-06,
|
|
"loss": 0.7876,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 0.7281060077712463,
|
|
"grad_norm": 0.23011276016836252,
|
|
"learning_rate": 6.995171518969808e-06,
|
|
"loss": 0.8075,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 0.7285045332270599,
|
|
"grad_norm": 0.24933093286417946,
|
|
"learning_rate": 6.975957166732645e-06,
|
|
"loss": 0.7662,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 0.7289030586828734,
|
|
"grad_norm": 0.22506531353014372,
|
|
"learning_rate": 6.956763663823602e-06,
|
|
"loss": 0.7808,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 0.7293015841386868,
|
|
"grad_norm": 0.23401655584722747,
|
|
"learning_rate": 6.937591040968288e-06,
|
|
"loss": 0.8209,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.7297001095945004,
|
|
"grad_norm": 0.24774972767529824,
|
|
"learning_rate": 6.918439328858892e-06,
|
|
"loss": 0.7712,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 0.7300986350503138,
|
|
"grad_norm": 0.23342909513340782,
|
|
"learning_rate": 6.89930855815411e-06,
|
|
"loss": 0.7994,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 0.7304971605061273,
|
|
"grad_norm": 0.22394459360997282,
|
|
"learning_rate": 6.880198759479133e-06,
|
|
"loss": 0.8042,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 0.7308956859619408,
|
|
"grad_norm": 0.23360743949550875,
|
|
"learning_rate": 6.861109963425578e-06,
|
|
"loss": 0.7916,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 0.7312942114177543,
|
|
"grad_norm": 0.22281906219641856,
|
|
"learning_rate": 6.8420422005514266e-06,
|
|
"loss": 0.8137,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.7316927368735678,
|
|
"grad_norm": 0.22014312278105563,
|
|
"learning_rate": 6.822995501380998e-06,
|
|
"loss": 0.8021,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 0.7320912623293813,
|
|
"grad_norm": 0.2257715944227968,
|
|
"learning_rate": 6.803969896404896e-06,
|
|
"loss": 0.784,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 0.7324897877851948,
|
|
"grad_norm": 0.24155855616319677,
|
|
"learning_rate": 6.784965416079961e-06,
|
|
"loss": 0.7933,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 0.7328883132410082,
|
|
"grad_norm": 0.22107207590046762,
|
|
"learning_rate": 6.765982090829189e-06,
|
|
"loss": 0.784,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 0.7332868386968218,
|
|
"grad_norm": 0.21216318175362134,
|
|
"learning_rate": 6.74701995104174e-06,
|
|
"loss": 0.8023,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.7336853641526353,
|
|
"grad_norm": 0.2513348774684416,
|
|
"learning_rate": 6.728079027072847e-06,
|
|
"loss": 0.8255,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 0.7340838896084487,
|
|
"grad_norm": 0.23421026990778565,
|
|
"learning_rate": 6.709159349243781e-06,
|
|
"loss": 0.8255,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 0.7344824150642623,
|
|
"grad_norm": 0.20679965719103174,
|
|
"learning_rate": 6.690260947841809e-06,
|
|
"loss": 0.7863,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 0.7348809405200757,
|
|
"grad_norm": 0.24196895097156834,
|
|
"learning_rate": 6.671383853120117e-06,
|
|
"loss": 0.8162,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 0.7352794659758892,
|
|
"grad_norm": 0.23539184150189893,
|
|
"learning_rate": 6.652528095297812e-06,
|
|
"loss": 0.7788,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.7356779914317026,
|
|
"grad_norm": 0.2158639231432844,
|
|
"learning_rate": 6.633693704559814e-06,
|
|
"loss": 0.8077,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 0.7360765168875162,
|
|
"grad_norm": 0.23071528135591446,
|
|
"learning_rate": 6.614880711056853e-06,
|
|
"loss": 0.7774,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 0.7364750423433297,
|
|
"grad_norm": 0.22552702501791788,
|
|
"learning_rate": 6.596089144905422e-06,
|
|
"loss": 0.7794,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.7368735677991431,
|
|
"grad_norm": 0.2330734404526342,
|
|
"learning_rate": 6.577319036187679e-06,
|
|
"loss": 0.79,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 0.7372720932549567,
|
|
"grad_norm": 0.2265375246131879,
|
|
"learning_rate": 6.558570414951462e-06,
|
|
"loss": 0.7922,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.7376706187107701,
|
|
"grad_norm": 0.22667338696640402,
|
|
"learning_rate": 6.539843311210181e-06,
|
|
"loss": 0.7796,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 0.7380691441665836,
|
|
"grad_norm": 0.23040531636916783,
|
|
"learning_rate": 6.521137754942828e-06,
|
|
"loss": 0.8163,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 0.7384676696223972,
|
|
"grad_norm": 0.22397477455791673,
|
|
"learning_rate": 6.5024537760938886e-06,
|
|
"loss": 0.8049,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 0.7388661950782106,
|
|
"grad_norm": 0.21837702568211942,
|
|
"learning_rate": 6.483791404573305e-06,
|
|
"loss": 0.7899,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 0.7392647205340241,
|
|
"grad_norm": 0.23621768578628966,
|
|
"learning_rate": 6.465150670256441e-06,
|
|
"loss": 0.8131,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.7396632459898376,
|
|
"grad_norm": 0.22441226758524066,
|
|
"learning_rate": 6.446531602984003e-06,
|
|
"loss": 0.8044,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 0.7400617714456511,
|
|
"grad_norm": 0.21742047573106374,
|
|
"learning_rate": 6.427934232562034e-06,
|
|
"loss": 0.7779,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 0.7404602969014645,
|
|
"grad_norm": 0.2177698894735104,
|
|
"learning_rate": 6.409358588761814e-06,
|
|
"loss": 0.7894,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 0.7408588223572781,
|
|
"grad_norm": 0.22916632915750462,
|
|
"learning_rate": 6.39080470131989e-06,
|
|
"loss": 0.7928,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 0.7412573478130916,
|
|
"grad_norm": 0.22082966691884467,
|
|
"learning_rate": 6.37227259993793e-06,
|
|
"loss": 0.7915,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.741655873268905,
|
|
"grad_norm": 0.2241200766337397,
|
|
"learning_rate": 6.353762314282757e-06,
|
|
"loss": 0.7779,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 0.7420543987247186,
|
|
"grad_norm": 0.23702387172593264,
|
|
"learning_rate": 6.335273873986267e-06,
|
|
"loss": 0.7829,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 0.742452924180532,
|
|
"grad_norm": 0.2527038905168017,
|
|
"learning_rate": 6.316807308645367e-06,
|
|
"loss": 0.7829,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 0.7428514496363455,
|
|
"grad_norm": 0.23475628446887611,
|
|
"learning_rate": 6.2983626478219695e-06,
|
|
"loss": 0.7999,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 0.7432499750921591,
|
|
"grad_norm": 0.23416030882805897,
|
|
"learning_rate": 6.279939921042906e-06,
|
|
"loss": 0.8085,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.7436485005479725,
|
|
"grad_norm": 0.23262020269941716,
|
|
"learning_rate": 6.261539157799912e-06,
|
|
"loss": 0.8256,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 0.744047026003786,
|
|
"grad_norm": 0.217504432107485,
|
|
"learning_rate": 6.243160387549534e-06,
|
|
"loss": 0.7919,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 0.7444455514595995,
|
|
"grad_norm": 0.22220778420283688,
|
|
"learning_rate": 6.224803639713138e-06,
|
|
"loss": 0.7531,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 0.744844076915413,
|
|
"grad_norm": 0.21437200486409036,
|
|
"learning_rate": 6.206468943676831e-06,
|
|
"loss": 0.7965,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 0.7452426023712264,
|
|
"grad_norm": 0.23487795253335572,
|
|
"learning_rate": 6.188156328791397e-06,
|
|
"loss": 0.8301,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.74564112782704,
|
|
"grad_norm": 0.21763886551801245,
|
|
"learning_rate": 6.169865824372314e-06,
|
|
"loss": 0.7875,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 0.7460396532828535,
|
|
"grad_norm": 0.22604818846373181,
|
|
"learning_rate": 6.151597459699621e-06,
|
|
"loss": 0.8054,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 0.7464381787386669,
|
|
"grad_norm": 0.21771303595209707,
|
|
"learning_rate": 6.133351264017939e-06,
|
|
"loss": 0.7735,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 0.7468367041944804,
|
|
"grad_norm": 0.21715354774157822,
|
|
"learning_rate": 6.115127266536403e-06,
|
|
"loss": 0.7762,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 0.7472352296502939,
|
|
"grad_norm": 0.2157960601894358,
|
|
"learning_rate": 6.0969254964285895e-06,
|
|
"loss": 0.8153,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.7476337551061074,
|
|
"grad_norm": 0.22332780451488388,
|
|
"learning_rate": 6.0787459828325166e-06,
|
|
"loss": 0.8143,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 0.748032280561921,
|
|
"grad_norm": 0.2309153231971099,
|
|
"learning_rate": 6.060588754850562e-06,
|
|
"loss": 0.7899,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 0.7484308060177344,
|
|
"grad_norm": 0.22898127613887323,
|
|
"learning_rate": 6.042453841549438e-06,
|
|
"loss": 0.8309,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 0.7488293314735479,
|
|
"grad_norm": 0.21931059736091962,
|
|
"learning_rate": 6.024341271960112e-06,
|
|
"loss": 0.7921,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 0.7492278569293613,
|
|
"grad_norm": 0.23434936881308505,
|
|
"learning_rate": 6.006251075077809e-06,
|
|
"loss": 0.7799,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.7496263823851749,
|
|
"grad_norm": 0.2372270380137871,
|
|
"learning_rate": 5.988183279861921e-06,
|
|
"loss": 0.7829,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 0.7500249078409883,
|
|
"grad_norm": 0.22942099098861327,
|
|
"learning_rate": 5.970137915235992e-06,
|
|
"loss": 0.7918,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 0.7504234332968018,
|
|
"grad_norm": 0.2355040611383991,
|
|
"learning_rate": 5.952115010087654e-06,
|
|
"loss": 0.835,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 0.7508219587526154,
|
|
"grad_norm": 0.2239708740237137,
|
|
"learning_rate": 5.934114593268572e-06,
|
|
"loss": 0.7781,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 0.7512204842084288,
|
|
"grad_norm": 0.21984896769317516,
|
|
"learning_rate": 5.916136693594434e-06,
|
|
"loss": 0.7862,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.7516190096642423,
|
|
"grad_norm": 0.2197233848994438,
|
|
"learning_rate": 5.898181339844858e-06,
|
|
"loss": 0.8147,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 0.7520175351200558,
|
|
"grad_norm": 0.21853538967964484,
|
|
"learning_rate": 5.880248560763384e-06,
|
|
"loss": 0.7897,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 0.7524160605758693,
|
|
"grad_norm": 0.2251548690545732,
|
|
"learning_rate": 5.862338385057416e-06,
|
|
"loss": 0.7984,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 0.7528145860316827,
|
|
"grad_norm": 0.21585033327673825,
|
|
"learning_rate": 5.844450841398166e-06,
|
|
"loss": 0.7953,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 0.7532131114874963,
|
|
"grad_norm": 0.22933572814422915,
|
|
"learning_rate": 5.826585958420625e-06,
|
|
"loss": 0.8006,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.7536116369433098,
|
|
"grad_norm": 0.22747479613099156,
|
|
"learning_rate": 5.80874376472349e-06,
|
|
"loss": 0.7598,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 0.7540101623991232,
|
|
"grad_norm": 0.21512314765889684,
|
|
"learning_rate": 5.790924288869162e-06,
|
|
"loss": 0.8148,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 0.7544086878549368,
|
|
"grad_norm": 0.33438808323630886,
|
|
"learning_rate": 5.773127559383638e-06,
|
|
"loss": 0.7554,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 0.7548072133107502,
|
|
"grad_norm": 0.22483670938682515,
|
|
"learning_rate": 5.755353604756544e-06,
|
|
"loss": 0.784,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 0.7552057387665637,
|
|
"grad_norm": 0.21592647946477764,
|
|
"learning_rate": 5.737602453441032e-06,
|
|
"loss": 0.7715,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.7556042642223773,
|
|
"grad_norm": 0.21691744670655036,
|
|
"learning_rate": 5.719874133853725e-06,
|
|
"loss": 0.7909,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 0.7560027896781907,
|
|
"grad_norm": 0.23150710281578893,
|
|
"learning_rate": 5.702168674374735e-06,
|
|
"loss": 0.7983,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 0.7564013151340042,
|
|
"grad_norm": 0.22053519786366013,
|
|
"learning_rate": 5.6844861033475466e-06,
|
|
"loss": 0.764,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 0.7567998405898176,
|
|
"grad_norm": 0.21199239099110317,
|
|
"learning_rate": 5.666826449079022e-06,
|
|
"loss": 0.7872,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 0.7571983660456312,
|
|
"grad_norm": 0.33723343359752794,
|
|
"learning_rate": 5.649189739839331e-06,
|
|
"loss": 0.8006,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.7575968915014446,
|
|
"grad_norm": 0.22529144997723208,
|
|
"learning_rate": 5.63157600386192e-06,
|
|
"loss": 0.8264,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 0.7579954169572581,
|
|
"grad_norm": 0.21629640216592316,
|
|
"learning_rate": 5.613985269343456e-06,
|
|
"loss": 0.7854,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 0.7583939424130717,
|
|
"grad_norm": 0.22311405638594484,
|
|
"learning_rate": 5.596417564443768e-06,
|
|
"loss": 0.7773,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 0.7587924678688851,
|
|
"grad_norm": 0.21547315103858006,
|
|
"learning_rate": 5.578872917285838e-06,
|
|
"loss": 0.7626,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 0.7591909933246986,
|
|
"grad_norm": 0.22382658871923508,
|
|
"learning_rate": 5.561351355955733e-06,
|
|
"loss": 0.8059,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.7595895187805121,
|
|
"grad_norm": 0.22341672646153143,
|
|
"learning_rate": 5.543852908502565e-06,
|
|
"loss": 0.7624,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 0.7599880442363256,
|
|
"grad_norm": 0.21972426758841143,
|
|
"learning_rate": 5.526377602938429e-06,
|
|
"loss": 0.8004,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 0.7603865696921391,
|
|
"grad_norm": 0.20999907442340116,
|
|
"learning_rate": 5.508925467238391e-06,
|
|
"loss": 0.7865,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 0.7607850951479526,
|
|
"grad_norm": 0.21874631069378098,
|
|
"learning_rate": 5.491496529340425e-06,
|
|
"loss": 0.782,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 0.7611836206037661,
|
|
"grad_norm": 0.2171739766459026,
|
|
"learning_rate": 5.474090817145352e-06,
|
|
"loss": 0.817,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.7615821460595795,
|
|
"grad_norm": 0.23395913286116207,
|
|
"learning_rate": 5.456708358516833e-06,
|
|
"loss": 0.7909,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 0.7619806715153931,
|
|
"grad_norm": 0.3909635390360292,
|
|
"learning_rate": 5.439349181281293e-06,
|
|
"loss": 0.783,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 0.7623791969712065,
|
|
"grad_norm": 0.21817745960660756,
|
|
"learning_rate": 5.422013313227896e-06,
|
|
"loss": 0.7968,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 0.76277772242702,
|
|
"grad_norm": 0.2129422416400334,
|
|
"learning_rate": 5.404700782108476e-06,
|
|
"loss": 0.7986,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 0.7631762478828336,
|
|
"grad_norm": 0.24894816442926734,
|
|
"learning_rate": 5.387411615637521e-06,
|
|
"loss": 0.7838,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.763574773338647,
|
|
"grad_norm": 0.21380121079954537,
|
|
"learning_rate": 5.370145841492116e-06,
|
|
"loss": 0.8042,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 0.7639732987944605,
|
|
"grad_norm": 0.2145326012345622,
|
|
"learning_rate": 5.352903487311893e-06,
|
|
"loss": 0.7684,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 0.764371824250274,
|
|
"grad_norm": 0.2238740099248399,
|
|
"learning_rate": 5.3356845806990054e-06,
|
|
"loss": 0.7789,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 0.7647703497060875,
|
|
"grad_norm": 0.23241336202019805,
|
|
"learning_rate": 5.318489149218047e-06,
|
|
"loss": 0.7955,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 0.765168875161901,
|
|
"grad_norm": 0.22274065294729253,
|
|
"learning_rate": 5.301317220396056e-06,
|
|
"loss": 0.7971,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.7655674006177144,
|
|
"grad_norm": 0.21450551669208287,
|
|
"learning_rate": 5.284168821722429e-06,
|
|
"loss": 0.8039,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 0.765965926073528,
|
|
"grad_norm": 0.22005637491103672,
|
|
"learning_rate": 5.267043980648905e-06,
|
|
"loss": 0.7785,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 0.7663644515293414,
|
|
"grad_norm": 0.21711685516462279,
|
|
"learning_rate": 5.249942724589508e-06,
|
|
"loss": 0.7748,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 0.7667629769851549,
|
|
"grad_norm": 0.21195870427677962,
|
|
"learning_rate": 5.23286508092051e-06,
|
|
"loss": 0.7791,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 0.7671615024409684,
|
|
"grad_norm": 0.2215540780948147,
|
|
"learning_rate": 5.215811076980384e-06,
|
|
"loss": 0.7867,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.7675600278967819,
|
|
"grad_norm": 0.2134811799235333,
|
|
"learning_rate": 5.1987807400697465e-06,
|
|
"loss": 0.8204,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 0.7679585533525954,
|
|
"grad_norm": 0.21126480142948123,
|
|
"learning_rate": 5.1817740974513394e-06,
|
|
"loss": 0.7744,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 0.7683570788084089,
|
|
"grad_norm": 0.21093921074309108,
|
|
"learning_rate": 5.164791176349975e-06,
|
|
"loss": 0.7804,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 0.7687556042642224,
|
|
"grad_norm": 0.22232833723691933,
|
|
"learning_rate": 5.147832003952482e-06,
|
|
"loss": 0.8122,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 0.7691541297200358,
|
|
"grad_norm": 0.21135760176592855,
|
|
"learning_rate": 5.130896607407689e-06,
|
|
"loss": 0.7837,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.7695526551758494,
|
|
"grad_norm": 0.21690410153487147,
|
|
"learning_rate": 5.113985013826337e-06,
|
|
"loss": 0.8333,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 0.7699511806316629,
|
|
"grad_norm": 0.22611226851018745,
|
|
"learning_rate": 5.097097250281089e-06,
|
|
"loss": 0.8336,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 0.7703497060874763,
|
|
"grad_norm": 0.21422680254932244,
|
|
"learning_rate": 5.080233343806435e-06,
|
|
"loss": 0.7925,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 0.7707482315432899,
|
|
"grad_norm": 0.21725411912202952,
|
|
"learning_rate": 5.063393321398693e-06,
|
|
"loss": 0.7682,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 0.7711467569991033,
|
|
"grad_norm": 0.20486094819815992,
|
|
"learning_rate": 5.046577210015941e-06,
|
|
"loss": 0.7698,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.7715452824549168,
|
|
"grad_norm": 0.21116949065534618,
|
|
"learning_rate": 5.029785036577976e-06,
|
|
"loss": 0.7839,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 0.7719438079107303,
|
|
"grad_norm": 0.21365660447596332,
|
|
"learning_rate": 5.013016827966289e-06,
|
|
"loss": 0.794,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 0.7723423333665438,
|
|
"grad_norm": 0.21986116163132582,
|
|
"learning_rate": 4.996272611023978e-06,
|
|
"loss": 0.8004,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 0.7727408588223573,
|
|
"grad_norm": 0.21667082564742637,
|
|
"learning_rate": 4.979552412555757e-06,
|
|
"loss": 0.7955,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 0.7731393842781707,
|
|
"grad_norm": 0.2131311718527391,
|
|
"learning_rate": 4.962856259327888e-06,
|
|
"loss": 0.8222,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.7735379097339843,
|
|
"grad_norm": 0.20312498370931167,
|
|
"learning_rate": 4.946184178068145e-06,
|
|
"loss": 0.7777,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 0.7739364351897977,
|
|
"grad_norm": 0.21157244173886958,
|
|
"learning_rate": 4.929536195465743e-06,
|
|
"loss": 0.7674,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 0.7743349606456112,
|
|
"grad_norm": 0.21401144119856197,
|
|
"learning_rate": 4.9129123381713426e-06,
|
|
"loss": 0.8245,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 0.7747334861014247,
|
|
"grad_norm": 0.21771908112415073,
|
|
"learning_rate": 4.8963126327969844e-06,
|
|
"loss": 0.8122,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 0.7751320115572382,
|
|
"grad_norm": 0.21187987139599745,
|
|
"learning_rate": 4.879737105916021e-06,
|
|
"loss": 0.8179,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.7755305370130517,
|
|
"grad_norm": 0.20845520286257718,
|
|
"learning_rate": 4.863185784063136e-06,
|
|
"loss": 0.7991,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 0.7759290624688652,
|
|
"grad_norm": 0.21881307944899714,
|
|
"learning_rate": 4.8466586937342315e-06,
|
|
"loss": 0.7715,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 0.7763275879246787,
|
|
"grad_norm": 0.22037508987905377,
|
|
"learning_rate": 4.830155861386441e-06,
|
|
"loss": 0.8178,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 0.7767261133804921,
|
|
"grad_norm": 0.2188466732998409,
|
|
"learning_rate": 4.813677313438045e-06,
|
|
"loss": 0.7931,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 0.7771246388363057,
|
|
"grad_norm": 0.22029271333920605,
|
|
"learning_rate": 4.7972230762684695e-06,
|
|
"loss": 0.7962,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.7775231642921192,
|
|
"grad_norm": 0.21586985458048003,
|
|
"learning_rate": 4.78079317621821e-06,
|
|
"loss": 0.8035,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 0.7779216897479326,
|
|
"grad_norm": 0.2122373168935699,
|
|
"learning_rate": 4.7643876395888076e-06,
|
|
"loss": 0.7668,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 0.7783202152037462,
|
|
"grad_norm": 0.20775917857186701,
|
|
"learning_rate": 4.748006492642805e-06,
|
|
"loss": 0.7786,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 0.7787187406595596,
|
|
"grad_norm": 0.21569140886208557,
|
|
"learning_rate": 4.731649761603685e-06,
|
|
"loss": 0.8067,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 0.7791172661153731,
|
|
"grad_norm": 0.2131646673455944,
|
|
"learning_rate": 4.715317472655863e-06,
|
|
"loss": 0.7971,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.7795157915711866,
|
|
"grad_norm": 0.2146175074423186,
|
|
"learning_rate": 4.699009651944622e-06,
|
|
"loss": 0.777,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 0.7799143170270001,
|
|
"grad_norm": 0.21312837734855186,
|
|
"learning_rate": 4.682726325576059e-06,
|
|
"loss": 0.7932,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 0.7803128424828136,
|
|
"grad_norm": 0.21781795703518547,
|
|
"learning_rate": 4.666467519617093e-06,
|
|
"loss": 0.8004,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 0.780711367938627,
|
|
"grad_norm": 0.21181093024914874,
|
|
"learning_rate": 4.650233260095354e-06,
|
|
"loss": 0.7586,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 0.7811098933944406,
|
|
"grad_norm": 0.21750201665933414,
|
|
"learning_rate": 4.634023572999207e-06,
|
|
"loss": 0.8103,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.781508418850254,
|
|
"grad_norm": 0.21261609028271256,
|
|
"learning_rate": 4.617838484277654e-06,
|
|
"loss": 0.7794,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 0.7819069443060676,
|
|
"grad_norm": 0.22127702762736784,
|
|
"learning_rate": 4.601678019840339e-06,
|
|
"loss": 0.824,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 0.7823054697618811,
|
|
"grad_norm": 0.21167895347901275,
|
|
"learning_rate": 4.585542205557478e-06,
|
|
"loss": 0.7872,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 0.7827039952176945,
|
|
"grad_norm": 0.20443014284749786,
|
|
"learning_rate": 4.569431067259828e-06,
|
|
"loss": 0.768,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 0.783102520673508,
|
|
"grad_norm": 0.21508398213351645,
|
|
"learning_rate": 4.553344630738654e-06,
|
|
"loss": 0.7972,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.7835010461293215,
|
|
"grad_norm": 0.21284922880197987,
|
|
"learning_rate": 4.5372829217456515e-06,
|
|
"loss": 0.7877,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 0.783899571585135,
|
|
"grad_norm": 0.21149964459483625,
|
|
"learning_rate": 4.5212459659929596e-06,
|
|
"loss": 0.8317,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 0.7842980970409484,
|
|
"grad_norm": 0.20959662240837698,
|
|
"learning_rate": 4.505233789153063e-06,
|
|
"loss": 0.7761,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 0.784696622496762,
|
|
"grad_norm": 0.21566004770178748,
|
|
"learning_rate": 4.489246416858814e-06,
|
|
"loss": 0.7787,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 0.7850951479525755,
|
|
"grad_norm": 0.20948032542954348,
|
|
"learning_rate": 4.473283874703336e-06,
|
|
"loss": 0.8001,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.7854936734083889,
|
|
"grad_norm": 0.21171612340758303,
|
|
"learning_rate": 4.457346188239997e-06,
|
|
"loss": 0.7846,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 0.7858921988642025,
|
|
"grad_norm": 0.211495224788516,
|
|
"learning_rate": 4.4414333829823944e-06,
|
|
"loss": 0.8205,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 0.7862907243200159,
|
|
"grad_norm": 0.21182971426196345,
|
|
"learning_rate": 4.425545484404272e-06,
|
|
"loss": 0.817,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 0.7866892497758294,
|
|
"grad_norm": 0.20652359587837626,
|
|
"learning_rate": 4.409682517939527e-06,
|
|
"loss": 0.7975,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 0.787087775231643,
|
|
"grad_norm": 0.2039383627589195,
|
|
"learning_rate": 4.393844508982124e-06,
|
|
"loss": 0.7934,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.7874863006874564,
|
|
"grad_norm": 0.20780785483145897,
|
|
"learning_rate": 4.3780314828860895e-06,
|
|
"loss": 0.7954,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 0.7878848261432699,
|
|
"grad_norm": 0.2072740025638685,
|
|
"learning_rate": 4.362243464965452e-06,
|
|
"loss": 0.7901,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 0.7882833515990834,
|
|
"grad_norm": 0.19867758615892187,
|
|
"learning_rate": 4.346480480494197e-06,
|
|
"loss": 0.7606,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 0.7886818770548969,
|
|
"grad_norm": 0.21773075945607415,
|
|
"learning_rate": 4.330742554706251e-06,
|
|
"loss": 0.8123,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 0.7890804025107103,
|
|
"grad_norm": 0.20266873734956298,
|
|
"learning_rate": 4.315029712795404e-06,
|
|
"loss": 0.799,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.7894789279665239,
|
|
"grad_norm": 0.20650482471845288,
|
|
"learning_rate": 4.299341979915324e-06,
|
|
"loss": 0.7972,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 0.7898774534223374,
|
|
"grad_norm": 0.20847406865766804,
|
|
"learning_rate": 4.283679381179449e-06,
|
|
"loss": 0.8187,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 0.7902759788781508,
|
|
"grad_norm": 0.2077737716719368,
|
|
"learning_rate": 4.268041941660998e-06,
|
|
"loss": 0.8032,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 0.7906745043339644,
|
|
"grad_norm": 0.20859031258363198,
|
|
"learning_rate": 4.252429686392927e-06,
|
|
"loss": 0.7706,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 0.7910730297897778,
|
|
"grad_norm": 0.20953564600107155,
|
|
"learning_rate": 4.236842640367844e-06,
|
|
"loss": 0.7902,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.7914715552455913,
|
|
"grad_norm": 0.1998647822957012,
|
|
"learning_rate": 4.221280828538028e-06,
|
|
"loss": 0.785,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 0.7918700807014049,
|
|
"grad_norm": 0.2109037742269456,
|
|
"learning_rate": 4.205744275815351e-06,
|
|
"loss": 0.788,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 0.7922686061572183,
|
|
"grad_norm": 0.3093393907121497,
|
|
"learning_rate": 4.19023300707126e-06,
|
|
"loss": 0.8089,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 0.7926671316130318,
|
|
"grad_norm": 0.21256297107207034,
|
|
"learning_rate": 4.174747047136707e-06,
|
|
"loss": 0.7745,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 0.7930656570688452,
|
|
"grad_norm": 0.5160365968905928,
|
|
"learning_rate": 4.159286420802144e-06,
|
|
"loss": 0.7948,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.7934641825246588,
|
|
"grad_norm": 0.21126289660765277,
|
|
"learning_rate": 4.1438511528174665e-06,
|
|
"loss": 0.7918,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 0.7938627079804722,
|
|
"grad_norm": 0.21794744648330014,
|
|
"learning_rate": 4.1284412678919715e-06,
|
|
"loss": 0.7843,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 0.7942612334362857,
|
|
"grad_norm": 0.20868906992268485,
|
|
"learning_rate": 4.11305679069433e-06,
|
|
"loss": 0.8017,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 0.7946597588920993,
|
|
"grad_norm": 0.21719069879632263,
|
|
"learning_rate": 4.097697745852522e-06,
|
|
"loss": 0.7973,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 0.7950582843479127,
|
|
"grad_norm": 0.21142187004817078,
|
|
"learning_rate": 4.08236415795384e-06,
|
|
"loss": 0.7814,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.7954568098037262,
|
|
"grad_norm": 0.2039420161311614,
|
|
"learning_rate": 4.067056051544793e-06,
|
|
"loss": 0.7889,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 0.7958553352595397,
|
|
"grad_norm": 0.24194928974109936,
|
|
"learning_rate": 4.051773451131127e-06,
|
|
"loss": 0.7682,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 0.7962538607153532,
|
|
"grad_norm": 0.2012545890604259,
|
|
"learning_rate": 4.036516381177742e-06,
|
|
"loss": 0.7782,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 0.7966523861711667,
|
|
"grad_norm": 0.20970642629605174,
|
|
"learning_rate": 4.02128486610867e-06,
|
|
"loss": 0.8223,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 0.7970509116269802,
|
|
"grad_norm": 0.20665659488141222,
|
|
"learning_rate": 4.006078930307043e-06,
|
|
"loss": 0.7812,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.7974494370827937,
|
|
"grad_norm": 0.21749421417588286,
|
|
"learning_rate": 3.9908985981150275e-06,
|
|
"loss": 0.7676,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 0.7978479625386071,
|
|
"grad_norm": 0.20888996451808617,
|
|
"learning_rate": 3.975743893833821e-06,
|
|
"loss": 0.8185,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 0.7982464879944207,
|
|
"grad_norm": 0.2704077080536192,
|
|
"learning_rate": 3.960614841723569e-06,
|
|
"loss": 0.7838,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 0.7986450134502341,
|
|
"grad_norm": 0.2088559508207916,
|
|
"learning_rate": 3.945511466003391e-06,
|
|
"loss": 0.8171,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 0.7990435389060476,
|
|
"grad_norm": 0.20661415959125704,
|
|
"learning_rate": 3.930433790851278e-06,
|
|
"loss": 0.7754,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.7994420643618612,
|
|
"grad_norm": 0.20701920533433565,
|
|
"learning_rate": 3.915381840404071e-06,
|
|
"loss": 0.7841,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 0.7998405898176746,
|
|
"grad_norm": 0.21927395552931095,
|
|
"learning_rate": 3.900355638757452e-06,
|
|
"loss": 0.8029,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 0.8002391152734881,
|
|
"grad_norm": 0.20280686560023278,
|
|
"learning_rate": 3.885355209965865e-06,
|
|
"loss": 0.7794,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 0.8006376407293015,
|
|
"grad_norm": 0.22037706389941072,
|
|
"learning_rate": 3.870380578042505e-06,
|
|
"loss": 0.8098,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 0.8010361661851151,
|
|
"grad_norm": 0.22041475186669696,
|
|
"learning_rate": 3.85543176695927e-06,
|
|
"loss": 0.803,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.8014346916409285,
|
|
"grad_norm": 0.20998177604491353,
|
|
"learning_rate": 3.840508800646725e-06,
|
|
"loss": 0.8175,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 0.801833217096742,
|
|
"grad_norm": 0.45165795643816325,
|
|
"learning_rate": 3.825611702994061e-06,
|
|
"loss": 0.8009,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 0.8022317425525556,
|
|
"grad_norm": 0.21072158850784894,
|
|
"learning_rate": 3.810740497849048e-06,
|
|
"loss": 0.7807,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 0.802630268008369,
|
|
"grad_norm": 0.2069068117921759,
|
|
"learning_rate": 3.7958952090180145e-06,
|
|
"loss": 0.8019,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 0.8030287934641825,
|
|
"grad_norm": 0.21068337260203102,
|
|
"learning_rate": 3.781075860265806e-06,
|
|
"loss": 0.7816,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.803427318919996,
|
|
"grad_norm": 0.21398934601155856,
|
|
"learning_rate": 3.766282475315741e-06,
|
|
"loss": 0.7638,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 0.8038258443758095,
|
|
"grad_norm": 0.20441959178687177,
|
|
"learning_rate": 3.7515150778495566e-06,
|
|
"loss": 0.806,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 0.804224369831623,
|
|
"grad_norm": 0.21249378504406466,
|
|
"learning_rate": 3.7367736915074116e-06,
|
|
"loss": 0.7552,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 0.8046228952874365,
|
|
"grad_norm": 0.20661783667193465,
|
|
"learning_rate": 3.7220583398878198e-06,
|
|
"loss": 0.7926,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 0.80502142074325,
|
|
"grad_norm": 0.2077752476136891,
|
|
"learning_rate": 3.7073690465475996e-06,
|
|
"loss": 0.8021,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.8054199461990634,
|
|
"grad_norm": 0.20570938011934367,
|
|
"learning_rate": 3.6927058350018774e-06,
|
|
"loss": 0.7833,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 0.805818471654877,
|
|
"grad_norm": 0.2068378623875997,
|
|
"learning_rate": 3.678068728724018e-06,
|
|
"loss": 0.7916,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 0.8062169971106904,
|
|
"grad_norm": 0.2108307060112381,
|
|
"learning_rate": 3.663457751145598e-06,
|
|
"loss": 0.8342,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 0.8066155225665039,
|
|
"grad_norm": 0.2078448862912843,
|
|
"learning_rate": 3.648872925656357e-06,
|
|
"loss": 0.7984,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 0.8070140480223175,
|
|
"grad_norm": 0.21028048335603441,
|
|
"learning_rate": 3.6343142756041804e-06,
|
|
"loss": 0.8018,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.8074125734781309,
|
|
"grad_norm": 0.20117720599120376,
|
|
"learning_rate": 3.61978182429505e-06,
|
|
"loss": 0.7707,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 0.8078110989339444,
|
|
"grad_norm": 0.20314858168527,
|
|
"learning_rate": 3.6052755949930028e-06,
|
|
"loss": 0.8014,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 0.8082096243897579,
|
|
"grad_norm": 0.20807347591232647,
|
|
"learning_rate": 3.590795610920106e-06,
|
|
"loss": 0.7783,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 0.8086081498455714,
|
|
"grad_norm": 0.20632811448011976,
|
|
"learning_rate": 3.5763418952563964e-06,
|
|
"loss": 0.7887,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 0.8090066753013849,
|
|
"grad_norm": 0.21490462809860467,
|
|
"learning_rate": 3.561914471139887e-06,
|
|
"loss": 0.7844,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.8094052007571984,
|
|
"grad_norm": 0.20507534096776664,
|
|
"learning_rate": 3.547513361666468e-06,
|
|
"loss": 0.7904,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 0.8098037262130119,
|
|
"grad_norm": 0.20644876557134534,
|
|
"learning_rate": 3.5331385898899286e-06,
|
|
"loss": 0.7691,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 0.8102022516688253,
|
|
"grad_norm": 0.21240998726372254,
|
|
"learning_rate": 3.5187901788219005e-06,
|
|
"loss": 0.8199,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 0.8106007771246388,
|
|
"grad_norm": 0.20137624296072554,
|
|
"learning_rate": 3.5044681514317923e-06,
|
|
"loss": 0.7814,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 0.8109993025804523,
|
|
"grad_norm": 0.2073451450199298,
|
|
"learning_rate": 3.4901725306467983e-06,
|
|
"loss": 0.7769,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.8113978280362658,
|
|
"grad_norm": 0.2134160597885788,
|
|
"learning_rate": 3.4759033393518227e-06,
|
|
"loss": 0.7811,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 0.8117963534920793,
|
|
"grad_norm": 0.20469419291818344,
|
|
"learning_rate": 3.461660600389476e-06,
|
|
"loss": 0.7819,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 0.8121948789478928,
|
|
"grad_norm": 0.20376860496093793,
|
|
"learning_rate": 3.447444336560013e-06,
|
|
"loss": 0.7816,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 0.8125934044037063,
|
|
"grad_norm": 0.41207208863994677,
|
|
"learning_rate": 3.4332545706213092e-06,
|
|
"loss": 0.7927,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 0.8129919298595197,
|
|
"grad_norm": 0.21507072465785926,
|
|
"learning_rate": 3.4190913252888304e-06,
|
|
"loss": 0.804,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.8133904553153333,
|
|
"grad_norm": 0.20319740876888007,
|
|
"learning_rate": 3.4049546232355677e-06,
|
|
"loss": 0.7874,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 0.8137889807711468,
|
|
"grad_norm": 0.20241224467511873,
|
|
"learning_rate": 3.3908444870920377e-06,
|
|
"loss": 0.7805,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 0.8141875062269602,
|
|
"grad_norm": 0.21466864150429207,
|
|
"learning_rate": 3.3767609394462177e-06,
|
|
"loss": 0.78,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 0.8145860316827738,
|
|
"grad_norm": 0.20218659511290218,
|
|
"learning_rate": 3.3627040028435266e-06,
|
|
"loss": 0.7801,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 0.8149845571385872,
|
|
"grad_norm": 0.213036870154348,
|
|
"learning_rate": 3.3486736997867973e-06,
|
|
"loss": 0.7824,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.8153830825944007,
|
|
"grad_norm": 0.19949805665039408,
|
|
"learning_rate": 3.3346700527361976e-06,
|
|
"loss": 0.7955,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 0.8157816080502142,
|
|
"grad_norm": 0.20680232683225422,
|
|
"learning_rate": 3.320693084109252e-06,
|
|
"loss": 0.7897,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 0.8161801335060277,
|
|
"grad_norm": 0.2000391282113421,
|
|
"learning_rate": 3.3067428162807524e-06,
|
|
"loss": 0.8005,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 0.8165786589618412,
|
|
"grad_norm": 0.2156772773776592,
|
|
"learning_rate": 3.2928192715827635e-06,
|
|
"loss": 0.8053,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 0.8169771844176547,
|
|
"grad_norm": 0.39867015204161727,
|
|
"learning_rate": 3.2789224723045688e-06,
|
|
"loss": 0.7969,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.8173757098734682,
|
|
"grad_norm": 0.2046620024871545,
|
|
"learning_rate": 3.265052440692633e-06,
|
|
"loss": 0.7926,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 0.8177742353292816,
|
|
"grad_norm": 0.2030808711787401,
|
|
"learning_rate": 3.2512091989505755e-06,
|
|
"loss": 0.7774,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 0.8181727607850952,
|
|
"grad_norm": 0.20949507249814342,
|
|
"learning_rate": 3.2373927692391183e-06,
|
|
"loss": 0.793,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 0.8185712862409087,
|
|
"grad_norm": 0.20772541980987708,
|
|
"learning_rate": 3.2236031736760775e-06,
|
|
"loss": 0.7726,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 0.8189698116967221,
|
|
"grad_norm": 0.21408416045479248,
|
|
"learning_rate": 3.209840434336291e-06,
|
|
"loss": 0.7794,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.8193683371525357,
|
|
"grad_norm": 0.22494235529547763,
|
|
"learning_rate": 3.196104573251633e-06,
|
|
"loss": 0.791,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 0.8197668626083491,
|
|
"grad_norm": 0.20454170412693226,
|
|
"learning_rate": 3.1823956124109245e-06,
|
|
"loss": 0.7862,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 0.8201653880641626,
|
|
"grad_norm": 0.20433874449012537,
|
|
"learning_rate": 3.168713573759934e-06,
|
|
"loss": 0.7666,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 0.820563913519976,
|
|
"grad_norm": 0.20661160157593184,
|
|
"learning_rate": 3.1550584792013384e-06,
|
|
"loss": 0.7433,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 0.8209624389757896,
|
|
"grad_norm": 0.20629809799285342,
|
|
"learning_rate": 3.1414303505946674e-06,
|
|
"loss": 0.7976,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.8213609644316031,
|
|
"grad_norm": 0.2144450649554419,
|
|
"learning_rate": 3.1278292097562902e-06,
|
|
"loss": 0.8333,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 0.8217594898874165,
|
|
"grad_norm": 0.20822166366362016,
|
|
"learning_rate": 3.1142550784593784e-06,
|
|
"loss": 0.8266,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 0.8221580153432301,
|
|
"grad_norm": 0.24188329998112856,
|
|
"learning_rate": 3.100707978433859e-06,
|
|
"loss": 0.7876,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 0.8225565407990435,
|
|
"grad_norm": 0.2048848180047204,
|
|
"learning_rate": 3.087187931366382e-06,
|
|
"loss": 0.7614,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 0.822955066254857,
|
|
"grad_norm": 0.20470377463967024,
|
|
"learning_rate": 3.0736949589003016e-06,
|
|
"loss": 0.7781,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.8233535917106706,
|
|
"grad_norm": 0.20987934787578208,
|
|
"learning_rate": 3.0602290826356264e-06,
|
|
"loss": 0.772,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 0.823752117166484,
|
|
"grad_norm": 0.2113936816052613,
|
|
"learning_rate": 3.046790324128972e-06,
|
|
"loss": 0.7872,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 0.8241506426222975,
|
|
"grad_norm": 0.19957043349861603,
|
|
"learning_rate": 3.0333787048935794e-06,
|
|
"loss": 0.7887,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 0.824549168078111,
|
|
"grad_norm": 0.3857301817498995,
|
|
"learning_rate": 3.019994246399205e-06,
|
|
"loss": 0.7882,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 0.8249476935339245,
|
|
"grad_norm": 0.20789973511441273,
|
|
"learning_rate": 3.006636970072152e-06,
|
|
"loss": 0.8076,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.8253462189897379,
|
|
"grad_norm": 0.2058835362862163,
|
|
"learning_rate": 2.993306897295194e-06,
|
|
"loss": 0.7764,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 0.8257447444455515,
|
|
"grad_norm": 0.20439869423777723,
|
|
"learning_rate": 2.980004049407561e-06,
|
|
"loss": 0.7764,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 0.826143269901365,
|
|
"grad_norm": 0.19876479503616204,
|
|
"learning_rate": 2.9667284477049075e-06,
|
|
"loss": 0.7826,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 0.8265417953571784,
|
|
"grad_norm": 0.1982699447253256,
|
|
"learning_rate": 2.9534801134392644e-06,
|
|
"loss": 0.7757,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 0.826940320812992,
|
|
"grad_norm": 0.20536270507053644,
|
|
"learning_rate": 2.9402590678190134e-06,
|
|
"loss": 0.7943,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.8273388462688054,
|
|
"grad_norm": 0.20479786214195925,
|
|
"learning_rate": 2.927065332008847e-06,
|
|
"loss": 0.796,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 0.8277373717246189,
|
|
"grad_norm": 0.204692054035632,
|
|
"learning_rate": 2.9138989271297525e-06,
|
|
"loss": 0.7757,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 0.8281358971804323,
|
|
"grad_norm": 0.2088750085892623,
|
|
"learning_rate": 2.900759874258938e-06,
|
|
"loss": 0.8125,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 0.8285344226362459,
|
|
"grad_norm": 0.2044102963337698,
|
|
"learning_rate": 2.887648194429862e-06,
|
|
"loss": 0.7641,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 0.8289329480920594,
|
|
"grad_norm": 0.21327563387382853,
|
|
"learning_rate": 2.874563908632142e-06,
|
|
"loss": 0.7994,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.8293314735478728,
|
|
"grad_norm": 0.2046570896223022,
|
|
"learning_rate": 2.8615070378115372e-06,
|
|
"loss": 0.8017,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 0.8297299990036864,
|
|
"grad_norm": 0.19812578410366266,
|
|
"learning_rate": 2.848477602869937e-06,
|
|
"loss": 0.784,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 0.8301285244594998,
|
|
"grad_norm": 0.20601688938227922,
|
|
"learning_rate": 2.8354756246652913e-06,
|
|
"loss": 0.769,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 0.8305270499153133,
|
|
"grad_norm": 0.2057354048825274,
|
|
"learning_rate": 2.822501124011612e-06,
|
|
"loss": 0.7847,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 0.8309255753711269,
|
|
"grad_norm": 0.21168604129063812,
|
|
"learning_rate": 2.809554121678917e-06,
|
|
"loss": 0.8032,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.8313241008269403,
|
|
"grad_norm": 0.2100939254517527,
|
|
"learning_rate": 2.7966346383932076e-06,
|
|
"loss": 0.7874,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 0.8317226262827538,
|
|
"grad_norm": 0.21934203978806813,
|
|
"learning_rate": 2.7837426948364334e-06,
|
|
"loss": 0.79,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 0.8321211517385673,
|
|
"grad_norm": 0.19759229839235726,
|
|
"learning_rate": 2.7708783116464435e-06,
|
|
"loss": 0.7655,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 0.8325196771943808,
|
|
"grad_norm": 0.2086778699301496,
|
|
"learning_rate": 2.7580415094169865e-06,
|
|
"loss": 0.7839,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 0.8329182026501942,
|
|
"grad_norm": 0.21338341723931933,
|
|
"learning_rate": 2.745232308697636e-06,
|
|
"loss": 0.829,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.8333167281060078,
|
|
"grad_norm": 0.21045174950788936,
|
|
"learning_rate": 2.732450729993814e-06,
|
|
"loss": 0.8096,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 0.8337152535618213,
|
|
"grad_norm": 0.2051766400490156,
|
|
"learning_rate": 2.7196967937666865e-06,
|
|
"loss": 0.8039,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 0.8341137790176347,
|
|
"grad_norm": 0.19510414251619265,
|
|
"learning_rate": 2.706970520433192e-06,
|
|
"loss": 0.7793,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 0.8345123044734483,
|
|
"grad_norm": 0.2023242681129976,
|
|
"learning_rate": 2.6942719303659837e-06,
|
|
"loss": 0.781,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 0.8349108299292617,
|
|
"grad_norm": 0.2030427501132859,
|
|
"learning_rate": 2.681601043893387e-06,
|
|
"loss": 0.781,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.8353093553850752,
|
|
"grad_norm": 0.20888874667008847,
|
|
"learning_rate": 2.6689578812993857e-06,
|
|
"loss": 0.7694,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 0.8357078808408888,
|
|
"grad_norm": 0.20077367736979854,
|
|
"learning_rate": 2.6563424628235845e-06,
|
|
"loss": 0.7848,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 0.8361064062967022,
|
|
"grad_norm": 0.21005110509053168,
|
|
"learning_rate": 2.6437548086611765e-06,
|
|
"loss": 0.7988,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 0.8365049317525157,
|
|
"grad_norm": 0.19800915015594286,
|
|
"learning_rate": 2.6311949389628956e-06,
|
|
"loss": 0.8021,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 0.8369034572083291,
|
|
"grad_norm": 0.20692630086537173,
|
|
"learning_rate": 2.618662873835007e-06,
|
|
"loss": 0.796,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.8373019826641427,
|
|
"grad_norm": 0.20999876285414867,
|
|
"learning_rate": 2.6061586333392684e-06,
|
|
"loss": 0.8025,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 0.8377005081199561,
|
|
"grad_norm": 0.20623308075487845,
|
|
"learning_rate": 2.5936822374928894e-06,
|
|
"loss": 0.7815,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 0.8380990335757696,
|
|
"grad_norm": 0.205638179543828,
|
|
"learning_rate": 2.581233706268509e-06,
|
|
"loss": 0.802,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 0.8384975590315832,
|
|
"grad_norm": 0.19752040584951092,
|
|
"learning_rate": 2.5688130595941486e-06,
|
|
"loss": 0.7556,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 0.8388960844873966,
|
|
"grad_norm": 0.20069625765475899,
|
|
"learning_rate": 2.55642031735321e-06,
|
|
"loss": 0.7889,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.8392946099432101,
|
|
"grad_norm": 0.2018781461121737,
|
|
"learning_rate": 2.544055499384406e-06,
|
|
"loss": 0.8142,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 0.8396931353990236,
|
|
"grad_norm": 0.19475379047238844,
|
|
"learning_rate": 2.5317186254817538e-06,
|
|
"loss": 0.7663,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 0.8400916608548371,
|
|
"grad_norm": 0.1969342228912807,
|
|
"learning_rate": 2.519409715394545e-06,
|
|
"loss": 0.7938,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 0.8404901863106506,
|
|
"grad_norm": 0.19895944903191795,
|
|
"learning_rate": 2.5071287888272953e-06,
|
|
"loss": 0.8051,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 0.8408887117664641,
|
|
"grad_norm": 0.20042877149823382,
|
|
"learning_rate": 2.4948758654397342e-06,
|
|
"loss": 0.7833,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.8412872372222776,
|
|
"grad_norm": 0.19887545472768395,
|
|
"learning_rate": 2.4826509648467424e-06,
|
|
"loss": 0.7742,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 0.841685762678091,
|
|
"grad_norm": 0.2011722070087204,
|
|
"learning_rate": 2.470454106618363e-06,
|
|
"loss": 0.7857,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 0.8420842881339046,
|
|
"grad_norm": 0.20180297794597085,
|
|
"learning_rate": 2.458285310279738e-06,
|
|
"loss": 0.7997,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 0.842482813589718,
|
|
"grad_norm": 0.20055121230743078,
|
|
"learning_rate": 2.4461445953110862e-06,
|
|
"loss": 0.8014,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 0.8428813390455315,
|
|
"grad_norm": 0.19868315248272878,
|
|
"learning_rate": 2.43403198114768e-06,
|
|
"loss": 0.774,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.8432798645013451,
|
|
"grad_norm": 0.19770045553158802,
|
|
"learning_rate": 2.4219474871797942e-06,
|
|
"loss": 0.7856,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 0.8436783899571585,
|
|
"grad_norm": 0.20259006469350982,
|
|
"learning_rate": 2.409891132752702e-06,
|
|
"loss": 0.8102,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 0.844076915412972,
|
|
"grad_norm": 0.2013541403832189,
|
|
"learning_rate": 2.3978629371666174e-06,
|
|
"loss": 0.7853,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 0.8444754408687855,
|
|
"grad_norm": 0.20033442757315134,
|
|
"learning_rate": 2.3858629196766846e-06,
|
|
"loss": 0.7877,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 0.844873966324599,
|
|
"grad_norm": 0.21068432536317944,
|
|
"learning_rate": 2.3738910994929353e-06,
|
|
"loss": 0.766,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.8452724917804125,
|
|
"grad_norm": 0.1980119004076494,
|
|
"learning_rate": 2.36194749578027e-06,
|
|
"loss": 0.7731,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 0.845671017236226,
|
|
"grad_norm": 0.19889954520717595,
|
|
"learning_rate": 2.3500321276584103e-06,
|
|
"loss": 0.796,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 0.8460695426920395,
|
|
"grad_norm": 0.29416894294679846,
|
|
"learning_rate": 2.338145014201878e-06,
|
|
"loss": 0.8096,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 0.8464680681478529,
|
|
"grad_norm": 0.19806318324832906,
|
|
"learning_rate": 2.326286174439969e-06,
|
|
"loss": 0.7997,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 0.8468665936036665,
|
|
"grad_norm": 0.19823684897235574,
|
|
"learning_rate": 2.3144556273567132e-06,
|
|
"loss": 0.7607,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.8472651190594799,
|
|
"grad_norm": 0.18966161568344858,
|
|
"learning_rate": 2.30265339189085e-06,
|
|
"loss": 0.7804,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 0.8476636445152934,
|
|
"grad_norm": 0.19521990516259677,
|
|
"learning_rate": 2.2908794869358044e-06,
|
|
"loss": 0.7648,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 0.848062169971107,
|
|
"grad_norm": 0.21019481820981523,
|
|
"learning_rate": 2.27913393133963e-06,
|
|
"loss": 0.801,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 0.8484606954269204,
|
|
"grad_norm": 0.2044393443918899,
|
|
"learning_rate": 2.267416743905018e-06,
|
|
"loss": 0.7998,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 0.8488592208827339,
|
|
"grad_norm": 0.1983161340871745,
|
|
"learning_rate": 2.255727943389232e-06,
|
|
"loss": 0.7829,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.8492577463385473,
|
|
"grad_norm": 0.3883686062566025,
|
|
"learning_rate": 2.244067548504101e-06,
|
|
"loss": 0.7689,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 0.8496562717943609,
|
|
"grad_norm": 0.19823170694060893,
|
|
"learning_rate": 2.232435577915981e-06,
|
|
"loss": 0.7841,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 0.8500547972501744,
|
|
"grad_norm": 0.2011348839077823,
|
|
"learning_rate": 2.2208320502457247e-06,
|
|
"loss": 0.7743,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 0.8504533227059878,
|
|
"grad_norm": 0.2678986826453042,
|
|
"learning_rate": 2.209256984068653e-06,
|
|
"loss": 0.8186,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 0.8508518481618014,
|
|
"grad_norm": 0.38901312200457155,
|
|
"learning_rate": 2.1977103979145144e-06,
|
|
"loss": 0.7873,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.8512503736176148,
|
|
"grad_norm": 0.19801665808383853,
|
|
"learning_rate": 2.186192310267481e-06,
|
|
"loss": 0.7962,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 0.8516488990734283,
|
|
"grad_norm": 0.19959353534388102,
|
|
"learning_rate": 2.174702739566097e-06,
|
|
"loss": 0.7875,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 0.8520474245292418,
|
|
"grad_norm": 0.19906997852364527,
|
|
"learning_rate": 2.1632417042032582e-06,
|
|
"loss": 0.799,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 0.8524459499850553,
|
|
"grad_norm": 0.19383785374266083,
|
|
"learning_rate": 2.151809222526171e-06,
|
|
"loss": 0.8012,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 0.8528444754408688,
|
|
"grad_norm": 0.20008791840830747,
|
|
"learning_rate": 2.140405312836342e-06,
|
|
"loss": 0.8034,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.8532430008966823,
|
|
"grad_norm": 0.5550294238933178,
|
|
"learning_rate": 2.1290299933895375e-06,
|
|
"loss": 0.8056,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 0.8536415263524958,
|
|
"grad_norm": 0.19867486415459287,
|
|
"learning_rate": 2.1176832823957437e-06,
|
|
"loss": 0.7777,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 0.8540400518083092,
|
|
"grad_norm": 0.19676333190679646,
|
|
"learning_rate": 2.1063651980191735e-06,
|
|
"loss": 0.7915,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 0.8544385772641228,
|
|
"grad_norm": 0.1989409125958559,
|
|
"learning_rate": 2.095075758378191e-06,
|
|
"loss": 0.8095,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 0.8548371027199362,
|
|
"grad_norm": 0.21328576722717954,
|
|
"learning_rate": 2.083814981545316e-06,
|
|
"loss": 0.8003,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.8552356281757497,
|
|
"grad_norm": 0.20295493914625967,
|
|
"learning_rate": 2.0725828855471743e-06,
|
|
"loss": 0.8048,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 0.8556341536315633,
|
|
"grad_norm": 0.2074806852443234,
|
|
"learning_rate": 2.06137948836449e-06,
|
|
"loss": 0.8056,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 0.8560326790873767,
|
|
"grad_norm": 0.1970460127714032,
|
|
"learning_rate": 2.0502048079320412e-06,
|
|
"loss": 0.7719,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 0.8564312045431902,
|
|
"grad_norm": 0.20135572980918695,
|
|
"learning_rate": 2.03905886213863e-06,
|
|
"loss": 0.8124,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 0.8568297299990036,
|
|
"grad_norm": 0.19706602719348762,
|
|
"learning_rate": 2.0279416688270714e-06,
|
|
"loss": 0.8042,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.8572282554548172,
|
|
"grad_norm": 0.19351017765851636,
|
|
"learning_rate": 2.0168532457941347e-06,
|
|
"loss": 0.7817,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 0.8576267809106307,
|
|
"grad_norm": 0.19662641436265876,
|
|
"learning_rate": 2.0057936107905496e-06,
|
|
"loss": 0.7872,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 0.8580253063664441,
|
|
"grad_norm": 0.19472713717233617,
|
|
"learning_rate": 1.994762781520947e-06,
|
|
"loss": 0.7959,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 0.8584238318222577,
|
|
"grad_norm": 0.4466872234199686,
|
|
"learning_rate": 1.9837607756438506e-06,
|
|
"loss": 0.7957,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 0.8588223572780711,
|
|
"grad_norm": 0.19598069824689382,
|
|
"learning_rate": 1.972787610771656e-06,
|
|
"loss": 0.7728,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.8592208827338846,
|
|
"grad_norm": 0.20101685010301282,
|
|
"learning_rate": 1.9618433044705653e-06,
|
|
"loss": 0.7943,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 0.8596194081896981,
|
|
"grad_norm": 0.298341423595395,
|
|
"learning_rate": 1.9509278742605998e-06,
|
|
"loss": 0.8152,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 0.8600179336455116,
|
|
"grad_norm": 0.19641318468760852,
|
|
"learning_rate": 1.9400413376155414e-06,
|
|
"loss": 0.7718,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 0.8604164591013251,
|
|
"grad_norm": 0.20359959382775875,
|
|
"learning_rate": 1.929183711962932e-06,
|
|
"loss": 0.8166,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 0.8608149845571386,
|
|
"grad_norm": 0.29285934932172486,
|
|
"learning_rate": 1.918355014684026e-06,
|
|
"loss": 0.8116,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.8612135100129521,
|
|
"grad_norm": 0.20081004118069398,
|
|
"learning_rate": 1.9075552631137673e-06,
|
|
"loss": 0.828,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 0.8616120354687655,
|
|
"grad_norm": 0.19491684359283115,
|
|
"learning_rate": 1.8967844745407649e-06,
|
|
"loss": 0.8162,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 0.8620105609245791,
|
|
"grad_norm": 0.19931801177242742,
|
|
"learning_rate": 1.8860426662072573e-06,
|
|
"loss": 0.7646,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 0.8624090863803926,
|
|
"grad_norm": 0.19469429796070387,
|
|
"learning_rate": 1.8753298553091004e-06,
|
|
"loss": 0.7662,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 0.862807611836206,
|
|
"grad_norm": 0.19523553415875863,
|
|
"learning_rate": 1.8646460589957138e-06,
|
|
"loss": 0.7675,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.8632061372920196,
|
|
"grad_norm": 0.19836255092500826,
|
|
"learning_rate": 1.8539912943700921e-06,
|
|
"loss": 0.8162,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 0.863604662747833,
|
|
"grad_norm": 0.33046612241829804,
|
|
"learning_rate": 1.8433655784887338e-06,
|
|
"loss": 0.786,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 0.8640031882036465,
|
|
"grad_norm": 0.20287140254104755,
|
|
"learning_rate": 1.832768928361648e-06,
|
|
"loss": 0.8033,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 0.86440171365946,
|
|
"grad_norm": 0.19837142562234192,
|
|
"learning_rate": 1.8222013609523138e-06,
|
|
"loss": 0.7856,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 0.8648002391152735,
|
|
"grad_norm": 0.21103666545418504,
|
|
"learning_rate": 1.8116628931776437e-06,
|
|
"loss": 0.8434,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.865198764571087,
|
|
"grad_norm": 0.19867703712237042,
|
|
"learning_rate": 1.801153541907974e-06,
|
|
"loss": 0.7698,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 0.8655972900269004,
|
|
"grad_norm": 0.19825876352724692,
|
|
"learning_rate": 1.7906733239670338e-06,
|
|
"loss": 0.772,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 0.865995815482714,
|
|
"grad_norm": 0.20878459364682986,
|
|
"learning_rate": 1.7802222561319116e-06,
|
|
"loss": 0.7581,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 0.8663943409385274,
|
|
"grad_norm": 0.2958038314902087,
|
|
"learning_rate": 1.7698003551330222e-06,
|
|
"loss": 0.7944,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 0.8667928663943409,
|
|
"grad_norm": 0.20169391290837302,
|
|
"learning_rate": 1.7594076376541025e-06,
|
|
"loss": 0.8066,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.8671913918501545,
|
|
"grad_norm": 0.234034044100227,
|
|
"learning_rate": 1.749044120332164e-06,
|
|
"loss": 0.7721,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 0.8675899173059679,
|
|
"grad_norm": 0.2034910419905341,
|
|
"learning_rate": 1.7387098197574782e-06,
|
|
"loss": 0.8084,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 0.8679884427617814,
|
|
"grad_norm": 0.2073685879363281,
|
|
"learning_rate": 1.7284047524735426e-06,
|
|
"loss": 0.7925,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 0.8683869682175949,
|
|
"grad_norm": 0.20037230019907548,
|
|
"learning_rate": 1.7181289349770547e-06,
|
|
"loss": 0.7811,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 0.8687854936734084,
|
|
"grad_norm": 0.21712284699454534,
|
|
"learning_rate": 1.707882383717896e-06,
|
|
"loss": 0.7678,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.8691840191292218,
|
|
"grad_norm": 0.20117180870370702,
|
|
"learning_rate": 1.697665115099083e-06,
|
|
"loss": 0.7942,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 0.8695825445850354,
|
|
"grad_norm": 0.194101573652863,
|
|
"learning_rate": 1.6874771454767723e-06,
|
|
"loss": 0.7824,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 0.8699810700408489,
|
|
"grad_norm": 0.19921324707773355,
|
|
"learning_rate": 1.677318491160207e-06,
|
|
"loss": 0.7928,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 0.8703795954966623,
|
|
"grad_norm": 0.3229505296718228,
|
|
"learning_rate": 1.6671891684117048e-06,
|
|
"loss": 0.827,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 0.8707781209524759,
|
|
"grad_norm": 0.19497337244902666,
|
|
"learning_rate": 1.6570891934466304e-06,
|
|
"loss": 0.8059,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.8711766464082893,
|
|
"grad_norm": 0.19561470121792823,
|
|
"learning_rate": 1.6470185824333617e-06,
|
|
"loss": 0.7976,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 0.8715751718641028,
|
|
"grad_norm": 0.1969078670974646,
|
|
"learning_rate": 1.6369773514932786e-06,
|
|
"loss": 0.7653,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 0.8719736973199164,
|
|
"grad_norm": 0.19792267780479758,
|
|
"learning_rate": 1.6269655167007136e-06,
|
|
"loss": 0.7824,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 0.8723722227757298,
|
|
"grad_norm": 0.19510256307880908,
|
|
"learning_rate": 1.6169830940829578e-06,
|
|
"loss": 0.8068,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 0.8727707482315433,
|
|
"grad_norm": 0.1960870054521117,
|
|
"learning_rate": 1.6070300996202126e-06,
|
|
"loss": 0.7989,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.8731692736873568,
|
|
"grad_norm": 0.19085815051372912,
|
|
"learning_rate": 1.5971065492455617e-06,
|
|
"loss": 0.7636,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 0.8735677991431703,
|
|
"grad_norm": 0.19882296691960544,
|
|
"learning_rate": 1.5872124588449667e-06,
|
|
"loss": 0.7659,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 0.8739663245989837,
|
|
"grad_norm": 0.20028837148412157,
|
|
"learning_rate": 1.5773478442572154e-06,
|
|
"loss": 0.7934,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 0.8743648500547972,
|
|
"grad_norm": 0.19461902920242444,
|
|
"learning_rate": 1.5675127212739183e-06,
|
|
"loss": 0.7905,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 0.8747633755106108,
|
|
"grad_norm": 0.2016751952111212,
|
|
"learning_rate": 1.5577071056394743e-06,
|
|
"loss": 0.7862,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.8751619009664242,
|
|
"grad_norm": 0.19602147097639658,
|
|
"learning_rate": 1.5479310130510428e-06,
|
|
"loss": 0.7845,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 0.8755604264222377,
|
|
"grad_norm": 0.19583338180249446,
|
|
"learning_rate": 1.5381844591585294e-06,
|
|
"loss": 0.7957,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 0.8759589518780512,
|
|
"grad_norm": 0.19403020064241092,
|
|
"learning_rate": 1.5284674595645376e-06,
|
|
"loss": 0.7963,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 0.8763574773338647,
|
|
"grad_norm": 0.19782089212017984,
|
|
"learning_rate": 1.518780029824376e-06,
|
|
"loss": 0.7782,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 0.8767560027896782,
|
|
"grad_norm": 0.19942984981212644,
|
|
"learning_rate": 1.5091221854460037e-06,
|
|
"loss": 0.7975,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.8771545282454917,
|
|
"grad_norm": 0.19196702353727593,
|
|
"learning_rate": 1.4994939418900334e-06,
|
|
"loss": 0.7829,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 0.8775530537013052,
|
|
"grad_norm": 0.19379377172825363,
|
|
"learning_rate": 1.4898953145696738e-06,
|
|
"loss": 0.7982,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 0.8779515791571186,
|
|
"grad_norm": 0.19506234613903994,
|
|
"learning_rate": 1.4803263188507377e-06,
|
|
"loss": 0.7954,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 0.8783501046129322,
|
|
"grad_norm": 0.1978506554262955,
|
|
"learning_rate": 1.4707869700515965e-06,
|
|
"loss": 0.784,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 0.8787486300687456,
|
|
"grad_norm": 0.1980098585833247,
|
|
"learning_rate": 1.4612772834431566e-06,
|
|
"loss": 0.7569,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.8791471555245591,
|
|
"grad_norm": 0.19286242098132406,
|
|
"learning_rate": 1.4517972742488518e-06,
|
|
"loss": 0.7872,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 0.8795456809803727,
|
|
"grad_norm": 0.19098749250411995,
|
|
"learning_rate": 1.4423469576446002e-06,
|
|
"loss": 0.7815,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 0.8799442064361861,
|
|
"grad_norm": 0.20211925019195784,
|
|
"learning_rate": 1.4329263487587896e-06,
|
|
"loss": 0.8205,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 0.8803427318919996,
|
|
"grad_norm": 0.19532927186278154,
|
|
"learning_rate": 1.4235354626722431e-06,
|
|
"loss": 0.8121,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 0.8807412573478131,
|
|
"grad_norm": 0.1977750810931428,
|
|
"learning_rate": 1.4141743144182153e-06,
|
|
"loss": 0.7813,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.8811397828036266,
|
|
"grad_norm": 0.19358648033690376,
|
|
"learning_rate": 1.4048429189823432e-06,
|
|
"loss": 0.7455,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 0.88153830825944,
|
|
"grad_norm": 0.19846194328922676,
|
|
"learning_rate": 1.3955412913026468e-06,
|
|
"loss": 0.7662,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 0.8819368337152536,
|
|
"grad_norm": 0.19353205579063595,
|
|
"learning_rate": 1.3862694462694836e-06,
|
|
"loss": 0.7835,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 0.8823353591710671,
|
|
"grad_norm": 0.1961760649090444,
|
|
"learning_rate": 1.3770273987255322e-06,
|
|
"loss": 0.7869,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 0.8827338846268805,
|
|
"grad_norm": 0.198917812531222,
|
|
"learning_rate": 1.36781516346578e-06,
|
|
"loss": 0.7903,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.883132410082694,
|
|
"grad_norm": 0.31377432050732995,
|
|
"learning_rate": 1.3586327552374834e-06,
|
|
"loss": 0.7966,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 0.8835309355385075,
|
|
"grad_norm": 0.198947487765649,
|
|
"learning_rate": 1.349480188740151e-06,
|
|
"loss": 0.7845,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 0.883929460994321,
|
|
"grad_norm": 0.19609086834502595,
|
|
"learning_rate": 1.3403574786255203e-06,
|
|
"loss": 0.8267,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 0.8843279864501346,
|
|
"grad_norm": 0.19456541239424982,
|
|
"learning_rate": 1.3312646394975336e-06,
|
|
"loss": 0.7844,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 0.884726511905948,
|
|
"grad_norm": 0.18969287146965966,
|
|
"learning_rate": 1.322201685912321e-06,
|
|
"loss": 0.7561,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.8851250373617615,
|
|
"grad_norm": 0.19860314043543428,
|
|
"learning_rate": 1.3131686323781567e-06,
|
|
"loss": 0.7827,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 0.8855235628175749,
|
|
"grad_norm": 0.19669097960151344,
|
|
"learning_rate": 1.3041654933554627e-06,
|
|
"loss": 0.8035,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 0.8859220882733885,
|
|
"grad_norm": 0.20094073004540627,
|
|
"learning_rate": 1.2951922832567676e-06,
|
|
"loss": 0.7944,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 0.8863206137292019,
|
|
"grad_norm": 0.20272095445679028,
|
|
"learning_rate": 1.28624901644669e-06,
|
|
"loss": 0.8167,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 0.8867191391850154,
|
|
"grad_norm": 0.1953963230612544,
|
|
"learning_rate": 1.2773357072419156e-06,
|
|
"loss": 0.7721,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.887117664640829,
|
|
"grad_norm": 0.19553209878909217,
|
|
"learning_rate": 1.2684523699111683e-06,
|
|
"loss": 0.7898,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 0.8875161900966424,
|
|
"grad_norm": 0.19739783007158812,
|
|
"learning_rate": 1.259599018675197e-06,
|
|
"loss": 0.7751,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 0.8879147155524559,
|
|
"grad_norm": 0.2004207680549029,
|
|
"learning_rate": 1.2507756677067407e-06,
|
|
"loss": 0.7937,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 0.8883132410082694,
|
|
"grad_norm": 0.20274609576106925,
|
|
"learning_rate": 1.241982331130518e-06,
|
|
"loss": 0.7834,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 0.8887117664640829,
|
|
"grad_norm": 0.191410521331001,
|
|
"learning_rate": 1.233219023023211e-06,
|
|
"loss": 0.7964,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.8891102919198964,
|
|
"grad_norm": 0.19149409444639265,
|
|
"learning_rate": 1.2244857574134073e-06,
|
|
"loss": 0.8145,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 0.8895088173757099,
|
|
"grad_norm": 0.18942106902230516,
|
|
"learning_rate": 1.215782548281621e-06,
|
|
"loss": 0.7978,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 0.8899073428315234,
|
|
"grad_norm": 0.1936628231256215,
|
|
"learning_rate": 1.2071094095602388e-06,
|
|
"loss": 0.7688,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 0.8903058682873368,
|
|
"grad_norm": 0.19500787909123946,
|
|
"learning_rate": 1.198466355133514e-06,
|
|
"loss": 0.7985,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 0.8907043937431504,
|
|
"grad_norm": 0.19336207767259123,
|
|
"learning_rate": 1.1898533988375438e-06,
|
|
"loss": 0.7776,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.8911029191989638,
|
|
"grad_norm": 0.19306715682597733,
|
|
"learning_rate": 1.1812705544602387e-06,
|
|
"loss": 0.7781,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 0.8915014446547773,
|
|
"grad_norm": 0.1941966927070721,
|
|
"learning_rate": 1.1727178357413082e-06,
|
|
"loss": 0.7966,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 0.8918999701105909,
|
|
"grad_norm": 0.19470860439438434,
|
|
"learning_rate": 1.1641952563722292e-06,
|
|
"loss": 0.7875,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 0.8922984955664043,
|
|
"grad_norm": 0.19439172234266613,
|
|
"learning_rate": 1.155702829996239e-06,
|
|
"loss": 0.7949,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 0.8926970210222178,
|
|
"grad_norm": 0.19183529464923693,
|
|
"learning_rate": 1.1472405702082966e-06,
|
|
"loss": 0.8169,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.8930955464780312,
|
|
"grad_norm": 0.187219800602026,
|
|
"learning_rate": 1.1388084905550767e-06,
|
|
"loss": 0.7913,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 0.8934940719338448,
|
|
"grad_norm": 0.19725379366605064,
|
|
"learning_rate": 1.1304066045349371e-06,
|
|
"loss": 0.7759,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 0.8938925973896583,
|
|
"grad_norm": 0.1992594593840214,
|
|
"learning_rate": 1.1220349255978991e-06,
|
|
"loss": 0.8375,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 0.8942911228454717,
|
|
"grad_norm": 0.192342482643918,
|
|
"learning_rate": 1.1136934671456356e-06,
|
|
"loss": 0.7732,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 0.8946896483012853,
|
|
"grad_norm": 0.19446382141607246,
|
|
"learning_rate": 1.1053822425314253e-06,
|
|
"loss": 0.7787,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.8950881737570987,
|
|
"grad_norm": 0.21096981672144022,
|
|
"learning_rate": 1.0971012650601653e-06,
|
|
"loss": 0.7856,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 0.8954866992129122,
|
|
"grad_norm": 0.19037453116007338,
|
|
"learning_rate": 1.0888505479883226e-06,
|
|
"loss": 0.8141,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 0.8958852246687257,
|
|
"grad_norm": 0.1936745233440335,
|
|
"learning_rate": 1.0806301045239253e-06,
|
|
"loss": 0.776,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 0.8962837501245392,
|
|
"grad_norm": 0.19445932650167486,
|
|
"learning_rate": 1.0724399478265312e-06,
|
|
"loss": 0.7968,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 0.8966822755803527,
|
|
"grad_norm": 0.1942260109414643,
|
|
"learning_rate": 1.064280091007226e-06,
|
|
"loss": 0.7982,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.8970808010361662,
|
|
"grad_norm": 0.19599907378500261,
|
|
"learning_rate": 1.056150547128585e-06,
|
|
"loss": 0.7812,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 0.8974793264919797,
|
|
"grad_norm": 0.18888785669949568,
|
|
"learning_rate": 1.048051329204649e-06,
|
|
"loss": 0.7749,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 0.8978778519477931,
|
|
"grad_norm": 0.19413389947923068,
|
|
"learning_rate": 1.0399824502009292e-06,
|
|
"loss": 0.817,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 0.8982763774036067,
|
|
"grad_norm": 0.19041901167362632,
|
|
"learning_rate": 1.0319439230343552e-06,
|
|
"loss": 0.7829,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 0.8986749028594202,
|
|
"grad_norm": 0.190265615798965,
|
|
"learning_rate": 1.023935760573278e-06,
|
|
"loss": 0.7854,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.8990734283152336,
|
|
"grad_norm": 0.1917924700076846,
|
|
"learning_rate": 1.0159579756374272e-06,
|
|
"loss": 0.8021,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 0.8994719537710472,
|
|
"grad_norm": 0.19462841904809697,
|
|
"learning_rate": 1.0080105809979134e-06,
|
|
"loss": 0.7983,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 0.8998704792268606,
|
|
"grad_norm": 0.19572994397974086,
|
|
"learning_rate": 1.0000935893771957e-06,
|
|
"loss": 0.7807,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 0.9002690046826741,
|
|
"grad_norm": 0.19368930137185603,
|
|
"learning_rate": 9.922070134490625e-07,
|
|
"loss": 0.8069,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 0.9006675301384875,
|
|
"grad_norm": 0.18858216628151148,
|
|
"learning_rate": 9.843508658386147e-07,
|
|
"loss": 0.778,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.9010660555943011,
|
|
"grad_norm": 0.1902121814138829,
|
|
"learning_rate": 9.765251591222302e-07,
|
|
"loss": 0.7545,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 0.9014645810501146,
|
|
"grad_norm": 0.19207716877501332,
|
|
"learning_rate": 9.687299058275723e-07,
|
|
"loss": 0.8013,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 0.901863106505928,
|
|
"grad_norm": 0.19334913879349405,
|
|
"learning_rate": 9.609651184335389e-07,
|
|
"loss": 0.7946,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 0.9022616319617416,
|
|
"grad_norm": 0.19358676874591074,
|
|
"learning_rate": 9.532308093702691e-07,
|
|
"loss": 0.7772,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 0.902660157417555,
|
|
"grad_norm": 0.19148428383209684,
|
|
"learning_rate": 9.455269910191101e-07,
|
|
"loss": 0.7696,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.9030586828733685,
|
|
"grad_norm": 0.19540552907978265,
|
|
"learning_rate": 9.378536757125878e-07,
|
|
"loss": 0.8139,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 0.903457208329182,
|
|
"grad_norm": 0.19107073335621758,
|
|
"learning_rate": 9.302108757344119e-07,
|
|
"loss": 0.7858,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 0.9038557337849955,
|
|
"grad_norm": 0.1918963920445226,
|
|
"learning_rate": 9.225986033194268e-07,
|
|
"loss": 0.7788,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 0.904254259240809,
|
|
"grad_norm": 0.19310448866238283,
|
|
"learning_rate": 9.150168706536178e-07,
|
|
"loss": 0.7866,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 0.9046527846966225,
|
|
"grad_norm": 0.19687413534571704,
|
|
"learning_rate": 9.07465689874083e-07,
|
|
"loss": 0.7893,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.905051310152436,
|
|
"grad_norm": 0.1914042946404483,
|
|
"learning_rate": 8.99945073069004e-07,
|
|
"loss": 0.7748,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 0.9054498356082494,
|
|
"grad_norm": 0.2024204777517844,
|
|
"learning_rate": 8.924550322776415e-07,
|
|
"loss": 0.8568,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 0.905848361064063,
|
|
"grad_norm": 0.19403659491993944,
|
|
"learning_rate": 8.849955794903042e-07,
|
|
"loss": 0.8056,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 0.9062468865198765,
|
|
"grad_norm": 0.19411389381810215,
|
|
"learning_rate": 8.775667266483378e-07,
|
|
"loss": 0.7911,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 0.9066454119756899,
|
|
"grad_norm": 0.1924715710067694,
|
|
"learning_rate": 8.70168485644094e-07,
|
|
"loss": 0.7965,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.9070439374315035,
|
|
"grad_norm": 0.20038568330574344,
|
|
"learning_rate": 8.628008683209388e-07,
|
|
"loss": 0.7843,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 0.9074424628873169,
|
|
"grad_norm": 0.20132103527197703,
|
|
"learning_rate": 8.554638864731957e-07,
|
|
"loss": 0.7999,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 0.9078409883431304,
|
|
"grad_norm": 0.19240880279129838,
|
|
"learning_rate": 8.481575518461538e-07,
|
|
"loss": 0.7665,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 0.9082395137989439,
|
|
"grad_norm": 0.19434784566980481,
|
|
"learning_rate": 8.408818761360437e-07,
|
|
"loss": 0.8056,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 0.9086380392547574,
|
|
"grad_norm": 0.1978390018533812,
|
|
"learning_rate": 8.336368709900089e-07,
|
|
"loss": 0.8144,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.9090365647105709,
|
|
"grad_norm": 0.19566833800627478,
|
|
"learning_rate": 8.264225480061028e-07,
|
|
"loss": 0.7771,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 0.9094350901663844,
|
|
"grad_norm": 0.1975826569993677,
|
|
"learning_rate": 8.192389187332539e-07,
|
|
"loss": 0.7938,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 0.9098336156221979,
|
|
"grad_norm": 0.19045983399236568,
|
|
"learning_rate": 8.120859946712634e-07,
|
|
"loss": 0.7845,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 0.9102321410780113,
|
|
"grad_norm": 0.19130128975193195,
|
|
"learning_rate": 8.049637872707672e-07,
|
|
"loss": 0.7958,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 0.9106306665338249,
|
|
"grad_norm": 0.19085596321752288,
|
|
"learning_rate": 7.978723079332406e-07,
|
|
"loss": 0.7612,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.9110291919896384,
|
|
"grad_norm": 0.33424704454608156,
|
|
"learning_rate": 7.908115680109629e-07,
|
|
"loss": 0.7853,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 0.9114277174454518,
|
|
"grad_norm": 0.1954738492496232,
|
|
"learning_rate": 7.837815788070035e-07,
|
|
"loss": 0.8041,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 0.9118262429012653,
|
|
"grad_norm": 0.19475975240294963,
|
|
"learning_rate": 7.767823515752116e-07,
|
|
"loss": 0.7872,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 0.9122247683570788,
|
|
"grad_norm": 0.18960360869197374,
|
|
"learning_rate": 7.698138975201819e-07,
|
|
"loss": 0.8041,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 0.9126232938128923,
|
|
"grad_norm": 0.19589521054226136,
|
|
"learning_rate": 7.628762277972534e-07,
|
|
"loss": 0.7982,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.9130218192687057,
|
|
"grad_norm": 0.1917631141189516,
|
|
"learning_rate": 7.559693535124802e-07,
|
|
"loss": 0.7938,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 0.9134203447245193,
|
|
"grad_norm": 0.19253740493505767,
|
|
"learning_rate": 7.490932857226219e-07,
|
|
"loss": 0.7959,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 0.9138188701803328,
|
|
"grad_norm": 0.19582622703851235,
|
|
"learning_rate": 7.422480354351202e-07,
|
|
"loss": 0.834,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 0.9142173956361462,
|
|
"grad_norm": 0.18995947588533355,
|
|
"learning_rate": 7.354336136080809e-07,
|
|
"loss": 0.7762,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 0.9146159210919598,
|
|
"grad_norm": 0.18806413991915635,
|
|
"learning_rate": 7.286500311502686e-07,
|
|
"loss": 0.797,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.9150144465477732,
|
|
"grad_norm": 0.19277211114688542,
|
|
"learning_rate": 7.218972989210616e-07,
|
|
"loss": 0.7763,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 0.9154129720035867,
|
|
"grad_norm": 0.19199075944716948,
|
|
"learning_rate": 7.151754277304657e-07,
|
|
"loss": 0.7568,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 0.9158114974594003,
|
|
"grad_norm": 0.19072158788713017,
|
|
"learning_rate": 7.084844283390823e-07,
|
|
"loss": 0.7915,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 0.9162100229152137,
|
|
"grad_norm": 0.19205282392375037,
|
|
"learning_rate": 7.018243114580858e-07,
|
|
"loss": 0.8034,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 0.9166085483710272,
|
|
"grad_norm": 0.2052923264205816,
|
|
"learning_rate": 6.951950877492209e-07,
|
|
"loss": 0.7857,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.9170070738268407,
|
|
"grad_norm": 0.22779157975014266,
|
|
"learning_rate": 6.885967678247652e-07,
|
|
"loss": 0.756,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 0.9174055992826542,
|
|
"grad_norm": 0.18774142177297953,
|
|
"learning_rate": 6.820293622475427e-07,
|
|
"loss": 0.7857,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 0.9178041247384676,
|
|
"grad_norm": 0.19498696530660528,
|
|
"learning_rate": 6.754928815308703e-07,
|
|
"loss": 0.7991,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 0.9182026501942812,
|
|
"grad_norm": 0.19209043390951142,
|
|
"learning_rate": 6.689873361385691e-07,
|
|
"loss": 0.8101,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 0.9186011756500947,
|
|
"grad_norm": 0.19290885228459345,
|
|
"learning_rate": 6.625127364849371e-07,
|
|
"loss": 0.7955,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.9189997011059081,
|
|
"grad_norm": 0.1877743297868329,
|
|
"learning_rate": 6.560690929347324e-07,
|
|
"loss": 0.7844,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 0.9193982265617217,
|
|
"grad_norm": 0.19214675198757558,
|
|
"learning_rate": 6.49656415803157e-07,
|
|
"loss": 0.7903,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 0.9197967520175351,
|
|
"grad_norm": 0.19219057718417967,
|
|
"learning_rate": 6.432747153558416e-07,
|
|
"loss": 0.7761,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 0.9201952774733486,
|
|
"grad_norm": 0.18838660622383804,
|
|
"learning_rate": 6.369240018088297e-07,
|
|
"loss": 0.7947,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 0.9205938029291622,
|
|
"grad_norm": 0.1886108613905356,
|
|
"learning_rate": 6.306042853285532e-07,
|
|
"loss": 0.7813,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.9209923283849756,
|
|
"grad_norm": 0.1925293191301323,
|
|
"learning_rate": 6.243155760318332e-07,
|
|
"loss": 0.7982,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 0.9213908538407891,
|
|
"grad_norm": 0.19530492377194633,
|
|
"learning_rate": 6.180578839858475e-07,
|
|
"loss": 0.7885,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 0.9217893792966025,
|
|
"grad_norm": 0.18651121519218392,
|
|
"learning_rate": 6.118312192081166e-07,
|
|
"loss": 0.7949,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 0.9221879047524161,
|
|
"grad_norm": 0.19338094401034905,
|
|
"learning_rate": 6.056355916665024e-07,
|
|
"loss": 0.7717,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 0.9225864302082295,
|
|
"grad_norm": 0.18423336706407692,
|
|
"learning_rate": 5.994710112791713e-07,
|
|
"loss": 0.7811,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.922984955664043,
|
|
"grad_norm": 0.18939928604114048,
|
|
"learning_rate": 5.933374879145893e-07,
|
|
"loss": 0.7755,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 0.9233834811198566,
|
|
"grad_norm": 0.1926905369480336,
|
|
"learning_rate": 5.872350313915131e-07,
|
|
"loss": 0.8114,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 0.92378200657567,
|
|
"grad_norm": 0.19646582733405174,
|
|
"learning_rate": 5.811636514789598e-07,
|
|
"loss": 0.7871,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 0.9241805320314835,
|
|
"grad_norm": 0.19298296374648816,
|
|
"learning_rate": 5.75123357896199e-07,
|
|
"loss": 0.8039,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 0.924579057487297,
|
|
"grad_norm": 0.190458055205602,
|
|
"learning_rate": 5.691141603127381e-07,
|
|
"loss": 0.7835,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.9249775829431105,
|
|
"grad_norm": 0.1915360703578091,
|
|
"learning_rate": 5.631360683483001e-07,
|
|
"loss": 0.8234,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 0.925376108398924,
|
|
"grad_norm": 0.23747828168438873,
|
|
"learning_rate": 5.571890915728206e-07,
|
|
"loss": 0.79,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 0.9257746338547375,
|
|
"grad_norm": 0.19153058747182247,
|
|
"learning_rate": 5.512732395064224e-07,
|
|
"loss": 0.7649,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 0.926173159310551,
|
|
"grad_norm": 0.19727254538457217,
|
|
"learning_rate": 5.453885216193988e-07,
|
|
"loss": 0.8349,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 0.9265716847663644,
|
|
"grad_norm": 0.1951429962580588,
|
|
"learning_rate": 5.395349473322032e-07,
|
|
"loss": 0.7978,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.926970210222178,
|
|
"grad_norm": 0.18510338682179783,
|
|
"learning_rate": 5.337125260154397e-07,
|
|
"loss": 0.7777,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 0.9273687356779914,
|
|
"grad_norm": 0.1946540136821385,
|
|
"learning_rate": 5.279212669898326e-07,
|
|
"loss": 0.8047,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 0.9277672611338049,
|
|
"grad_norm": 0.18491969397571634,
|
|
"learning_rate": 5.221611795262283e-07,
|
|
"loss": 0.7573,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 0.9281657865896185,
|
|
"grad_norm": 0.19372900242022098,
|
|
"learning_rate": 5.164322728455684e-07,
|
|
"loss": 0.8202,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 0.9285643120454319,
|
|
"grad_norm": 0.19227025991711344,
|
|
"learning_rate": 5.107345561188836e-07,
|
|
"loss": 0.7805,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.9289628375012454,
|
|
"grad_norm": 0.1884216178497241,
|
|
"learning_rate": 5.050680384672668e-07,
|
|
"loss": 0.7911,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 0.9293613629570588,
|
|
"grad_norm": 0.19064898796693053,
|
|
"learning_rate": 4.994327289618728e-07,
|
|
"loss": 0.8286,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 0.9297598884128724,
|
|
"grad_norm": 0.19168131739896943,
|
|
"learning_rate": 4.938286366238942e-07,
|
|
"loss": 0.7741,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 0.9301584138686858,
|
|
"grad_norm": 0.19521237858027687,
|
|
"learning_rate": 4.88255770424555e-07,
|
|
"loss": 0.806,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 0.9305569393244993,
|
|
"grad_norm": 0.19197627577848786,
|
|
"learning_rate": 4.827141392850876e-07,
|
|
"loss": 0.7898,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.9309554647803129,
|
|
"grad_norm": 0.19415977793697126,
|
|
"learning_rate": 4.772037520767181e-07,
|
|
"loss": 0.7764,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 0.9313539902361263,
|
|
"grad_norm": 0.18652836321806102,
|
|
"learning_rate": 4.7172461762066356e-07,
|
|
"loss": 0.8058,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 0.9317525156919398,
|
|
"grad_norm": 0.1894930310367945,
|
|
"learning_rate": 4.662767446881078e-07,
|
|
"loss": 0.7747,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 0.9321510411477533,
|
|
"grad_norm": 0.19105060903289703,
|
|
"learning_rate": 4.6086014200018793e-07,
|
|
"loss": 0.7969,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 0.9325495666035668,
|
|
"grad_norm": 0.18939524754458784,
|
|
"learning_rate": 4.5547481822799e-07,
|
|
"loss": 0.775,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.9329480920593803,
|
|
"grad_norm": 0.18595385386063937,
|
|
"learning_rate": 4.5012078199251576e-07,
|
|
"loss": 0.7898,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 0.9333466175151938,
|
|
"grad_norm": 0.19033445395137963,
|
|
"learning_rate": 4.4479804186469353e-07,
|
|
"loss": 0.7734,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 0.9337451429710073,
|
|
"grad_norm": 0.18559439244342524,
|
|
"learning_rate": 4.3950660636534084e-07,
|
|
"loss": 0.7788,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 0.9341436684268207,
|
|
"grad_norm": 0.20079834918402295,
|
|
"learning_rate": 4.342464839651661e-07,
|
|
"loss": 0.8214,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 0.9345421938826343,
|
|
"grad_norm": 0.1912887265583529,
|
|
"learning_rate": 4.290176830847559e-07,
|
|
"loss": 0.7846,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.9349407193384477,
|
|
"grad_norm": 0.18960450357073091,
|
|
"learning_rate": 4.238202120945478e-07,
|
|
"loss": 0.7669,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 0.9353392447942612,
|
|
"grad_norm": 0.19153974072758262,
|
|
"learning_rate": 4.186540793148308e-07,
|
|
"loss": 0.812,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 0.9357377702500748,
|
|
"grad_norm": 0.18970776862717967,
|
|
"learning_rate": 4.13519293015725e-07,
|
|
"loss": 0.8019,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 0.9361362957058882,
|
|
"grad_norm": 0.18839937884535282,
|
|
"learning_rate": 4.084158614171685e-07,
|
|
"loss": 0.7991,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 0.9365348211617017,
|
|
"grad_norm": 0.19300125889679948,
|
|
"learning_rate": 4.033437926889061e-07,
|
|
"loss": 0.7821,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.9369333466175152,
|
|
"grad_norm": 0.18898379977540675,
|
|
"learning_rate": 3.983030949504829e-07,
|
|
"loss": 0.7919,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 0.9373318720733287,
|
|
"grad_norm": 0.19196900837664088,
|
|
"learning_rate": 3.932937762712108e-07,
|
|
"loss": 0.7896,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 0.9377303975291422,
|
|
"grad_norm": 0.19925612955481922,
|
|
"learning_rate": 3.883158446701796e-07,
|
|
"loss": 0.8139,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 0.9381289229849556,
|
|
"grad_norm": 0.18497982043923966,
|
|
"learning_rate": 3.833693081162326e-07,
|
|
"loss": 0.805,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 0.9385274484407692,
|
|
"grad_norm": 0.19343018564847927,
|
|
"learning_rate": 3.784541745279491e-07,
|
|
"loss": 0.7965,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.9389259738965826,
|
|
"grad_norm": 0.2122663845906148,
|
|
"learning_rate": 3.735704517736438e-07,
|
|
"loss": 0.7731,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 0.9393244993523961,
|
|
"grad_norm": 0.19304292990391148,
|
|
"learning_rate": 3.6871814767134305e-07,
|
|
"loss": 0.7985,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 0.9397230248082096,
|
|
"grad_norm": 0.18892205141585297,
|
|
"learning_rate": 3.638972699887822e-07,
|
|
"loss": 0.8119,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 0.9401215502640231,
|
|
"grad_norm": 0.1913297127895265,
|
|
"learning_rate": 3.5910782644338336e-07,
|
|
"loss": 0.7902,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 0.9405200757198366,
|
|
"grad_norm": 0.19033240742898452,
|
|
"learning_rate": 3.543498247022492e-07,
|
|
"loss": 0.7575,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.9409186011756501,
|
|
"grad_norm": 0.18332766514040255,
|
|
"learning_rate": 3.4962327238215134e-07,
|
|
"loss": 0.7598,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 0.9413171266314636,
|
|
"grad_norm": 0.18186761156092401,
|
|
"learning_rate": 3.449281770495105e-07,
|
|
"loss": 0.7943,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 0.941715652087277,
|
|
"grad_norm": 0.1965958953954756,
|
|
"learning_rate": 3.402645462204013e-07,
|
|
"loss": 0.8086,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 0.9421141775430906,
|
|
"grad_norm": 0.19135591876413088,
|
|
"learning_rate": 3.3563238736051604e-07,
|
|
"loss": 0.804,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 0.9425127029989041,
|
|
"grad_norm": 0.18843239133027376,
|
|
"learning_rate": 3.310317078851744e-07,
|
|
"loss": 0.7751,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.9429112284547175,
|
|
"grad_norm": 0.1944972137264629,
|
|
"learning_rate": 3.2646251515929597e-07,
|
|
"loss": 0.7862,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 0.9433097539105311,
|
|
"grad_norm": 0.19037760409725837,
|
|
"learning_rate": 3.2192481649740095e-07,
|
|
"loss": 0.8166,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 0.9437082793663445,
|
|
"grad_norm": 0.1871623371191181,
|
|
"learning_rate": 3.1741861916359193e-07,
|
|
"loss": 0.7655,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 0.944106804822158,
|
|
"grad_norm": 0.18764789979300736,
|
|
"learning_rate": 3.129439303715387e-07,
|
|
"loss": 0.7942,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 0.9445053302779715,
|
|
"grad_norm": 0.18934527512136454,
|
|
"learning_rate": 3.0850075728448e-07,
|
|
"loss": 0.8114,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.944903855733785,
|
|
"grad_norm": 0.18732805849733797,
|
|
"learning_rate": 3.0408910701519303e-07,
|
|
"loss": 0.783,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 0.9453023811895985,
|
|
"grad_norm": 0.20572141307841002,
|
|
"learning_rate": 2.997089866259972e-07,
|
|
"loss": 0.8062,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 0.945700906645412,
|
|
"grad_norm": 0.19247170516758175,
|
|
"learning_rate": 2.953604031287349e-07,
|
|
"loss": 0.8098,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 0.9460994321012255,
|
|
"grad_norm": 0.18598311478233595,
|
|
"learning_rate": 2.910433634847709e-07,
|
|
"loss": 0.7549,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 0.9464979575570389,
|
|
"grad_norm": 0.18855187031366835,
|
|
"learning_rate": 2.8675787460496816e-07,
|
|
"loss": 0.7688,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.9468964830128525,
|
|
"grad_norm": 0.18656543479412127,
|
|
"learning_rate": 2.8250394334967903e-07,
|
|
"loss": 0.7844,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 0.947295008468666,
|
|
"grad_norm": 0.1987614281014416,
|
|
"learning_rate": 2.7828157652874054e-07,
|
|
"loss": 0.7873,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 0.9476935339244794,
|
|
"grad_norm": 0.18946005010606964,
|
|
"learning_rate": 2.7409078090146144e-07,
|
|
"loss": 0.7919,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 0.948092059380293,
|
|
"grad_norm": 0.18729259602203205,
|
|
"learning_rate": 2.699315631766064e-07,
|
|
"loss": 0.7906,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 0.9484905848361064,
|
|
"grad_norm": 0.21584635145950695,
|
|
"learning_rate": 2.6580393001239604e-07,
|
|
"loss": 0.7525,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.9488891102919199,
|
|
"grad_norm": 0.19301764265768684,
|
|
"learning_rate": 2.617078880164825e-07,
|
|
"loss": 0.796,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 0.9492876357477333,
|
|
"grad_norm": 0.18867210144130342,
|
|
"learning_rate": 2.5764344374595187e-07,
|
|
"loss": 0.8082,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 0.9496861612035469,
|
|
"grad_norm": 0.18458535962402378,
|
|
"learning_rate": 2.5361060370729715e-07,
|
|
"loss": 0.7828,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 0.9500846866593604,
|
|
"grad_norm": 0.18940087074242587,
|
|
"learning_rate": 2.496093743564321e-07,
|
|
"loss": 0.7912,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 0.9504832121151738,
|
|
"grad_norm": 0.1967469512602545,
|
|
"learning_rate": 2.4563976209865504e-07,
|
|
"loss": 0.795,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.9508817375709874,
|
|
"grad_norm": 0.18106777816661615,
|
|
"learning_rate": 2.417017732886562e-07,
|
|
"loss": 0.7606,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 0.9512802630268008,
|
|
"grad_norm": 0.19029595392071927,
|
|
"learning_rate": 2.377954142305039e-07,
|
|
"loss": 0.7953,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 0.9516787884826143,
|
|
"grad_norm": 0.1920867643492066,
|
|
"learning_rate": 2.3392069117762706e-07,
|
|
"loss": 0.7959,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 0.9520773139384279,
|
|
"grad_norm": 0.18771665475461194,
|
|
"learning_rate": 2.300776103328173e-07,
|
|
"loss": 0.7736,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 0.9524758393942413,
|
|
"grad_norm": 0.18980640789999415,
|
|
"learning_rate": 2.2626617784820225e-07,
|
|
"loss": 0.7606,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.9528743648500548,
|
|
"grad_norm": 0.18734395867405335,
|
|
"learning_rate": 2.2248639982525688e-07,
|
|
"loss": 0.7989,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 0.9532728903058683,
|
|
"grad_norm": 0.2160018240089795,
|
|
"learning_rate": 2.1873828231477433e-07,
|
|
"loss": 0.7957,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 0.9536714157616818,
|
|
"grad_norm": 0.1868607933405286,
|
|
"learning_rate": 2.150218313168706e-07,
|
|
"loss": 0.8183,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 0.9540699412174952,
|
|
"grad_norm": 0.18834121472126297,
|
|
"learning_rate": 2.113370527809644e-07,
|
|
"loss": 0.7748,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 0.9544684666733088,
|
|
"grad_norm": 0.1866371434060772,
|
|
"learning_rate": 2.07683952605775e-07,
|
|
"loss": 0.7682,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.9548669921291223,
|
|
"grad_norm": 0.19477979464068296,
|
|
"learning_rate": 2.0406253663930675e-07,
|
|
"loss": 0.7962,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 0.9552655175849357,
|
|
"grad_norm": 0.18710243595192597,
|
|
"learning_rate": 2.0047281067884672e-07,
|
|
"loss": 0.7971,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 0.9556640430407493,
|
|
"grad_norm": 0.18958183027461045,
|
|
"learning_rate": 1.9691478047094924e-07,
|
|
"loss": 0.7851,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 0.9560625684965627,
|
|
"grad_norm": 0.18583596188293008,
|
|
"learning_rate": 1.9338845171142928e-07,
|
|
"loss": 0.7729,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 0.9564610939523762,
|
|
"grad_norm": 0.1846233484812463,
|
|
"learning_rate": 1.8989383004535121e-07,
|
|
"loss": 0.7797,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.9568596194081896,
|
|
"grad_norm": 0.18844365183443165,
|
|
"learning_rate": 1.86430921067029e-07,
|
|
"loss": 0.7869,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 0.9572581448640032,
|
|
"grad_norm": 0.1928283675611628,
|
|
"learning_rate": 1.8299973031999707e-07,
|
|
"loss": 0.8196,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 0.9576566703198167,
|
|
"grad_norm": 0.1871302583547213,
|
|
"learning_rate": 1.7960026329702618e-07,
|
|
"loss": 0.7688,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 0.9580551957756301,
|
|
"grad_norm": 0.1819281786572131,
|
|
"learning_rate": 1.762325254400965e-07,
|
|
"loss": 0.7745,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 0.9584537212314437,
|
|
"grad_norm": 0.18580491353142833,
|
|
"learning_rate": 1.7289652214039775e-07,
|
|
"loss": 0.7688,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.9588522466872571,
|
|
"grad_norm": 0.18495610694488546,
|
|
"learning_rate": 1.6959225873831586e-07,
|
|
"loss": 0.7863,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 0.9592507721430706,
|
|
"grad_norm": 0.18788375628377132,
|
|
"learning_rate": 1.6631974052342846e-07,
|
|
"loss": 0.7826,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 0.9596492975988842,
|
|
"grad_norm": 0.18917657214611222,
|
|
"learning_rate": 1.6307897273449168e-07,
|
|
"loss": 0.7734,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 0.9600478230546976,
|
|
"grad_norm": 0.18759506045359045,
|
|
"learning_rate": 1.5986996055943781e-07,
|
|
"loss": 0.7992,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 0.9604463485105111,
|
|
"grad_norm": 0.1916191115268579,
|
|
"learning_rate": 1.5669270913536427e-07,
|
|
"loss": 0.8289,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.9608448739663246,
|
|
"grad_norm": 0.18451542468901574,
|
|
"learning_rate": 1.535472235485158e-07,
|
|
"loss": 0.7726,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 0.9612433994221381,
|
|
"grad_norm": 0.18676157641440086,
|
|
"learning_rate": 1.5043350883429786e-07,
|
|
"loss": 0.7922,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 0.9616419248779515,
|
|
"grad_norm": 0.1872437071497714,
|
|
"learning_rate": 1.4735156997724765e-07,
|
|
"loss": 0.7802,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 0.9620404503337651,
|
|
"grad_norm": 0.18907840330520773,
|
|
"learning_rate": 1.4430141191103865e-07,
|
|
"loss": 0.7903,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 0.9624389757895786,
|
|
"grad_norm": 0.18712650053474555,
|
|
"learning_rate": 1.41283039518465e-07,
|
|
"loss": 0.7993,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.962837501245392,
|
|
"grad_norm": 0.19060675078211464,
|
|
"learning_rate": 1.3829645763144162e-07,
|
|
"loss": 0.7952,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 0.9632360267012056,
|
|
"grad_norm": 0.19012885112510405,
|
|
"learning_rate": 1.353416710309885e-07,
|
|
"loss": 0.7988,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 0.963634552157019,
|
|
"grad_norm": 0.18668957461300054,
|
|
"learning_rate": 1.324186844472264e-07,
|
|
"loss": 0.7676,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 0.9640330776128325,
|
|
"grad_norm": 0.18246864478928232,
|
|
"learning_rate": 1.295275025593745e-07,
|
|
"loss": 0.7837,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 0.9644316030686461,
|
|
"grad_norm": 0.18700001914029454,
|
|
"learning_rate": 1.2666812999573064e-07,
|
|
"loss": 0.7841,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.9648301285244595,
|
|
"grad_norm": 0.18732989426938132,
|
|
"learning_rate": 1.2384057133367988e-07,
|
|
"loss": 0.7682,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 0.965228653980273,
|
|
"grad_norm": 0.18764475457309285,
|
|
"learning_rate": 1.2104483109967035e-07,
|
|
"loss": 0.7989,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 0.9656271794360864,
|
|
"grad_norm": 0.18205691271384167,
|
|
"learning_rate": 1.1828091376921758e-07,
|
|
"loss": 0.761,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 0.9660257048919,
|
|
"grad_norm": 0.18817192206412373,
|
|
"learning_rate": 1.1554882376689557e-07,
|
|
"loss": 0.795,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 0.9664242303477134,
|
|
"grad_norm": 0.18225533752041095,
|
|
"learning_rate": 1.1284856546632583e-07,
|
|
"loss": 0.7544,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.966822755803527,
|
|
"grad_norm": 0.1888654968857259,
|
|
"learning_rate": 1.1018014319017056e-07,
|
|
"loss": 0.7938,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 0.9672212812593405,
|
|
"grad_norm": 0.18791322397897098,
|
|
"learning_rate": 1.0754356121013276e-07,
|
|
"loss": 0.8,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 0.9676198067151539,
|
|
"grad_norm": 0.18588223430788398,
|
|
"learning_rate": 1.0493882374694287e-07,
|
|
"loss": 0.7909,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 0.9680183321709674,
|
|
"grad_norm": 0.18541611026382643,
|
|
"learning_rate": 1.0236593497035208e-07,
|
|
"loss": 0.7986,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 0.9684168576267809,
|
|
"grad_norm": 0.18672094333830974,
|
|
"learning_rate": 9.982489899912573e-08,
|
|
"loss": 0.7854,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.9688153830825944,
|
|
"grad_norm": 0.1943543110096466,
|
|
"learning_rate": 9.731571990104105e-08,
|
|
"loss": 0.798,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 0.9692139085384079,
|
|
"grad_norm": 0.19085690756684667,
|
|
"learning_rate": 9.483840169287828e-08,
|
|
"loss": 0.7845,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 0.9696124339942214,
|
|
"grad_norm": 0.22770637291835538,
|
|
"learning_rate": 9.239294834041179e-08,
|
|
"loss": 0.8013,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 0.9700109594500349,
|
|
"grad_norm": 0.21119203670687375,
|
|
"learning_rate": 8.997936375840566e-08,
|
|
"loss": 0.8095,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 0.9704094849058483,
|
|
"grad_norm": 0.18505204962281271,
|
|
"learning_rate": 8.759765181060698e-08,
|
|
"loss": 0.7804,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.9708080103616619,
|
|
"grad_norm": 0.18558037459333185,
|
|
"learning_rate": 8.524781630974144e-08,
|
|
"loss": 0.7941,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 0.9712065358174753,
|
|
"grad_norm": 0.18719202865767845,
|
|
"learning_rate": 8.292986101750222e-08,
|
|
"loss": 0.8026,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 0.9716050612732888,
|
|
"grad_norm": 0.18361106837972332,
|
|
"learning_rate": 8.064378964455666e-08,
|
|
"loss": 0.7835,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 0.9720035867291024,
|
|
"grad_norm": 0.1941218667296314,
|
|
"learning_rate": 7.838960585051959e-08,
|
|
"loss": 0.7761,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 0.9724021121849158,
|
|
"grad_norm": 0.1875724989599244,
|
|
"learning_rate": 7.616731324396887e-08,
|
|
"loss": 0.7837,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.9728006376407293,
|
|
"grad_norm": 0.19069373972701836,
|
|
"learning_rate": 7.397691538242103e-08,
|
|
"loss": 0.8045,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 0.9731991630965428,
|
|
"grad_norm": 0.18927346704482184,
|
|
"learning_rate": 7.181841577234449e-08,
|
|
"loss": 0.8012,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 0.9735976885523563,
|
|
"grad_norm": 0.18820692363643488,
|
|
"learning_rate": 6.969181786913304e-08,
|
|
"loss": 0.7829,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 0.9739962140081698,
|
|
"grad_norm": 0.18304987062051034,
|
|
"learning_rate": 6.759712507711902e-08,
|
|
"loss": 0.7697,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 0.9743947394639833,
|
|
"grad_norm": 0.19039574921948385,
|
|
"learning_rate": 6.553434074955789e-08,
|
|
"loss": 0.7909,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.9747932649197968,
|
|
"grad_norm": 0.19093438505230914,
|
|
"learning_rate": 6.350346818862374e-08,
|
|
"loss": 0.8287,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 0.9751917903756102,
|
|
"grad_norm": 0.18824893139518173,
|
|
"learning_rate": 6.150451064540708e-08,
|
|
"loss": 0.7963,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 0.9755903158314237,
|
|
"grad_norm": 0.18807173681160894,
|
|
"learning_rate": 5.953747131990595e-08,
|
|
"loss": 0.7839,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 0.9759888412872372,
|
|
"grad_norm": 0.1833502517025838,
|
|
"learning_rate": 5.760235336102149e-08,
|
|
"loss": 0.7594,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 0.9763873667430507,
|
|
"grad_norm": 0.3654620173098497,
|
|
"learning_rate": 5.569915986656016e-08,
|
|
"loss": 0.7682,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.9767858921988642,
|
|
"grad_norm": 0.18924038600883106,
|
|
"learning_rate": 5.3827893883215964e-08,
|
|
"loss": 0.7996,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 0.9771844176546777,
|
|
"grad_norm": 0.1903600412956394,
|
|
"learning_rate": 5.198855840657491e-08,
|
|
"loss": 0.8085,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 0.9775829431104912,
|
|
"grad_norm": 0.1869548438274652,
|
|
"learning_rate": 5.01811563811061e-08,
|
|
"loss": 0.8068,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 0.9779814685663046,
|
|
"grad_norm": 0.18864866332212885,
|
|
"learning_rate": 4.8405690700161766e-08,
|
|
"loss": 0.7886,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 0.9783799940221182,
|
|
"grad_norm": 0.18929120724640708,
|
|
"learning_rate": 4.6662164205966143e-08,
|
|
"loss": 0.7762,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.9787785194779317,
|
|
"grad_norm": 0.1917795489994272,
|
|
"learning_rate": 4.495057968961769e-08,
|
|
"loss": 0.7884,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 0.9791770449337451,
|
|
"grad_norm": 0.39039385650289893,
|
|
"learning_rate": 4.327093989107578e-08,
|
|
"loss": 0.7692,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 0.9795755703895587,
|
|
"grad_norm": 0.18426417807456694,
|
|
"learning_rate": 4.162324749916735e-08,
|
|
"loss": 0.7869,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 0.9799740958453721,
|
|
"grad_norm": 0.18680439549269473,
|
|
"learning_rate": 4.0007505151571365e-08,
|
|
"loss": 0.751,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 0.9803726213011856,
|
|
"grad_norm": 0.1818392556169463,
|
|
"learning_rate": 3.8423715434823264e-08,
|
|
"loss": 0.7696,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.9807711467569991,
|
|
"grad_norm": 0.18570844625506863,
|
|
"learning_rate": 3.6871880884310486e-08,
|
|
"loss": 0.7886,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 0.9811696722128126,
|
|
"grad_norm": 0.21101630534654273,
|
|
"learning_rate": 3.5352003984259195e-08,
|
|
"loss": 0.7831,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 0.9815681976686261,
|
|
"grad_norm": 0.18069209360220204,
|
|
"learning_rate": 3.3864087167738705e-08,
|
|
"loss": 0.7504,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 0.9819667231244396,
|
|
"grad_norm": 0.1966709155844413,
|
|
"learning_rate": 3.240813281666144e-08,
|
|
"loss": 0.8465,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 0.9823652485802531,
|
|
"grad_norm": 0.1932914272847854,
|
|
"learning_rate": 3.09841432617608e-08,
|
|
"loss": 0.8142,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.9827637740360665,
|
|
"grad_norm": 0.18925513736084928,
|
|
"learning_rate": 2.959212078261553e-08,
|
|
"loss": 0.7721,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 0.98316229949188,
|
|
"grad_norm": 0.19004539946693746,
|
|
"learning_rate": 2.823206760761643e-08,
|
|
"loss": 0.8169,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 0.9835608249476935,
|
|
"grad_norm": 0.1836565966256934,
|
|
"learning_rate": 2.690398591398413e-08,
|
|
"loss": 0.7499,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 0.983959350403507,
|
|
"grad_norm": 0.18503029280898622,
|
|
"learning_rate": 2.5607877827757975e-08,
|
|
"loss": 0.8015,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 0.9843578758593206,
|
|
"grad_norm": 0.20802778839564787,
|
|
"learning_rate": 2.4343745423791588e-08,
|
|
"loss": 0.8196,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.984756401315134,
|
|
"grad_norm": 0.18807720140750997,
|
|
"learning_rate": 2.3111590725750644e-08,
|
|
"loss": 0.7935,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 0.9851549267709475,
|
|
"grad_norm": 0.18198059255273902,
|
|
"learning_rate": 2.191141570610844e-08,
|
|
"loss": 0.784,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 0.9855534522267609,
|
|
"grad_norm": 0.18391481474432778,
|
|
"learning_rate": 2.074322228614589e-08,
|
|
"loss": 0.7844,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 0.9859519776825745,
|
|
"grad_norm": 0.18825854264243994,
|
|
"learning_rate": 1.9607012335949306e-08,
|
|
"loss": 0.7916,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 0.986350503138388,
|
|
"grad_norm": 0.18782508477662568,
|
|
"learning_rate": 1.850278767439928e-08,
|
|
"loss": 0.7595,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.9867490285942014,
|
|
"grad_norm": 0.18321843468534266,
|
|
"learning_rate": 1.7430550069175157e-08,
|
|
"loss": 0.7797,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 0.987147554050015,
|
|
"grad_norm": 0.18648381218274565,
|
|
"learning_rate": 1.6390301236755003e-08,
|
|
"loss": 0.7851,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 0.9875460795058284,
|
|
"grad_norm": 0.18500156564930573,
|
|
"learning_rate": 1.53820428424023e-08,
|
|
"loss": 0.7918,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 0.9879446049616419,
|
|
"grad_norm": 0.18650103609208593,
|
|
"learning_rate": 1.4405776500170388e-08,
|
|
"loss": 0.7453,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 0.9883431304174554,
|
|
"grad_norm": 0.18474307243866062,
|
|
"learning_rate": 1.346150377290023e-08,
|
|
"loss": 0.812,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.9887416558732689,
|
|
"grad_norm": 0.18500951818710418,
|
|
"learning_rate": 1.2549226172213769e-08,
|
|
"loss": 0.7823,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 0.9891401813290824,
|
|
"grad_norm": 0.18520599480715474,
|
|
"learning_rate": 1.1668945158518352e-08,
|
|
"loss": 0.8023,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 0.9895387067848959,
|
|
"grad_norm": 0.18653565333341804,
|
|
"learning_rate": 1.0820662140997862e-08,
|
|
"loss": 0.803,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 0.9899372322407094,
|
|
"grad_norm": 0.18774709338075257,
|
|
"learning_rate": 1.0004378477610489e-08,
|
|
"loss": 0.82,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 0.9903357576965228,
|
|
"grad_norm": 0.18995043820529847,
|
|
"learning_rate": 9.220095475090951e-09,
|
|
"loss": 0.795,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.9907342831523364,
|
|
"grad_norm": 0.20276705829265076,
|
|
"learning_rate": 8.467814388948282e-09,
|
|
"loss": 0.7578,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 0.9911328086081499,
|
|
"grad_norm": 0.18309866901765423,
|
|
"learning_rate": 7.747536423456937e-09,
|
|
"loss": 0.7786,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 0.9915313340639633,
|
|
"grad_norm": 0.18809540486983145,
|
|
"learning_rate": 7.059262731661243e-09,
|
|
"loss": 0.8164,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 0.9919298595197769,
|
|
"grad_norm": 0.18502562806451164,
|
|
"learning_rate": 6.402994415377617e-09,
|
|
"loss": 0.7805,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 0.9923283849755903,
|
|
"grad_norm": 0.1859120952234941,
|
|
"learning_rate": 5.7787325251768e-09,
|
|
"loss": 0.7834,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.9927269104314038,
|
|
"grad_norm": 0.1846277966595508,
|
|
"learning_rate": 5.186478060403844e-09,
|
|
"loss": 0.7745,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 0.9931254358872172,
|
|
"grad_norm": 0.18693650346096127,
|
|
"learning_rate": 4.626231969155903e-09,
|
|
"loss": 0.7965,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 0.9935239613430308,
|
|
"grad_norm": 0.18817760919614965,
|
|
"learning_rate": 4.0979951482955636e-09,
|
|
"loss": 0.817,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 0.9939224867988443,
|
|
"grad_norm": 0.18262042873834777,
|
|
"learning_rate": 3.6017684434397348e-09,
|
|
"loss": 0.7637,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 0.9943210122546577,
|
|
"grad_norm": 0.18654490339612353,
|
|
"learning_rate": 3.1375526489685337e-09,
|
|
"loss": 0.7688,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.9947195377104713,
|
|
"grad_norm": 0.18607152410344097,
|
|
"learning_rate": 2.7053485080141827e-09,
|
|
"loss": 0.7929,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 0.9951180631662847,
|
|
"grad_norm": 0.18419897844519703,
|
|
"learning_rate": 2.3051567124587894e-09,
|
|
"loss": 0.7701,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 0.9955165886220982,
|
|
"grad_norm": 0.19186493723661452,
|
|
"learning_rate": 1.936977902949888e-09,
|
|
"loss": 0.808,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 0.9959151140779118,
|
|
"grad_norm": 0.19815019961308503,
|
|
"learning_rate": 1.6008126688737968e-09,
|
|
"loss": 0.7752,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 0.9963136395337252,
|
|
"grad_norm": 0.18417877206027317,
|
|
"learning_rate": 1.2966615483800404e-09,
|
|
"loss": 0.7644,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.9967121649895387,
|
|
"grad_norm": 0.18546354005657695,
|
|
"learning_rate": 1.0245250283613672e-09,
|
|
"loss": 0.7876,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 0.9971106904453522,
|
|
"grad_norm": 0.1876008897484704,
|
|
"learning_rate": 7.844035444648512e-10,
|
|
"loss": 0.8233,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 0.9975092159011657,
|
|
"grad_norm": 0.19981491316640151,
|
|
"learning_rate": 5.762974810852307e-10,
|
|
"loss": 0.7684,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 0.9979077413569791,
|
|
"grad_norm": 0.18665647757266646,
|
|
"learning_rate": 4.002071713626876e-10,
|
|
"loss": 0.7954,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 0.9983062668127927,
|
|
"grad_norm": 0.1894225254309972,
|
|
"learning_rate": 2.5613289719172985e-10,
|
|
"loss": 0.788,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.9987047922686062,
|
|
"grad_norm": 0.187487701192293,
|
|
"learning_rate": 1.440748892100885e-10,
|
|
"loss": 0.8017,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 0.9991033177244196,
|
|
"grad_norm": 0.1916417262477468,
|
|
"learning_rate": 6.403332680537943e-11,
|
|
"loss": 0.803,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 0.9995018431802332,
|
|
"grad_norm": 0.18369932510494486,
|
|
"learning_rate": 1.6008338108441936e-11,
|
|
"loss": 0.7746,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 0.9999003686360466,
|
|
"grad_norm": 0.19092333059410313,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.7752,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 0.9999003686360466,
|
|
"step": 2509,
|
|
"total_flos": 2400250660651008.0,
|
|
"train_loss": 0.8378047743086893,
|
|
"train_runtime": 25259.4249,
|
|
"train_samples_per_second": 57.218,
|
|
"train_steps_per_second": 0.099
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 2509,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2400250660651008.0,
|
|
"train_batch_size": 9,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|